diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-1000/generation_config.json b/checkpoint-1000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-1000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..04bcf88578ff52a5fcb65eeb693717e5a4a97bab --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2da35eae1fa4d0f4d64dfaf13611139c7fc5071e4c3c68541849d845f43d47c2 +size 1934161093 diff --git a/checkpoint-1000/preprocessor_config.json b/checkpoint-1000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-1000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-1000/pytorch_model.bin b/checkpoint-1000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..537ce6d5646a02ba0545399d74d3f06d456c7e5d --- /dev/null +++ b/checkpoint-1000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6412ab0b63845c764c6eb8a9c86bd92518cd9f98695d342206dea155a7b8d658 +size 967102601 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bb55d85f927e3669e1aaae7a1a9a881ad9c84488 --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc62a67522cc3c0c64b59a755907ab97d94448fa5d895120dc520ded0092da5 +size 14639 diff --git a/checkpoint-1000/scaler.pt b/checkpoint-1000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f84ecdd3aff4435d828cf9605cca32636575763 --- /dev/null +++ b/checkpoint-1000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2292b0497ddb0554f3fc4518d7cc9b046879e509117234d9fc7434b7c01df20c +size 557 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..851c9b3efdedbf5486e98cfbd38d50d6e7417237 --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b0888ebd79f5d0cf62353bb8847946cd9a34fe2467936dd8536d23eebe9b51c +size 627 diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..86a189c23fbbec76c79eb701da0522891b2271c1 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,265 @@ +{ + "best_metric": 40.657571667832, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-1000", + "epoch": 0.025, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 4.61736640512e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-10000/config.json b/checkpoint-10000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-10000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-10000/generation_config.json b/checkpoint-10000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-10000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-10000/optimizer.pt b/checkpoint-10000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ac4dc9091bc4a39c4af7a89ab0d7bc4ea253cef --- /dev/null +++ b/checkpoint-10000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a32d9805c59193708ca3457928f4e0338c3798a0997a5d0140a2681e805eed2 +size 1934161093 diff --git a/checkpoint-10000/preprocessor_config.json b/checkpoint-10000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-10000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-10000/pytorch_model.bin b/checkpoint-10000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ff46768f0cc96588e73acfc908ee953cb58a7e1c --- /dev/null +++ b/checkpoint-10000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94254e2ff2be8fa9da3df84401ad9f9c5890f1a4ddc50b64f8354ec4c73f0f37 +size 967102601 diff --git a/checkpoint-10000/rng_state.pth b/checkpoint-10000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bc89ca4ec6268ccee65583788fe374e8de38921f --- /dev/null +++ b/checkpoint-10000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49cf56c88b088b236492e7c5ae93416b8f881a5577a4e18dd7faa6b701450b61 +size 14575 diff --git a/checkpoint-10000/scaler.pt b/checkpoint-10000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9fc1d4cd06d0e20530267a4763f6816126c06641 --- /dev/null +++ b/checkpoint-10000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54dbacd4cb4bdfc1c8e244d48f82af5f3eeb9c7ff475cb8e857c534db9234089 +size 557 diff --git a/checkpoint-10000/scheduler.pt b/checkpoint-10000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..39749a376624411602a90d42baa40e4b364c1598 --- /dev/null +++ b/checkpoint-10000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f77fc232ee012a7636d6474825911122e451aa1757106e94044784b07ca6c9bf +size 627 diff --git a/checkpoint-10000/trainer_state.json b/checkpoint-10000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..df7b1e783b721b7e4366860d58516483389a2be6 --- /dev/null +++ b/checkpoint-10000/trainer_state.json @@ -0,0 +1,2506 @@ +{ + "best_metric": 31.58262735979447, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-9000", + "epoch": 5.031125, + "global_step": 10000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 4.6159234781184e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-10000/training_args.bin b/checkpoint-10000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-10000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-11000/config.json b/checkpoint-11000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-11000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-11000/generation_config.json b/checkpoint-11000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-11000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-11000/optimizer.pt b/checkpoint-11000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3473db87f136c4e2c375c7a798ffc520c7b08985 --- /dev/null +++ b/checkpoint-11000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ebe9a7167ac6d8714c10503f224fa5b58a019c86e9388beb5e866d95654371d +size 1934161093 diff --git a/checkpoint-11000/preprocessor_config.json b/checkpoint-11000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-11000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-11000/pytorch_model.bin b/checkpoint-11000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..2993689ac50810dae093f48950e67322589b88dd --- /dev/null +++ b/checkpoint-11000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8185a13e4eb3a24a6da5cf406243ba0a61f2f3047f67b491635f12b941b8fbe8 +size 967102601 diff --git a/checkpoint-11000/rng_state.pth b/checkpoint-11000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c6ac4e3796b442a9c43e756f1712077ee98248c5 --- /dev/null +++ b/checkpoint-11000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fb11b8fe7afefbde3385e8e8f0bb0a92eca3f4813d20d1e16253d70edd930a0 +size 14639 diff --git a/checkpoint-11000/scaler.pt b/checkpoint-11000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3a90ee56e6f24cd083fdb2267593580bf5387ad --- /dev/null +++ b/checkpoint-11000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:652e9e9b1f49df5c7d4e2eda2b4c0d4bbda8a9746e3a136d3022a7b8fb113822 +size 557 diff --git a/checkpoint-11000/scheduler.pt b/checkpoint-11000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..febc5347dff0ef48a5b51a24d0819e755850ab39 --- /dev/null +++ b/checkpoint-11000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:724fd087a1e24275c921af58d6e25f4867cedc7578db7212e10c786cd348c477 +size 627 diff --git a/checkpoint-11000/trainer_state.json b/checkpoint-11000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9a014d1ef92bb149d083a27e0dcc6dccc4b263e1 --- /dev/null +++ b/checkpoint-11000/trainer_state.json @@ -0,0 +1,2755 @@ +{ + "best_metric": 31.58262735979447, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-9000", + "epoch": 6.01235, + "global_step": 11000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 5.07737153323008e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-11000/training_args.bin b/checkpoint-11000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-11000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-12000/config.json b/checkpoint-12000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-12000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-12000/generation_config.json b/checkpoint-12000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-12000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-12000/optimizer.pt b/checkpoint-12000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8082b59fc06a27d815efaa1a33bcb336321443f --- /dev/null +++ b/checkpoint-12000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92e664ec5154e726a35e6d42bf3352ab51341193a6689efdaea9604923bda9cc +size 1934161093 diff --git a/checkpoint-12000/preprocessor_config.json b/checkpoint-12000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-12000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-12000/pytorch_model.bin b/checkpoint-12000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8c95d1732c88a04486e2a6f9f768138059f5e530 --- /dev/null +++ b/checkpoint-12000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:163238221ef8cc3002eb5d1ae169577ad908ddae8b499b8370880e5e726f9ef6 +size 967102601 diff --git a/checkpoint-12000/rng_state.pth b/checkpoint-12000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..401973770a5dc9da14e27d6769765023797d80ad --- /dev/null +++ b/checkpoint-12000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8c552ce94d91a466160e0526f23f77fc2c5a8568c307954a912646bc8e340ae +size 14639 diff --git a/checkpoint-12000/scaler.pt b/checkpoint-12000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..366b21573631fcab3dbd76e5a4a2f819b3368414 --- /dev/null +++ b/checkpoint-12000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5a815a4758f309647094935e93cd183cdf97272d965f1f809d37a65b8075906 +size 557 diff --git a/checkpoint-12000/scheduler.pt b/checkpoint-12000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c34a884d7677440ad52476107cc2724ed2f28f4 --- /dev/null +++ b/checkpoint-12000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e340fa9311f70ee0dc384eeb43490c95a739982fb89fc03044dd1c37ffa4ee2 +size 627 diff --git a/checkpoint-12000/trainer_state.json b/checkpoint-12000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4caf4e49552987f0c12008cfc8c81429b3259bc2 --- /dev/null +++ b/checkpoint-12000/trainer_state.json @@ -0,0 +1,3004 @@ +{ + "best_metric": 31.58262735979447, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-9000", + "epoch": 6.03735, + "global_step": 12000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 5.53910817374208e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-12000/training_args.bin b/checkpoint-12000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-12000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-13000/config.json b/checkpoint-13000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-13000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-13000/generation_config.json b/checkpoint-13000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-13000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-13000/optimizer.pt b/checkpoint-13000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc719abcc9ae48328225217e1d4522e635a176c2 --- /dev/null +++ b/checkpoint-13000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca0307a5ac957394d12917783450a1a36dfb78a489e3c19c508da9731c864e51 +size 1934161093 diff --git a/checkpoint-13000/preprocessor_config.json b/checkpoint-13000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-13000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-13000/pytorch_model.bin b/checkpoint-13000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..cd1e81ccfab7e7e00659a68dccbdab1647017b17 --- /dev/null +++ b/checkpoint-13000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e67a8b1d3e0f7bf51629723dd88a387b19d607a0669ee2acc20a9c14be05b85 +size 967102601 diff --git a/checkpoint-13000/rng_state.pth b/checkpoint-13000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a7fa568558b74749bc95d770a02aec9680f96b8b --- /dev/null +++ b/checkpoint-13000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15cd7a103721023beab2179ac18954faf15caa4e1cb6377a37ce555a07f66e53 +size 14575 diff --git a/checkpoint-13000/scaler.pt b/checkpoint-13000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d30d28b04eebe38093407432eb8a3f0329f6eff0 --- /dev/null +++ b/checkpoint-13000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba0bdea6f52c5e2b58651f8fe7c517a709a88dc947af37831e8d1ee42deb4b5f +size 557 diff --git a/checkpoint-13000/scheduler.pt b/checkpoint-13000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d24f20fa2086f6dbfe90ba85ef9fe716d95e5331 --- /dev/null +++ b/checkpoint-13000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:affdb4a0764216c9f0dc59a9078d6c986f9bd648222f67a55a33342eaed0c1e7 +size 627 diff --git a/checkpoint-13000/trainer_state.json b/checkpoint-13000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b159f9f25bfb64a89931996b42c8af9013ab728a --- /dev/null +++ b/checkpoint-13000/trainer_state.json @@ -0,0 +1,3253 @@ +{ + "best_metric": 27.36142051090262, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-13000", + "epoch": 7.018575, + "global_step": 13000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 6.00055622885376e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-13000/training_args.bin b/checkpoint-13000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-13000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-14000/config.json b/checkpoint-14000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-14000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-14000/generation_config.json b/checkpoint-14000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-14000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-14000/optimizer.pt b/checkpoint-14000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b87d067efbc8c8f8b658c84b7b1ffdd0719e242b --- /dev/null +++ b/checkpoint-14000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3599da00bfae74ac28cc6acc8b23de569ac16e20f8507d9b7a108231a90e60be +size 1934161093 diff --git a/checkpoint-14000/preprocessor_config.json b/checkpoint-14000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-14000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-14000/pytorch_model.bin b/checkpoint-14000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..2aa4929165e507e959ffd67e93e26ffffeab010f --- /dev/null +++ b/checkpoint-14000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:947e37c920612b21a73b6e057cd766c570c5581372ba28281c732f58033cdcf3 +size 967102601 diff --git a/checkpoint-14000/rng_state.pth b/checkpoint-14000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..228ce8ff89fde1daf0a7612a49e36bf56560d10f --- /dev/null +++ b/checkpoint-14000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b7765c51304ea585bb4ffc3ceda55b76af41ea506994b367a14b036439ec2d +size 14575 diff --git a/checkpoint-14000/scaler.pt b/checkpoint-14000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..159116fcf1ac43a95ec17deb75875404ff09e8a7 --- /dev/null +++ b/checkpoint-14000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac6796d976487bb73573afff887c55b78662bdf5ebc13ff47f13b2d333b2f440 +size 557 diff --git a/checkpoint-14000/scheduler.pt b/checkpoint-14000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb1098b7e347473289fedb1fcc6e2947aca2f9a0 --- /dev/null +++ b/checkpoint-14000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e8f0bc22dc053e0aa1cc30e72ced57645003ff64f26dc9ecb88064191db9802 +size 627 diff --git a/checkpoint-14000/trainer_state.json b/checkpoint-14000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5e1e00242a26b20a7f04d74cebc37d79398f5948 --- /dev/null +++ b/checkpoint-14000/trainer_state.json @@ -0,0 +1,3502 @@ +{ + "best_metric": 27.36142051090262, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-13000", + "epoch": 7.043575, + "global_step": 14000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 6.46229286936576e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-14000/training_args.bin b/checkpoint-14000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-14000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-15000/config.json b/checkpoint-15000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-15000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-15000/generation_config.json b/checkpoint-15000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-15000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-15000/optimizer.pt b/checkpoint-15000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..dcd24f6d73881afd976b9f14e3a0644b4a0af1ff --- /dev/null +++ b/checkpoint-15000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61248aa443cb282c40bf4d18888333580f9efc8b08153ecd9432c2703249893f +size 1934161093 diff --git a/checkpoint-15000/preprocessor_config.json b/checkpoint-15000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-15000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-15000/pytorch_model.bin b/checkpoint-15000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..bf5816a9e78fd3325056811a884528b9bce1c3e1 --- /dev/null +++ b/checkpoint-15000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:851771899e9a36719a791ef15eccd8245bfa869446f8a7e383de00af2d328c8b +size 967102601 diff --git a/checkpoint-15000/rng_state.pth b/checkpoint-15000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef26be0f1c9496245f2840e466c40bfd4408fbbd --- /dev/null +++ b/checkpoint-15000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ac201f9ca725406ee56aab822505b885902ad8bc123b3bfd6b8feb679cf802b +size 14575 diff --git a/checkpoint-15000/scaler.pt b/checkpoint-15000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..01f11c88a2a211f77a95351c953c5a3e639437ca --- /dev/null +++ b/checkpoint-15000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0b4a15101ad001cf58c0d10d946b7bcc8070dc0f39020b2e91ad59b4b98926d +size 557 diff --git a/checkpoint-15000/scheduler.pt b/checkpoint-15000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..168fdfa6334655b65b822598dc2de778dec48cf7 --- /dev/null +++ b/checkpoint-15000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:027eb012f368adceabf1cf881488da132fb00861ac86b79466210c78cbb39f48 +size 627 diff --git a/checkpoint-15000/trainer_state.json b/checkpoint-15000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c2ca61c19080ce35bd6cb89fb2a1b58b1312f12a --- /dev/null +++ b/checkpoint-15000/trainer_state.json @@ -0,0 +1,3751 @@ +{ + "best_metric": 27.36142051090262, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-13000", + "epoch": 8.0248, + "global_step": 15000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 6.92374092447744e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-15000/training_args.bin b/checkpoint-15000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-15000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-16000/config.json b/checkpoint-16000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-16000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-16000/generation_config.json b/checkpoint-16000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-16000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-16000/optimizer.pt b/checkpoint-16000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..94a2339e61aebe86c8d8bd8774824671e2288f63 --- /dev/null +++ b/checkpoint-16000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a8211392db7458d1420460ffeaba675c14922231c727e6207607173460422dd +size 1934161093 diff --git a/checkpoint-16000/preprocessor_config.json b/checkpoint-16000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-16000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-16000/pytorch_model.bin b/checkpoint-16000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3323f352c3e715e9d8ce45d53f5f91c166805e79 --- /dev/null +++ b/checkpoint-16000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10388c8bbb381a6eba5508221317200f6f49019abffb04db4882eb63f34b4940 +size 967102601 diff --git a/checkpoint-16000/rng_state.pth b/checkpoint-16000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1ea89c81b9b5a6cb17b855a20074ee5c565c11b8 --- /dev/null +++ b/checkpoint-16000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ca15ef22421ffbd1120d6ea8603c568e6ddea529464b043d491a37f2acdc38a +size 14575 diff --git a/checkpoint-16000/scaler.pt b/checkpoint-16000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..031d3ba6cc8b8a7bab8fb5a8d3e4fd762102ed42 --- /dev/null +++ b/checkpoint-16000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d8e225e4c578565f607837ef256c257ed84a1fe32d9616255c831e94f4322d3 +size 557 diff --git a/checkpoint-16000/scheduler.pt b/checkpoint-16000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..833d1ec136aabfc193ebcb2cdc90b9c853e91eca --- /dev/null +++ b/checkpoint-16000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78bf406c3af91d63a8c32095686e7d8d3aacd8eece7f67a9463b74c7b343e1ae +size 627 diff --git a/checkpoint-16000/trainer_state.json b/checkpoint-16000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ab5d08dd655f8559b96b4e813a9b8883a35a73b1 --- /dev/null +++ b/checkpoint-16000/trainer_state.json @@ -0,0 +1,4000 @@ +{ + "best_metric": 27.36142051090262, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-13000", + "epoch": 9.006025, + "global_step": 16000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 7.38518897958912e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-16000/training_args.bin b/checkpoint-16000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-16000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-17000/config.json b/checkpoint-17000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-17000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-17000/generation_config.json b/checkpoint-17000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-17000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-17000/optimizer.pt b/checkpoint-17000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f95d2b907df1eaa0061e37266b666da781222e0 --- /dev/null +++ b/checkpoint-17000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2568f46ac1bc67a1fa97f9372ac56c5ced59fa82fc4ffd2c6493bbd2e52d01d2 +size 1934161093 diff --git a/checkpoint-17000/preprocessor_config.json b/checkpoint-17000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-17000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-17000/pytorch_model.bin b/checkpoint-17000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a37c41a69ec641f2290cf80feaa3f1e4ae65140d --- /dev/null +++ b/checkpoint-17000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a8d1839aeaf43185526829aa2810c91fd34bb4d0a3b7851c40e18f17ee72eeb +size 967102601 diff --git a/checkpoint-17000/rng_state.pth b/checkpoint-17000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1b8f3547226f937b37e39e5bb24e5440e2d1b217 --- /dev/null +++ b/checkpoint-17000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb92150580faeb60c507a92856a57f097c326158bb93726190bd617108aea6d6 +size 14575 diff --git a/checkpoint-17000/scaler.pt b/checkpoint-17000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f458b3f08c1b10917e5ca964a6e0b36831f69ffd --- /dev/null +++ b/checkpoint-17000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:699191d53806d4bc4c424dbdb3778c68df1035c0eb39024cc8bbd51db337bea4 +size 557 diff --git a/checkpoint-17000/scheduler.pt b/checkpoint-17000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a67edbf6595f878fbad39c8592996de6f64adf95 --- /dev/null +++ b/checkpoint-17000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9e4a04ff509d31280d033157e3e8448fc9b0005cecb062c721b3afcf8f4d5c5 +size 627 diff --git a/checkpoint-17000/trainer_state.json b/checkpoint-17000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..15550a42354d40bfaf5a70ad80f4714a1962a440 --- /dev/null +++ b/checkpoint-17000/trainer_state.json @@ -0,0 +1,4249 @@ +{ + "best_metric": 25.920746678807788, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-17000", + "epoch": 9.031025, + "global_step": 17000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 7.84692562010112e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-17000/training_args.bin b/checkpoint-17000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-17000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-18000/config.json b/checkpoint-18000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-18000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-18000/generation_config.json b/checkpoint-18000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-18000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-18000/optimizer.pt b/checkpoint-18000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9531ea198f4ab244853fb124a74c0a3c60e670cf --- /dev/null +++ b/checkpoint-18000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02fae2439698d58bb543f1046e4c9883a646be4029aaf6fd637c5b0ab1bb7e7d +size 1934161093 diff --git a/checkpoint-18000/preprocessor_config.json b/checkpoint-18000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-18000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-18000/pytorch_model.bin b/checkpoint-18000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..31960d3d20a4b15895aa955654713a1089d0fc56 --- /dev/null +++ b/checkpoint-18000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a976d6945de5467a01decc1fa7b9dd5fc3a6a82d9bb13280d3353966e2130409 +size 967102601 diff --git a/checkpoint-18000/rng_state.pth b/checkpoint-18000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5867b3970fc4de99292512157edc903fcefc3d19 --- /dev/null +++ b/checkpoint-18000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:112aa39ef12908b17af199483216518ca704dff3e728f22495d9e6dd3a82fdca +size 14639 diff --git a/checkpoint-18000/scaler.pt b/checkpoint-18000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c1c78a645a2588c6a8fbf7d71e621a99d36b593 --- /dev/null +++ b/checkpoint-18000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c11c81816fb8ec7752fb74e87f977b342d6f51f1b7d860c67e4a29e97b32a487 +size 557 diff --git a/checkpoint-18000/scheduler.pt b/checkpoint-18000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..070532dc983b97f206d86cbcedd57b2953d45ab6 --- /dev/null +++ b/checkpoint-18000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b135f9cfe531e65d73607f1f7be5288eff93a7467e6550093697883d486be835 +size 627 diff --git a/checkpoint-18000/trainer_state.json b/checkpoint-18000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..827089598b1830185feacc4b3eeb5d5bce30ce74 --- /dev/null +++ b/checkpoint-18000/trainer_state.json @@ -0,0 +1,4498 @@ +{ + "best_metric": 25.330493178750874, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-18000", + "epoch": 10.01225, + "global_step": 18000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 8.3083736752128e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-18000/training_args.bin b/checkpoint-18000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-18000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-19000/config.json b/checkpoint-19000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-19000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-19000/generation_config.json b/checkpoint-19000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-19000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-19000/optimizer.pt b/checkpoint-19000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..12dd0eb405ab90969e66b8cf4d0f30773f39590c --- /dev/null +++ b/checkpoint-19000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4dc991272aa1ac5c2bc8f8c73fb07a3693a84cd4ff070eea4ed0d04b9eee846 +size 1934161093 diff --git a/checkpoint-19000/preprocessor_config.json b/checkpoint-19000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-19000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-19000/pytorch_model.bin b/checkpoint-19000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..d926f9f8c60e2f9b75eac2ef940846545a7ab2b9 --- /dev/null +++ b/checkpoint-19000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffc1e535045ce05c113c5cd514089b1afd18b5b623eab411c9eca3b75570f626 +size 967102601 diff --git a/checkpoint-19000/rng_state.pth b/checkpoint-19000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..caa510b015b7b092c6463281cc2fb913ae300e70 --- /dev/null +++ b/checkpoint-19000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c80cc6c36a848d2b43c21734e3a514d410397fef4aa3732e3549e19c50577035 +size 14575 diff --git a/checkpoint-19000/scaler.pt b/checkpoint-19000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1ea6bb9dbd83dfd09687eaf8251044d08f4f952 --- /dev/null +++ b/checkpoint-19000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38ab751a6405f1e215db01c80ad6c883ef7f4b44e87d57e773f25a8fb7050cd9 +size 557 diff --git a/checkpoint-19000/scheduler.pt b/checkpoint-19000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..540f0f6fa447acd895a1248fbc53256202ff8e86 --- /dev/null +++ b/checkpoint-19000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce947876ec0551f303682057912104df8c65b81a3f6c5efbf490244078831786 +size 627 diff --git a/checkpoint-19000/trainer_state.json b/checkpoint-19000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..61ebcd4ecd3799b3657478e86a0fde4a5d249099 --- /dev/null +++ b/checkpoint-19000/trainer_state.json @@ -0,0 +1,4747 @@ +{ + "best_metric": 24.79877721588969, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-19000", + "epoch": 10.03725, + "global_step": 19000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 8.7701103157248e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-19000/training_args.bin b/checkpoint-19000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-19000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-2000/config.json b/checkpoint-2000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-2000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-2000/generation_config.json b/checkpoint-2000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-2000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-2000/optimizer.pt b/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1198d56bab7bb369d04ee5905bef97f77246fce0 --- /dev/null +++ b/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096520a4f7e8f9ecb27acf05006b441e5320d12ad8ffb1d3bc68a806bd908f76 +size 1934161093 diff --git a/checkpoint-2000/preprocessor_config.json b/checkpoint-2000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-2000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-2000/pytorch_model.bin b/checkpoint-2000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..d6c869990fe75261337bcd6d727c34eebec4fc1f --- /dev/null +++ b/checkpoint-2000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3edc053e5fb2156c541fe0eef54073faaeddae141211847a671e5a022ed1018e +size 967102601 diff --git a/checkpoint-2000/rng_state.pth b/checkpoint-2000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c434760a2e9c86568bf553bc36f2bbd091a6d89b --- /dev/null +++ b/checkpoint-2000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:387254903a558d4e353afabac93253b53632e2fee5d4a06b758057ab10fda0cd +size 14639 diff --git a/checkpoint-2000/scaler.pt b/checkpoint-2000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..137505533d42127a732686a501b882bd84b6a629 --- /dev/null +++ b/checkpoint-2000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e05db01431ef28f8b04837b0db08512024c3c3c31931c75aa96bdaa971a6337 +size 557 diff --git a/checkpoint-2000/scheduler.pt b/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8eab5056964af37e511965d708471681be77c1e --- /dev/null +++ b/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2bec2e4d186bd268eab2683aff96e40369e5ebfe92efe6675ba2bd4536d98f4 +size 627 diff --git a/checkpoint-2000/trainer_state.json b/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b9c94f379a7b3c557db7a350406eb17df69e2ce0 --- /dev/null +++ b/checkpoint-2000/trainer_state.json @@ -0,0 +1,514 @@ +{ + "best_metric": 35.12252231743606, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-2000", + "epoch": 1.006225, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 9.2318469562368e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2000/training_args.bin b/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-20000/config.json b/checkpoint-20000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-20000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-20000/generation_config.json b/checkpoint-20000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-20000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-20000/optimizer.pt b/checkpoint-20000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..84dd19fb51ecb5006faf0c2cf7c8b04038fba03d --- /dev/null +++ b/checkpoint-20000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0e3bf17a0d3f55b423a3c7307666d10ed91790d4919f548bcd3f9e52cb27076 +size 1934161093 diff --git a/checkpoint-20000/preprocessor_config.json b/checkpoint-20000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-20000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-20000/pytorch_model.bin b/checkpoint-20000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c59110c764eecc5305b0e16ba72c5464a16cbb8c --- /dev/null +++ b/checkpoint-20000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2ab1a2a68392bffcc5d2d3116a000063762f6be218ff4fe70522cc1b014e723 +size 967102601 diff --git a/checkpoint-20000/rng_state.pth b/checkpoint-20000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5ac84f8273bbf4a6de4ad61167e9cc56d13dbfaa --- /dev/null +++ b/checkpoint-20000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:122ff7e3179a2d7d4f013f760bf8ff272fc91f3f3375d2a2da5464cf40104a30 +size 14575 diff --git a/checkpoint-20000/scaler.pt b/checkpoint-20000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e4b9e8d34e288a1278bfcddc66f3d3be2a54af5 --- /dev/null +++ b/checkpoint-20000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be1582ddfefb8b3d27ac480793a5e3fb0f74e938fb530e19f8a18c710cb9f52a +size 557 diff --git a/checkpoint-20000/scheduler.pt b/checkpoint-20000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d179339744cc7b7a839f4567175f9430e57b10c5 --- /dev/null +++ b/checkpoint-20000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9280da8c030164f7b27467bc994068bea82a2598481df1db98f9864a096bd799 +size 627 diff --git a/checkpoint-20000/trainer_state.json b/checkpoint-20000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1e80d61e1b9094898e266d70cf5ec32916f536b1 --- /dev/null +++ b/checkpoint-20000/trainer_state.json @@ -0,0 +1,4996 @@ +{ + "best_metric": 24.392266540919362, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-20000", + "epoch": 11.018475, + "global_step": 20000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 9.23155837083648e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-20000/training_args.bin b/checkpoint-20000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-20000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-21000/config.json b/checkpoint-21000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-21000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-21000/generation_config.json b/checkpoint-21000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-21000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-21000/optimizer.pt b/checkpoint-21000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7b21db5fb6eb6313cceb1896a98b51b1115babe --- /dev/null +++ b/checkpoint-21000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c65fb3041f6bbc9cb1fd7e9aad8ed23b352d4b7d0c52bc4a6f8d00d6385be085 +size 1934161093 diff --git a/checkpoint-21000/preprocessor_config.json b/checkpoint-21000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-21000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-21000/pytorch_model.bin b/checkpoint-21000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..0845384b7156bf7152e4af8548f4b6a4be6f1a9b --- /dev/null +++ b/checkpoint-21000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6d19260092d7751f5c62de47c82ceaad5cfe4a0ba27fa56c56f793f4a0bfbd1 +size 967102601 diff --git a/checkpoint-21000/rng_state.pth b/checkpoint-21000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..eabfe547cc76ae0070c125d3711d396c9d0aa61e --- /dev/null +++ b/checkpoint-21000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6ac5602f630afe81fbb7207857efc8af425909d22950a1cd147632e00b4d7a7 +size 14639 diff --git a/checkpoint-21000/scaler.pt b/checkpoint-21000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c27abe863cb53c0f56d9e08e086f2aca02848b6 --- /dev/null +++ b/checkpoint-21000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08837e6582ee4a223cf98b8899da2c888e11d4b81ced0b15e46cc719acc1d89a +size 557 diff --git a/checkpoint-21000/scheduler.pt b/checkpoint-21000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..289f18800d48f32f0eefe2cf3e6404b65ff1cb13 --- /dev/null +++ b/checkpoint-21000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5e97635aefa8b2a6ad6e9e36b1a4fa531b533a5132d24f5186c862ad2203b0d +size 627 diff --git a/checkpoint-21000/trainer_state.json b/checkpoint-21000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3e97ecc856ac85dc9e84620f7d053f704c558624 --- /dev/null +++ b/checkpoint-21000/trainer_state.json @@ -0,0 +1,5245 @@ +{ + "best_metric": 24.392266540919362, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-20000", + "epoch": 11.043475, + "global_step": 21000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 9.69329501134848e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-21000/training_args.bin b/checkpoint-21000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-21000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-22000/config.json b/checkpoint-22000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-22000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-22000/generation_config.json b/checkpoint-22000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-22000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-22000/optimizer.pt b/checkpoint-22000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f5e3894881fc14dd27e6a354f789bf6b7878868 --- /dev/null +++ b/checkpoint-22000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6388f11d15e1f8b163297850b4f1dd51a5e3ba8b56b6e3878179705eb728193 +size 1934161093 diff --git a/checkpoint-22000/preprocessor_config.json b/checkpoint-22000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-22000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-22000/pytorch_model.bin b/checkpoint-22000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..09ec4ebf896a7a7a7a49bbf9b776e331e19d78f7 --- /dev/null +++ b/checkpoint-22000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbae2f307848b926636c1171bc16fb5ec0f4519da189351a35371d49337fbf53 +size 967102601 diff --git a/checkpoint-22000/rng_state.pth b/checkpoint-22000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..22674d356d384b094675572c055fd8b5a9ca1123 --- /dev/null +++ b/checkpoint-22000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d061f392d885c8c6a9ae47775718ce3e97ed3417f1cbf23361865f87a9c3e8d +size 14575 diff --git a/checkpoint-22000/scaler.pt b/checkpoint-22000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..361fc82ca09665a6a12f91715fcd1d70bc2fe3dc --- /dev/null +++ b/checkpoint-22000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:214c94df7cec48ed01fddc18a4197f6772bc66a3891dded592e9cb1f10783dd0 +size 557 diff --git a/checkpoint-22000/scheduler.pt b/checkpoint-22000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e366d003ba4caf9840246f9f9e04793776988d6 --- /dev/null +++ b/checkpoint-22000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:708688497faf597910e214ed525d4a0c47d2462d0cab99ce14ca7ab0476e060d +size 627 diff --git a/checkpoint-22000/trainer_state.json b/checkpoint-22000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..24fce806dfd30bfc3769b212630c5ffc621d7c72 --- /dev/null +++ b/checkpoint-22000/trainer_state.json @@ -0,0 +1,5494 @@ +{ + "best_metric": 24.392266540919362, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-20000", + "epoch": 12.0247, + "global_step": 22000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.015474306646016e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-22000/training_args.bin b/checkpoint-22000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-22000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-23000/config.json b/checkpoint-23000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-23000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-23000/generation_config.json b/checkpoint-23000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-23000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-23000/optimizer.pt b/checkpoint-23000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3917d9d2d1ba13a88f4bc6bd56d63602b94a3a5 --- /dev/null +++ b/checkpoint-23000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8604969183c36841d65e2f60eeb9e3876c7fdafe99971742cd693ccd7ccf24f1 +size 1934161093 diff --git a/checkpoint-23000/preprocessor_config.json b/checkpoint-23000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-23000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-23000/pytorch_model.bin b/checkpoint-23000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..14189f89e2487dc4df6d6a0832d57e5cb6cec22e --- /dev/null +++ b/checkpoint-23000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a04f0cfa46dc53b41b3b493fad98a464ce9989111db9ff8b1d0ad47dec3019e +size 967102601 diff --git a/checkpoint-23000/rng_state.pth b/checkpoint-23000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..54806c25a871545feb1ab56e6b1a8e5949420544 --- /dev/null +++ b/checkpoint-23000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48bf2d27d78215e3c909749c52cac147b31bf121217bbe9be02587db3d63fc7b +size 14575 diff --git a/checkpoint-23000/scaler.pt b/checkpoint-23000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..478f31859c8f07fdcebcb05ca043cdc3e1a844a6 --- /dev/null +++ b/checkpoint-23000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35aaf358488bf2cb76930c63b100940fb3480a27e9787e0ed3c7879099b75c3b +size 557 diff --git a/checkpoint-23000/scheduler.pt b/checkpoint-23000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1edf11827532d2930fe31213ba05467862dd8755 --- /dev/null +++ b/checkpoint-23000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8a95852e8ca395bdba234a828856f1a84949e5bb4d22503299bfb770b89edfc +size 627 diff --git a/checkpoint-23000/trainer_state.json b/checkpoint-23000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..48dcfbad93f4a754d36c8cb64455503daee3ac03 --- /dev/null +++ b/checkpoint-23000/trainer_state.json @@ -0,0 +1,5743 @@ +{ + "best_metric": 23.982503780549276, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-23000", + "epoch": 13.005925, + "global_step": 23000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.061619112157184e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-23000/training_args.bin b/checkpoint-23000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-23000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-24000/config.json b/checkpoint-24000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-24000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-24000/generation_config.json b/checkpoint-24000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-24000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-24000/optimizer.pt b/checkpoint-24000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ae517bb2bcccb662befc803228d0fca7f8978d8 --- /dev/null +++ b/checkpoint-24000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e330e055fea989745c5bf6d597bd0ba2c29c126a43969d8f30db607a38cde512 +size 1934161093 diff --git a/checkpoint-24000/preprocessor_config.json b/checkpoint-24000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-24000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-24000/pytorch_model.bin b/checkpoint-24000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..2765d41682187f2f80a45bd9f9d7fa8da1f27663 --- /dev/null +++ b/checkpoint-24000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45cbcae625dcfa89300c5141f57ee34c595b2f1747de117a51a1e8981217a94a +size 967102601 diff --git a/checkpoint-24000/rng_state.pth b/checkpoint-24000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a88d2c5673dcea3733a492de8cc9ccb63089c926 --- /dev/null +++ b/checkpoint-24000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67736fc86cb391ee2de47cc75943d97f6130fb4475fc23f92d0fc98cc6091d3a +size 14575 diff --git a/checkpoint-24000/scaler.pt b/checkpoint-24000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0593f05ee5a0a7727bfd5408fb64806f3dafe0dc --- /dev/null +++ b/checkpoint-24000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ff694532efc9aa102a9df1b12588365f8a813e88f900aa3b82f011b9ff17989 +size 557 diff --git a/checkpoint-24000/scheduler.pt b/checkpoint-24000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f6d1e89b609f6bc16527fe3d86cc309733a5b7e --- /dev/null +++ b/checkpoint-24000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b67dfc2b5c42f196659d3f9f36b07e6237addc3e5c419b6ae9beba3a68e2060c +size 627 diff --git a/checkpoint-24000/trainer_state.json b/checkpoint-24000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..351aafc787ff7284e5eedac200941ec46129fb08 --- /dev/null +++ b/checkpoint-24000/trainer_state.json @@ -0,0 +1,5992 @@ +{ + "best_metric": 23.982503780549276, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-23000", + "epoch": 13.030925, + "global_step": 24000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.107792776208384e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-24000/training_args.bin b/checkpoint-24000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-24000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-25000/config.json b/checkpoint-25000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-25000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-25000/generation_config.json b/checkpoint-25000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-25000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-25000/optimizer.pt b/checkpoint-25000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..50883e64fad6e91056ebc14faa95a166fd754e12 --- /dev/null +++ b/checkpoint-25000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f440db1ddb638e0a04b40744714ff515ecbf893c50ed20826e4e2782fea0c41 +size 1934161093 diff --git a/checkpoint-25000/preprocessor_config.json b/checkpoint-25000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-25000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-25000/pytorch_model.bin b/checkpoint-25000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..21bd94fa3a9f4895572ef32cf42786f5ef498d71 --- /dev/null +++ b/checkpoint-25000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66812c1ad90e00bd4645a23054caaa144fcd60b7018b22e375251b30a36558ef +size 967102601 diff --git a/checkpoint-25000/rng_state.pth b/checkpoint-25000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6a27d8802169d1800c326639350298c440442232 --- /dev/null +++ b/checkpoint-25000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1da2f6d424e52b2676970aa4d389563def62ee5fd35870da6cd9bf78968627bf +size 14511 diff --git a/checkpoint-25000/scaler.pt b/checkpoint-25000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..64f7528c917c8559ff906fca654f2b7c24681322 --- /dev/null +++ b/checkpoint-25000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1f1685b9cbe33ad27b25a1ad831e91523a3a15307825b4c93f487d0ab9d4a99 +size 557 diff --git a/checkpoint-25000/scheduler.pt b/checkpoint-25000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5eead9dfbec3e8605bf584f51678d1a974a092af --- /dev/null +++ b/checkpoint-25000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5003eaad2b23fbe336730835a298c6c865dad03356479b5d9b3d8515a82513f5 +size 627 diff --git a/checkpoint-25000/trainer_state.json b/checkpoint-25000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..14cf3a1a4e538b1f3b7a03f42658cdd166b70683 --- /dev/null +++ b/checkpoint-25000/trainer_state.json @@ -0,0 +1,6241 @@ +{ + "best_metric": 23.982503780549276, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-23000", + "epoch": 14.01215, + "global_step": 25000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + }, + { + "epoch": 13.03, + "learning_rate": 4.046835443037975e-06, + "loss": 0.0128, + "step": 24025 + }, + { + "epoch": 13.03, + "learning_rate": 4.040506329113924e-06, + "loss": 0.012, + "step": 24050 + }, + { + "epoch": 13.03, + "learning_rate": 4.034177215189874e-06, + "loss": 0.0098, + "step": 24075 + }, + { + "epoch": 13.03, + "learning_rate": 4.027848101265823e-06, + "loss": 0.0091, + "step": 24100 + }, + { + "epoch": 13.03, + "learning_rate": 4.021518987341773e-06, + "loss": 0.0077, + "step": 24125 + }, + { + "epoch": 13.03, + "learning_rate": 4.015189873417722e-06, + "loss": 0.0135, + "step": 24150 + }, + { + "epoch": 13.04, + "learning_rate": 4.0088607594936715e-06, + "loss": 0.0124, + "step": 24175 + }, + { + "epoch": 13.04, + "learning_rate": 4.002531645569621e-06, + "loss": 0.0152, + "step": 24200 + }, + { + "epoch": 13.04, + "learning_rate": 3.99620253164557e-06, + "loss": 0.0127, + "step": 24225 + }, + { + "epoch": 13.04, + "learning_rate": 3.989873417721519e-06, + "loss": 0.0066, + "step": 24250 + }, + { + "epoch": 13.04, + "learning_rate": 3.983544303797468e-06, + "loss": 0.0163, + "step": 24275 + }, + { + "epoch": 13.04, + "learning_rate": 3.977215189873418e-06, + "loss": 0.0095, + "step": 24300 + }, + { + "epoch": 13.04, + "learning_rate": 3.970886075949367e-06, + "loss": 0.0103, + "step": 24325 + }, + { + "epoch": 13.04, + "learning_rate": 3.964556962025317e-06, + "loss": 0.0112, + "step": 24350 + }, + { + "epoch": 13.04, + "learning_rate": 3.958227848101266e-06, + "loss": 0.01, + "step": 24375 + }, + { + "epoch": 13.04, + "learning_rate": 3.9518987341772155e-06, + "loss": 0.0131, + "step": 24400 + }, + { + "epoch": 13.04, + "learning_rate": 3.945569620253165e-06, + "loss": 0.0143, + "step": 24425 + }, + { + "epoch": 13.04, + "learning_rate": 3.939240506329114e-06, + "loss": 0.0151, + "step": 24450 + }, + { + "epoch": 13.04, + "learning_rate": 3.932911392405064e-06, + "loss": 0.0129, + "step": 24475 + }, + { + "epoch": 13.04, + "learning_rate": 3.926582278481013e-06, + "loss": 0.0125, + "step": 24500 + }, + { + "epoch": 14.0, + "learning_rate": 3.920506329113925e-06, + "loss": 0.0165, + "step": 24525 + }, + { + "epoch": 14.0, + "learning_rate": 3.914177215189874e-06, + "loss": 0.0114, + "step": 24550 + }, + { + "epoch": 14.0, + "learning_rate": 3.9078481012658235e-06, + "loss": 0.0111, + "step": 24575 + }, + { + "epoch": 14.0, + "learning_rate": 3.901518987341772e-06, + "loss": 0.0134, + "step": 24600 + }, + { + "epoch": 14.0, + "learning_rate": 3.8951898734177215e-06, + "loss": 0.0138, + "step": 24625 + }, + { + "epoch": 14.0, + "learning_rate": 3.888860759493671e-06, + "loss": 0.0151, + "step": 24650 + }, + { + "epoch": 14.0, + "learning_rate": 3.88253164556962e-06, + "loss": 0.0101, + "step": 24675 + }, + { + "epoch": 14.0, + "learning_rate": 3.87620253164557e-06, + "loss": 0.0108, + "step": 24700 + }, + { + "epoch": 14.01, + "learning_rate": 3.869873417721519e-06, + "loss": 0.0151, + "step": 24725 + }, + { + "epoch": 14.01, + "learning_rate": 3.863544303797469e-06, + "loss": 0.0132, + "step": 24750 + }, + { + "epoch": 14.01, + "learning_rate": 3.857215189873418e-06, + "loss": 0.012, + "step": 24775 + }, + { + "epoch": 14.01, + "learning_rate": 3.8508860759493675e-06, + "loss": 0.017, + "step": 24800 + }, + { + "epoch": 14.01, + "learning_rate": 3.844556962025317e-06, + "loss": 0.0141, + "step": 24825 + }, + { + "epoch": 14.01, + "learning_rate": 3.838227848101266e-06, + "loss": 0.0166, + "step": 24850 + }, + { + "epoch": 14.01, + "learning_rate": 3.831898734177216e-06, + "loss": 0.0112, + "step": 24875 + }, + { + "epoch": 14.01, + "learning_rate": 3.825569620253165e-06, + "loss": 0.0097, + "step": 24900 + }, + { + "epoch": 14.01, + "learning_rate": 3.819240506329115e-06, + "loss": 0.0138, + "step": 24925 + }, + { + "epoch": 14.01, + "learning_rate": 3.8129113924050636e-06, + "loss": 0.0069, + "step": 24950 + }, + { + "epoch": 14.01, + "learning_rate": 3.806582278481013e-06, + "loss": 0.0111, + "step": 24975 + }, + { + "epoch": 14.01, + "learning_rate": 3.8002531645569625e-06, + "loss": 0.0149, + "step": 25000 + }, + { + "epoch": 14.01, + "eval_loss": 0.2640444040298462, + "eval_runtime": 1478.4977, + "eval_samples_per_second": 7.042, + "eval_steps_per_second": 0.44, + "eval_wer": 24.063805915543345, + "step": 25000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.153937581719552e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-25000/training_args.bin b/checkpoint-25000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-25000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-26000/config.json b/checkpoint-26000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-26000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-26000/generation_config.json b/checkpoint-26000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-26000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-26000/optimizer.pt b/checkpoint-26000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5362b0b9b060d3521abd7e0cf5faf2d5fd616c67 --- /dev/null +++ b/checkpoint-26000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79f57ce2dbb87f694c8c2911ad3f2f3f2bf1703f8a1027f09168ada13c2c6de0 +size 1934161093 diff --git a/checkpoint-26000/preprocessor_config.json b/checkpoint-26000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-26000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-26000/pytorch_model.bin b/checkpoint-26000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b39b592530660468699a784216107edd0fccced2 --- /dev/null +++ b/checkpoint-26000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9c471e31e40b1b31c3e3bf1f575459fc474a7bc9d64aba667d8c9f757dfa5a1 +size 967102601 diff --git a/checkpoint-26000/rng_state.pth b/checkpoint-26000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..03549dfa9d7df3d50589716868a3f5383be426e3 --- /dev/null +++ b/checkpoint-26000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bde7db5aa3266ef58b6fc439c2e8c1faa9009c34717e10deed197847b2bfe01f +size 14575 diff --git a/checkpoint-26000/scaler.pt b/checkpoint-26000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..28c5c7d3764ee6771eb6a3f5babded6351adbcf6 --- /dev/null +++ b/checkpoint-26000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73a4f480e066eab92f45ed81531976e87c42510980654ae79c1db08a9dbbdda7 +size 557 diff --git a/checkpoint-26000/scheduler.pt b/checkpoint-26000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..298d37c067a0f7428d60066c34bd1e085a520e39 --- /dev/null +++ b/checkpoint-26000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:494d47d142714a1830c15434d8d0b39a40254565fe740ab488c8d25b13a5b7fe +size 627 diff --git a/checkpoint-26000/trainer_state.json b/checkpoint-26000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..477dbb44cfba1a31d93020e666e0dd1b7702a300 --- /dev/null +++ b/checkpoint-26000/trainer_state.json @@ -0,0 +1,6490 @@ +{ + "best_metric": 23.982503780549276, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-23000", + "epoch": 14.03715, + "global_step": 26000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + }, + { + "epoch": 13.03, + "learning_rate": 4.046835443037975e-06, + "loss": 0.0128, + "step": 24025 + }, + { + "epoch": 13.03, + "learning_rate": 4.040506329113924e-06, + "loss": 0.012, + "step": 24050 + }, + { + "epoch": 13.03, + "learning_rate": 4.034177215189874e-06, + "loss": 0.0098, + "step": 24075 + }, + { + "epoch": 13.03, + "learning_rate": 4.027848101265823e-06, + "loss": 0.0091, + "step": 24100 + }, + { + "epoch": 13.03, + "learning_rate": 4.021518987341773e-06, + "loss": 0.0077, + "step": 24125 + }, + { + "epoch": 13.03, + "learning_rate": 4.015189873417722e-06, + "loss": 0.0135, + "step": 24150 + }, + { + "epoch": 13.04, + "learning_rate": 4.0088607594936715e-06, + "loss": 0.0124, + "step": 24175 + }, + { + "epoch": 13.04, + "learning_rate": 4.002531645569621e-06, + "loss": 0.0152, + "step": 24200 + }, + { + "epoch": 13.04, + "learning_rate": 3.99620253164557e-06, + "loss": 0.0127, + "step": 24225 + }, + { + "epoch": 13.04, + "learning_rate": 3.989873417721519e-06, + "loss": 0.0066, + "step": 24250 + }, + { + "epoch": 13.04, + "learning_rate": 3.983544303797468e-06, + "loss": 0.0163, + "step": 24275 + }, + { + "epoch": 13.04, + "learning_rate": 3.977215189873418e-06, + "loss": 0.0095, + "step": 24300 + }, + { + "epoch": 13.04, + "learning_rate": 3.970886075949367e-06, + "loss": 0.0103, + "step": 24325 + }, + { + "epoch": 13.04, + "learning_rate": 3.964556962025317e-06, + "loss": 0.0112, + "step": 24350 + }, + { + "epoch": 13.04, + "learning_rate": 3.958227848101266e-06, + "loss": 0.01, + "step": 24375 + }, + { + "epoch": 13.04, + "learning_rate": 3.9518987341772155e-06, + "loss": 0.0131, + "step": 24400 + }, + { + "epoch": 13.04, + "learning_rate": 3.945569620253165e-06, + "loss": 0.0143, + "step": 24425 + }, + { + "epoch": 13.04, + "learning_rate": 3.939240506329114e-06, + "loss": 0.0151, + "step": 24450 + }, + { + "epoch": 13.04, + "learning_rate": 3.932911392405064e-06, + "loss": 0.0129, + "step": 24475 + }, + { + "epoch": 13.04, + "learning_rate": 3.926582278481013e-06, + "loss": 0.0125, + "step": 24500 + }, + { + "epoch": 14.0, + "learning_rate": 3.920506329113925e-06, + "loss": 0.0165, + "step": 24525 + }, + { + "epoch": 14.0, + "learning_rate": 3.914177215189874e-06, + "loss": 0.0114, + "step": 24550 + }, + { + "epoch": 14.0, + "learning_rate": 3.9078481012658235e-06, + "loss": 0.0111, + "step": 24575 + }, + { + "epoch": 14.0, + "learning_rate": 3.901518987341772e-06, + "loss": 0.0134, + "step": 24600 + }, + { + "epoch": 14.0, + "learning_rate": 3.8951898734177215e-06, + "loss": 0.0138, + "step": 24625 + }, + { + "epoch": 14.0, + "learning_rate": 3.888860759493671e-06, + "loss": 0.0151, + "step": 24650 + }, + { + "epoch": 14.0, + "learning_rate": 3.88253164556962e-06, + "loss": 0.0101, + "step": 24675 + }, + { + "epoch": 14.0, + "learning_rate": 3.87620253164557e-06, + "loss": 0.0108, + "step": 24700 + }, + { + "epoch": 14.01, + "learning_rate": 3.869873417721519e-06, + "loss": 0.0151, + "step": 24725 + }, + { + "epoch": 14.01, + "learning_rate": 3.863544303797469e-06, + "loss": 0.0132, + "step": 24750 + }, + { + "epoch": 14.01, + "learning_rate": 3.857215189873418e-06, + "loss": 0.012, + "step": 24775 + }, + { + "epoch": 14.01, + "learning_rate": 3.8508860759493675e-06, + "loss": 0.017, + "step": 24800 + }, + { + "epoch": 14.01, + "learning_rate": 3.844556962025317e-06, + "loss": 0.0141, + "step": 24825 + }, + { + "epoch": 14.01, + "learning_rate": 3.838227848101266e-06, + "loss": 0.0166, + "step": 24850 + }, + { + "epoch": 14.01, + "learning_rate": 3.831898734177216e-06, + "loss": 0.0112, + "step": 24875 + }, + { + "epoch": 14.01, + "learning_rate": 3.825569620253165e-06, + "loss": 0.0097, + "step": 24900 + }, + { + "epoch": 14.01, + "learning_rate": 3.819240506329115e-06, + "loss": 0.0138, + "step": 24925 + }, + { + "epoch": 14.01, + "learning_rate": 3.8129113924050636e-06, + "loss": 0.0069, + "step": 24950 + }, + { + "epoch": 14.01, + "learning_rate": 3.806582278481013e-06, + "loss": 0.0111, + "step": 24975 + }, + { + "epoch": 14.01, + "learning_rate": 3.8002531645569625e-06, + "loss": 0.0149, + "step": 25000 + }, + { + "epoch": 14.01, + "eval_loss": 0.2640444040298462, + "eval_runtime": 1478.4977, + "eval_samples_per_second": 7.042, + "eval_steps_per_second": 0.44, + "eval_wer": 24.063805915543345, + "step": 25000 + }, + { + "epoch": 14.01, + "learning_rate": 3.7939240506329115e-06, + "loss": 0.0176, + "step": 25025 + }, + { + "epoch": 14.01, + "learning_rate": 3.787594936708861e-06, + "loss": 0.0104, + "step": 25050 + }, + { + "epoch": 14.01, + "learning_rate": 3.7812658227848103e-06, + "loss": 0.0084, + "step": 25075 + }, + { + "epoch": 14.01, + "learning_rate": 3.7749367088607598e-06, + "loss": 0.0118, + "step": 25100 + }, + { + "epoch": 14.02, + "learning_rate": 3.768607594936709e-06, + "loss": 0.0099, + "step": 25125 + }, + { + "epoch": 14.02, + "learning_rate": 3.762278481012658e-06, + "loss": 0.0089, + "step": 25150 + }, + { + "epoch": 14.02, + "learning_rate": 3.7559493670886076e-06, + "loss": 0.0119, + "step": 25175 + }, + { + "epoch": 14.02, + "learning_rate": 3.749620253164557e-06, + "loss": 0.0118, + "step": 25200 + }, + { + "epoch": 14.02, + "learning_rate": 3.743291139240507e-06, + "loss": 0.0108, + "step": 25225 + }, + { + "epoch": 14.02, + "learning_rate": 3.7369620253164563e-06, + "loss": 0.0101, + "step": 25250 + }, + { + "epoch": 14.02, + "learning_rate": 3.7306329113924058e-06, + "loss": 0.0095, + "step": 25275 + }, + { + "epoch": 14.02, + "learning_rate": 3.7243037974683548e-06, + "loss": 0.0106, + "step": 25300 + }, + { + "epoch": 14.02, + "learning_rate": 3.717974683544304e-06, + "loss": 0.0135, + "step": 25325 + }, + { + "epoch": 14.02, + "learning_rate": 3.7116455696202536e-06, + "loss": 0.0115, + "step": 25350 + }, + { + "epoch": 14.02, + "learning_rate": 3.705316455696203e-06, + "loss": 0.0114, + "step": 25375 + }, + { + "epoch": 14.02, + "learning_rate": 3.698987341772152e-06, + "loss": 0.0119, + "step": 25400 + }, + { + "epoch": 14.02, + "learning_rate": 3.6926582278481015e-06, + "loss": 0.0123, + "step": 25425 + }, + { + "epoch": 14.02, + "learning_rate": 3.686329113924051e-06, + "loss": 0.0138, + "step": 25450 + }, + { + "epoch": 14.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 0.0133, + "step": 25475 + }, + { + "epoch": 14.02, + "learning_rate": 3.6736708860759498e-06, + "loss": 0.0106, + "step": 25500 + }, + { + "epoch": 14.03, + "learning_rate": 3.6673417721518988e-06, + "loss": 0.0099, + "step": 25525 + }, + { + "epoch": 14.03, + "learning_rate": 3.661012658227848e-06, + "loss": 0.0104, + "step": 25550 + }, + { + "epoch": 14.03, + "learning_rate": 3.6546835443037976e-06, + "loss": 0.0143, + "step": 25575 + }, + { + "epoch": 14.03, + "learning_rate": 3.648354430379747e-06, + "loss": 0.0096, + "step": 25600 + }, + { + "epoch": 14.03, + "learning_rate": 3.642025316455696e-06, + "loss": 0.0094, + "step": 25625 + }, + { + "epoch": 14.03, + "learning_rate": 3.6356962025316463e-06, + "loss": 0.0162, + "step": 25650 + }, + { + "epoch": 14.03, + "learning_rate": 3.6293670886075953e-06, + "loss": 0.008, + "step": 25675 + }, + { + "epoch": 14.03, + "learning_rate": 3.6230379746835448e-06, + "loss": 0.0094, + "step": 25700 + }, + { + "epoch": 14.03, + "learning_rate": 3.616708860759494e-06, + "loss": 0.0122, + "step": 25725 + }, + { + "epoch": 14.03, + "learning_rate": 3.6103797468354436e-06, + "loss": 0.0128, + "step": 25750 + }, + { + "epoch": 14.03, + "learning_rate": 3.6040506329113926e-06, + "loss": 0.0117, + "step": 25775 + }, + { + "epoch": 14.03, + "learning_rate": 3.597721518987342e-06, + "loss": 0.0067, + "step": 25800 + }, + { + "epoch": 14.03, + "learning_rate": 3.5913924050632915e-06, + "loss": 0.0078, + "step": 25825 + }, + { + "epoch": 14.03, + "learning_rate": 3.585063291139241e-06, + "loss": 0.0071, + "step": 25850 + }, + { + "epoch": 14.03, + "learning_rate": 3.57873417721519e-06, + "loss": 0.0093, + "step": 25875 + }, + { + "epoch": 14.03, + "learning_rate": 3.5724050632911393e-06, + "loss": 0.0064, + "step": 25900 + }, + { + "epoch": 14.04, + "learning_rate": 3.5660759493670888e-06, + "loss": 0.0067, + "step": 25925 + }, + { + "epoch": 14.04, + "learning_rate": 3.559746835443038e-06, + "loss": 0.0076, + "step": 25950 + }, + { + "epoch": 14.04, + "learning_rate": 3.5534177215189876e-06, + "loss": 0.0099, + "step": 25975 + }, + { + "epoch": 14.04, + "learning_rate": 3.5470886075949366e-06, + "loss": 0.0111, + "step": 26000 + }, + { + "epoch": 14.04, + "eval_loss": 0.2734878659248352, + "eval_runtime": 1456.7487, + "eval_samples_per_second": 7.147, + "eval_steps_per_second": 0.447, + "eval_wer": 24.359745686921737, + "step": 26000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.200111245770752e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-26000/training_args.bin b/checkpoint-26000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-26000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-27000/config.json b/checkpoint-27000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-27000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-27000/generation_config.json b/checkpoint-27000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-27000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-27000/optimizer.pt b/checkpoint-27000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cfd70675a51037b8749356c2e23815a7f6cdea77 --- /dev/null +++ b/checkpoint-27000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99e040423839f1eab6589f85db8fc64aa8e3e349512479b77baf2fff8541292f +size 1934161093 diff --git a/checkpoint-27000/preprocessor_config.json b/checkpoint-27000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-27000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-27000/pytorch_model.bin b/checkpoint-27000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..025d009fba9b9fcf26f12aab8d91246f446e64dc --- /dev/null +++ b/checkpoint-27000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d27e47e09fcac325fedc3dad0228216c40d4ae97ee233b2bffcce128ce8d52e +size 967102601 diff --git a/checkpoint-27000/rng_state.pth b/checkpoint-27000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d0383943c8f62c3d9fddf7e448b1d21bcf003315 --- /dev/null +++ b/checkpoint-27000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2e48897d68bdda9ceeb5c8b1f868c0edbe34b433549a499ab3ec72b6e5a53c6 +size 14575 diff --git a/checkpoint-27000/scaler.pt b/checkpoint-27000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..140eacd34fe5ff0f9d4cc314ceb4f6d1fec5e19e --- /dev/null +++ b/checkpoint-27000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a3af14ff2d0d7fd0ff67c6618be2f657abc1390d881453f59696dec5df9e102 +size 557 diff --git a/checkpoint-27000/scheduler.pt b/checkpoint-27000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2af8448ac71747c31bc7657c3ec76360d6221891 --- /dev/null +++ b/checkpoint-27000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f22a37aa827088813761c2a1421bfeb35e03e62132e42f1c5e4ea094c5e23c9 +size 627 diff --git a/checkpoint-27000/trainer_state.json b/checkpoint-27000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b93e0dd3251c25a4498c64251132585e60180a4f --- /dev/null +++ b/checkpoint-27000/trainer_state.json @@ -0,0 +1,6739 @@ +{ + "best_metric": 23.982503780549276, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-23000", + "epoch": 15.018375, + "global_step": 27000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + }, + { + "epoch": 13.03, + "learning_rate": 4.046835443037975e-06, + "loss": 0.0128, + "step": 24025 + }, + { + "epoch": 13.03, + "learning_rate": 4.040506329113924e-06, + "loss": 0.012, + "step": 24050 + }, + { + "epoch": 13.03, + "learning_rate": 4.034177215189874e-06, + "loss": 0.0098, + "step": 24075 + }, + { + "epoch": 13.03, + "learning_rate": 4.027848101265823e-06, + "loss": 0.0091, + "step": 24100 + }, + { + "epoch": 13.03, + "learning_rate": 4.021518987341773e-06, + "loss": 0.0077, + "step": 24125 + }, + { + "epoch": 13.03, + "learning_rate": 4.015189873417722e-06, + "loss": 0.0135, + "step": 24150 + }, + { + "epoch": 13.04, + "learning_rate": 4.0088607594936715e-06, + "loss": 0.0124, + "step": 24175 + }, + { + "epoch": 13.04, + "learning_rate": 4.002531645569621e-06, + "loss": 0.0152, + "step": 24200 + }, + { + "epoch": 13.04, + "learning_rate": 3.99620253164557e-06, + "loss": 0.0127, + "step": 24225 + }, + { + "epoch": 13.04, + "learning_rate": 3.989873417721519e-06, + "loss": 0.0066, + "step": 24250 + }, + { + "epoch": 13.04, + "learning_rate": 3.983544303797468e-06, + "loss": 0.0163, + "step": 24275 + }, + { + "epoch": 13.04, + "learning_rate": 3.977215189873418e-06, + "loss": 0.0095, + "step": 24300 + }, + { + "epoch": 13.04, + "learning_rate": 3.970886075949367e-06, + "loss": 0.0103, + "step": 24325 + }, + { + "epoch": 13.04, + "learning_rate": 3.964556962025317e-06, + "loss": 0.0112, + "step": 24350 + }, + { + "epoch": 13.04, + "learning_rate": 3.958227848101266e-06, + "loss": 0.01, + "step": 24375 + }, + { + "epoch": 13.04, + "learning_rate": 3.9518987341772155e-06, + "loss": 0.0131, + "step": 24400 + }, + { + "epoch": 13.04, + "learning_rate": 3.945569620253165e-06, + "loss": 0.0143, + "step": 24425 + }, + { + "epoch": 13.04, + "learning_rate": 3.939240506329114e-06, + "loss": 0.0151, + "step": 24450 + }, + { + "epoch": 13.04, + "learning_rate": 3.932911392405064e-06, + "loss": 0.0129, + "step": 24475 + }, + { + "epoch": 13.04, + "learning_rate": 3.926582278481013e-06, + "loss": 0.0125, + "step": 24500 + }, + { + "epoch": 14.0, + "learning_rate": 3.920506329113925e-06, + "loss": 0.0165, + "step": 24525 + }, + { + "epoch": 14.0, + "learning_rate": 3.914177215189874e-06, + "loss": 0.0114, + "step": 24550 + }, + { + "epoch": 14.0, + "learning_rate": 3.9078481012658235e-06, + "loss": 0.0111, + "step": 24575 + }, + { + "epoch": 14.0, + "learning_rate": 3.901518987341772e-06, + "loss": 0.0134, + "step": 24600 + }, + { + "epoch": 14.0, + "learning_rate": 3.8951898734177215e-06, + "loss": 0.0138, + "step": 24625 + }, + { + "epoch": 14.0, + "learning_rate": 3.888860759493671e-06, + "loss": 0.0151, + "step": 24650 + }, + { + "epoch": 14.0, + "learning_rate": 3.88253164556962e-06, + "loss": 0.0101, + "step": 24675 + }, + { + "epoch": 14.0, + "learning_rate": 3.87620253164557e-06, + "loss": 0.0108, + "step": 24700 + }, + { + "epoch": 14.01, + "learning_rate": 3.869873417721519e-06, + "loss": 0.0151, + "step": 24725 + }, + { + "epoch": 14.01, + "learning_rate": 3.863544303797469e-06, + "loss": 0.0132, + "step": 24750 + }, + { + "epoch": 14.01, + "learning_rate": 3.857215189873418e-06, + "loss": 0.012, + "step": 24775 + }, + { + "epoch": 14.01, + "learning_rate": 3.8508860759493675e-06, + "loss": 0.017, + "step": 24800 + }, + { + "epoch": 14.01, + "learning_rate": 3.844556962025317e-06, + "loss": 0.0141, + "step": 24825 + }, + { + "epoch": 14.01, + "learning_rate": 3.838227848101266e-06, + "loss": 0.0166, + "step": 24850 + }, + { + "epoch": 14.01, + "learning_rate": 3.831898734177216e-06, + "loss": 0.0112, + "step": 24875 + }, + { + "epoch": 14.01, + "learning_rate": 3.825569620253165e-06, + "loss": 0.0097, + "step": 24900 + }, + { + "epoch": 14.01, + "learning_rate": 3.819240506329115e-06, + "loss": 0.0138, + "step": 24925 + }, + { + "epoch": 14.01, + "learning_rate": 3.8129113924050636e-06, + "loss": 0.0069, + "step": 24950 + }, + { + "epoch": 14.01, + "learning_rate": 3.806582278481013e-06, + "loss": 0.0111, + "step": 24975 + }, + { + "epoch": 14.01, + "learning_rate": 3.8002531645569625e-06, + "loss": 0.0149, + "step": 25000 + }, + { + "epoch": 14.01, + "eval_loss": 0.2640444040298462, + "eval_runtime": 1478.4977, + "eval_samples_per_second": 7.042, + "eval_steps_per_second": 0.44, + "eval_wer": 24.063805915543345, + "step": 25000 + }, + { + "epoch": 14.01, + "learning_rate": 3.7939240506329115e-06, + "loss": 0.0176, + "step": 25025 + }, + { + "epoch": 14.01, + "learning_rate": 3.787594936708861e-06, + "loss": 0.0104, + "step": 25050 + }, + { + "epoch": 14.01, + "learning_rate": 3.7812658227848103e-06, + "loss": 0.0084, + "step": 25075 + }, + { + "epoch": 14.01, + "learning_rate": 3.7749367088607598e-06, + "loss": 0.0118, + "step": 25100 + }, + { + "epoch": 14.02, + "learning_rate": 3.768607594936709e-06, + "loss": 0.0099, + "step": 25125 + }, + { + "epoch": 14.02, + "learning_rate": 3.762278481012658e-06, + "loss": 0.0089, + "step": 25150 + }, + { + "epoch": 14.02, + "learning_rate": 3.7559493670886076e-06, + "loss": 0.0119, + "step": 25175 + }, + { + "epoch": 14.02, + "learning_rate": 3.749620253164557e-06, + "loss": 0.0118, + "step": 25200 + }, + { + "epoch": 14.02, + "learning_rate": 3.743291139240507e-06, + "loss": 0.0108, + "step": 25225 + }, + { + "epoch": 14.02, + "learning_rate": 3.7369620253164563e-06, + "loss": 0.0101, + "step": 25250 + }, + { + "epoch": 14.02, + "learning_rate": 3.7306329113924058e-06, + "loss": 0.0095, + "step": 25275 + }, + { + "epoch": 14.02, + "learning_rate": 3.7243037974683548e-06, + "loss": 0.0106, + "step": 25300 + }, + { + "epoch": 14.02, + "learning_rate": 3.717974683544304e-06, + "loss": 0.0135, + "step": 25325 + }, + { + "epoch": 14.02, + "learning_rate": 3.7116455696202536e-06, + "loss": 0.0115, + "step": 25350 + }, + { + "epoch": 14.02, + "learning_rate": 3.705316455696203e-06, + "loss": 0.0114, + "step": 25375 + }, + { + "epoch": 14.02, + "learning_rate": 3.698987341772152e-06, + "loss": 0.0119, + "step": 25400 + }, + { + "epoch": 14.02, + "learning_rate": 3.6926582278481015e-06, + "loss": 0.0123, + "step": 25425 + }, + { + "epoch": 14.02, + "learning_rate": 3.686329113924051e-06, + "loss": 0.0138, + "step": 25450 + }, + { + "epoch": 14.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 0.0133, + "step": 25475 + }, + { + "epoch": 14.02, + "learning_rate": 3.6736708860759498e-06, + "loss": 0.0106, + "step": 25500 + }, + { + "epoch": 14.03, + "learning_rate": 3.6673417721518988e-06, + "loss": 0.0099, + "step": 25525 + }, + { + "epoch": 14.03, + "learning_rate": 3.661012658227848e-06, + "loss": 0.0104, + "step": 25550 + }, + { + "epoch": 14.03, + "learning_rate": 3.6546835443037976e-06, + "loss": 0.0143, + "step": 25575 + }, + { + "epoch": 14.03, + "learning_rate": 3.648354430379747e-06, + "loss": 0.0096, + "step": 25600 + }, + { + "epoch": 14.03, + "learning_rate": 3.642025316455696e-06, + "loss": 0.0094, + "step": 25625 + }, + { + "epoch": 14.03, + "learning_rate": 3.6356962025316463e-06, + "loss": 0.0162, + "step": 25650 + }, + { + "epoch": 14.03, + "learning_rate": 3.6293670886075953e-06, + "loss": 0.008, + "step": 25675 + }, + { + "epoch": 14.03, + "learning_rate": 3.6230379746835448e-06, + "loss": 0.0094, + "step": 25700 + }, + { + "epoch": 14.03, + "learning_rate": 3.616708860759494e-06, + "loss": 0.0122, + "step": 25725 + }, + { + "epoch": 14.03, + "learning_rate": 3.6103797468354436e-06, + "loss": 0.0128, + "step": 25750 + }, + { + "epoch": 14.03, + "learning_rate": 3.6040506329113926e-06, + "loss": 0.0117, + "step": 25775 + }, + { + "epoch": 14.03, + "learning_rate": 3.597721518987342e-06, + "loss": 0.0067, + "step": 25800 + }, + { + "epoch": 14.03, + "learning_rate": 3.5913924050632915e-06, + "loss": 0.0078, + "step": 25825 + }, + { + "epoch": 14.03, + "learning_rate": 3.585063291139241e-06, + "loss": 0.0071, + "step": 25850 + }, + { + "epoch": 14.03, + "learning_rate": 3.57873417721519e-06, + "loss": 0.0093, + "step": 25875 + }, + { + "epoch": 14.03, + "learning_rate": 3.5724050632911393e-06, + "loss": 0.0064, + "step": 25900 + }, + { + "epoch": 14.04, + "learning_rate": 3.5660759493670888e-06, + "loss": 0.0067, + "step": 25925 + }, + { + "epoch": 14.04, + "learning_rate": 3.559746835443038e-06, + "loss": 0.0076, + "step": 25950 + }, + { + "epoch": 14.04, + "learning_rate": 3.5534177215189876e-06, + "loss": 0.0099, + "step": 25975 + }, + { + "epoch": 14.04, + "learning_rate": 3.5470886075949366e-06, + "loss": 0.0111, + "step": 26000 + }, + { + "epoch": 14.04, + "eval_loss": 0.2734878659248352, + "eval_runtime": 1456.7487, + "eval_samples_per_second": 7.147, + "eval_steps_per_second": 0.447, + "eval_wer": 24.359745686921737, + "step": 26000 + }, + { + "epoch": 14.04, + "learning_rate": 3.540759493670886e-06, + "loss": 0.0134, + "step": 26025 + }, + { + "epoch": 14.04, + "learning_rate": 3.534430379746836e-06, + "loss": 0.008, + "step": 26050 + }, + { + "epoch": 14.04, + "learning_rate": 3.5281012658227853e-06, + "loss": 0.0138, + "step": 26075 + }, + { + "epoch": 14.04, + "learning_rate": 3.5217721518987348e-06, + "loss": 0.0075, + "step": 26100 + }, + { + "epoch": 14.04, + "learning_rate": 3.515443037974684e-06, + "loss": 0.0097, + "step": 26125 + }, + { + "epoch": 14.04, + "learning_rate": 3.509113924050633e-06, + "loss": 0.0139, + "step": 26150 + }, + { + "epoch": 14.04, + "learning_rate": 3.5027848101265826e-06, + "loss": 0.0099, + "step": 26175 + }, + { + "epoch": 14.04, + "learning_rate": 3.496455696202532e-06, + "loss": 0.0148, + "step": 26200 + }, + { + "epoch": 14.04, + "learning_rate": 3.4901265822784815e-06, + "loss": 0.0119, + "step": 26225 + }, + { + "epoch": 14.04, + "learning_rate": 3.4837974683544305e-06, + "loss": 0.0086, + "step": 26250 + }, + { + "epoch": 15.0, + "learning_rate": 3.47746835443038e-06, + "loss": 0.0091, + "step": 26275 + }, + { + "epoch": 15.0, + "learning_rate": 3.4711392405063293e-06, + "loss": 0.0105, + "step": 26300 + }, + { + "epoch": 15.0, + "learning_rate": 3.465063291139241e-06, + "loss": 0.0138, + "step": 26325 + }, + { + "epoch": 15.0, + "learning_rate": 3.45873417721519e-06, + "loss": 0.0185, + "step": 26350 + }, + { + "epoch": 15.0, + "learning_rate": 3.4524050632911392e-06, + "loss": 0.015, + "step": 26375 + }, + { + "epoch": 15.0, + "learning_rate": 3.4460759493670886e-06, + "loss": 0.0177, + "step": 26400 + }, + { + "epoch": 15.0, + "learning_rate": 3.439746835443038e-06, + "loss": 0.014, + "step": 26425 + }, + { + "epoch": 15.0, + "learning_rate": 3.433417721518988e-06, + "loss": 0.0127, + "step": 26450 + }, + { + "epoch": 15.01, + "learning_rate": 3.4270886075949374e-06, + "loss": 0.0135, + "step": 26475 + }, + { + "epoch": 15.01, + "learning_rate": 3.4207594936708864e-06, + "loss": 0.0171, + "step": 26500 + }, + { + "epoch": 15.01, + "learning_rate": 3.4144303797468358e-06, + "loss": 0.0114, + "step": 26525 + }, + { + "epoch": 15.01, + "learning_rate": 3.4081012658227852e-06, + "loss": 0.0099, + "step": 26550 + }, + { + "epoch": 15.01, + "learning_rate": 3.4017721518987346e-06, + "loss": 0.0126, + "step": 26575 + }, + { + "epoch": 15.01, + "learning_rate": 3.395443037974684e-06, + "loss": 0.0116, + "step": 26600 + }, + { + "epoch": 15.01, + "learning_rate": 3.389113924050633e-06, + "loss": 0.0087, + "step": 26625 + }, + { + "epoch": 15.01, + "learning_rate": 3.3827848101265825e-06, + "loss": 0.0073, + "step": 26650 + }, + { + "epoch": 15.01, + "learning_rate": 3.376455696202532e-06, + "loss": 0.0102, + "step": 26675 + }, + { + "epoch": 15.01, + "learning_rate": 3.3701265822784814e-06, + "loss": 0.0106, + "step": 26700 + }, + { + "epoch": 15.01, + "learning_rate": 3.3637974683544304e-06, + "loss": 0.0094, + "step": 26725 + }, + { + "epoch": 15.01, + "learning_rate": 3.35746835443038e-06, + "loss": 0.0113, + "step": 26750 + }, + { + "epoch": 15.01, + "learning_rate": 3.3511392405063292e-06, + "loss": 0.0169, + "step": 26775 + }, + { + "epoch": 15.01, + "learning_rate": 3.3448101265822786e-06, + "loss": 0.0084, + "step": 26800 + }, + { + "epoch": 15.01, + "learning_rate": 3.338481012658228e-06, + "loss": 0.0153, + "step": 26825 + }, + { + "epoch": 15.01, + "learning_rate": 3.332151898734177e-06, + "loss": 0.012, + "step": 26850 + }, + { + "epoch": 15.02, + "learning_rate": 3.325822784810127e-06, + "loss": 0.0106, + "step": 26875 + }, + { + "epoch": 15.02, + "learning_rate": 3.3194936708860764e-06, + "loss": 0.0099, + "step": 26900 + }, + { + "epoch": 15.02, + "learning_rate": 3.3131645569620258e-06, + "loss": 0.008, + "step": 26925 + }, + { + "epoch": 15.02, + "learning_rate": 3.306835443037975e-06, + "loss": 0.0076, + "step": 26950 + }, + { + "epoch": 15.02, + "learning_rate": 3.3005063291139246e-06, + "loss": 0.0088, + "step": 26975 + }, + { + "epoch": 15.02, + "learning_rate": 3.2941772151898736e-06, + "loss": 0.009, + "step": 27000 + }, + { + "epoch": 15.02, + "eval_loss": 0.2693029046058655, + "eval_runtime": 1479.1374, + "eval_samples_per_second": 7.039, + "eval_steps_per_second": 0.44, + "eval_wer": 24.28819980812696, + "step": 27000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.24625605128192e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-27000/training_args.bin b/checkpoint-27000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-27000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-28000/config.json b/checkpoint-28000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-28000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-28000/generation_config.json b/checkpoint-28000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-28000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-28000/optimizer.pt b/checkpoint-28000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6488fdf35aef93cdd4291ab5f71bf3902dee1c47 --- /dev/null +++ b/checkpoint-28000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f009d02ae1da0753699c2363d8a54ea5fc89748c630f9ecd13910cc733860d8 +size 1934161093 diff --git a/checkpoint-28000/preprocessor_config.json b/checkpoint-28000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-28000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-28000/pytorch_model.bin b/checkpoint-28000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..95c7d05362ac69547b263c1b87f7266ae101a1d3 --- /dev/null +++ b/checkpoint-28000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef9fe5e1b89801faa745e60e8867e72c3252ba6e90878c94e7fee6c89635d424 +size 967102601 diff --git a/checkpoint-28000/rng_state.pth b/checkpoint-28000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c6a4315d5702042595188e76f5ed265356d06a58 --- /dev/null +++ b/checkpoint-28000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f1bdd1c4aff35549677d776ea1d8f6abffbc473837694db96b6f7c321f3f695 +size 14575 diff --git a/checkpoint-28000/scaler.pt b/checkpoint-28000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..447212a7f42fee78420d5611d13275de94653da8 --- /dev/null +++ b/checkpoint-28000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dc203cc6aed801a50b90014f7a2d8a23f2422319a3b1bec0f8d6dd671ff269b +size 557 diff --git a/checkpoint-28000/scheduler.pt b/checkpoint-28000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3dafd888992e32b3dee92235897996b46780fc7 --- /dev/null +++ b/checkpoint-28000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a68813fa148a3d11ff25e3fa866a4dd110dfa5d728e21c7ea8311828ec5fd1e3 +size 627 diff --git a/checkpoint-28000/trainer_state.json b/checkpoint-28000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..610a082f5fda60dae9ab5bc92509ca6dd3da0805 --- /dev/null +++ b/checkpoint-28000/trainer_state.json @@ -0,0 +1,6988 @@ +{ + "best_metric": 23.982503780549276, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-23000", + "epoch": 15.043375, + "global_step": 28000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + }, + { + "epoch": 13.03, + "learning_rate": 4.046835443037975e-06, + "loss": 0.0128, + "step": 24025 + }, + { + "epoch": 13.03, + "learning_rate": 4.040506329113924e-06, + "loss": 0.012, + "step": 24050 + }, + { + "epoch": 13.03, + "learning_rate": 4.034177215189874e-06, + "loss": 0.0098, + "step": 24075 + }, + { + "epoch": 13.03, + "learning_rate": 4.027848101265823e-06, + "loss": 0.0091, + "step": 24100 + }, + { + "epoch": 13.03, + "learning_rate": 4.021518987341773e-06, + "loss": 0.0077, + "step": 24125 + }, + { + "epoch": 13.03, + "learning_rate": 4.015189873417722e-06, + "loss": 0.0135, + "step": 24150 + }, + { + "epoch": 13.04, + "learning_rate": 4.0088607594936715e-06, + "loss": 0.0124, + "step": 24175 + }, + { + "epoch": 13.04, + "learning_rate": 4.002531645569621e-06, + "loss": 0.0152, + "step": 24200 + }, + { + "epoch": 13.04, + "learning_rate": 3.99620253164557e-06, + "loss": 0.0127, + "step": 24225 + }, + { + "epoch": 13.04, + "learning_rate": 3.989873417721519e-06, + "loss": 0.0066, + "step": 24250 + }, + { + "epoch": 13.04, + "learning_rate": 3.983544303797468e-06, + "loss": 0.0163, + "step": 24275 + }, + { + "epoch": 13.04, + "learning_rate": 3.977215189873418e-06, + "loss": 0.0095, + "step": 24300 + }, + { + "epoch": 13.04, + "learning_rate": 3.970886075949367e-06, + "loss": 0.0103, + "step": 24325 + }, + { + "epoch": 13.04, + "learning_rate": 3.964556962025317e-06, + "loss": 0.0112, + "step": 24350 + }, + { + "epoch": 13.04, + "learning_rate": 3.958227848101266e-06, + "loss": 0.01, + "step": 24375 + }, + { + "epoch": 13.04, + "learning_rate": 3.9518987341772155e-06, + "loss": 0.0131, + "step": 24400 + }, + { + "epoch": 13.04, + "learning_rate": 3.945569620253165e-06, + "loss": 0.0143, + "step": 24425 + }, + { + "epoch": 13.04, + "learning_rate": 3.939240506329114e-06, + "loss": 0.0151, + "step": 24450 + }, + { + "epoch": 13.04, + "learning_rate": 3.932911392405064e-06, + "loss": 0.0129, + "step": 24475 + }, + { + "epoch": 13.04, + "learning_rate": 3.926582278481013e-06, + "loss": 0.0125, + "step": 24500 + }, + { + "epoch": 14.0, + "learning_rate": 3.920506329113925e-06, + "loss": 0.0165, + "step": 24525 + }, + { + "epoch": 14.0, + "learning_rate": 3.914177215189874e-06, + "loss": 0.0114, + "step": 24550 + }, + { + "epoch": 14.0, + "learning_rate": 3.9078481012658235e-06, + "loss": 0.0111, + "step": 24575 + }, + { + "epoch": 14.0, + "learning_rate": 3.901518987341772e-06, + "loss": 0.0134, + "step": 24600 + }, + { + "epoch": 14.0, + "learning_rate": 3.8951898734177215e-06, + "loss": 0.0138, + "step": 24625 + }, + { + "epoch": 14.0, + "learning_rate": 3.888860759493671e-06, + "loss": 0.0151, + "step": 24650 + }, + { + "epoch": 14.0, + "learning_rate": 3.88253164556962e-06, + "loss": 0.0101, + "step": 24675 + }, + { + "epoch": 14.0, + "learning_rate": 3.87620253164557e-06, + "loss": 0.0108, + "step": 24700 + }, + { + "epoch": 14.01, + "learning_rate": 3.869873417721519e-06, + "loss": 0.0151, + "step": 24725 + }, + { + "epoch": 14.01, + "learning_rate": 3.863544303797469e-06, + "loss": 0.0132, + "step": 24750 + }, + { + "epoch": 14.01, + "learning_rate": 3.857215189873418e-06, + "loss": 0.012, + "step": 24775 + }, + { + "epoch": 14.01, + "learning_rate": 3.8508860759493675e-06, + "loss": 0.017, + "step": 24800 + }, + { + "epoch": 14.01, + "learning_rate": 3.844556962025317e-06, + "loss": 0.0141, + "step": 24825 + }, + { + "epoch": 14.01, + "learning_rate": 3.838227848101266e-06, + "loss": 0.0166, + "step": 24850 + }, + { + "epoch": 14.01, + "learning_rate": 3.831898734177216e-06, + "loss": 0.0112, + "step": 24875 + }, + { + "epoch": 14.01, + "learning_rate": 3.825569620253165e-06, + "loss": 0.0097, + "step": 24900 + }, + { + "epoch": 14.01, + "learning_rate": 3.819240506329115e-06, + "loss": 0.0138, + "step": 24925 + }, + { + "epoch": 14.01, + "learning_rate": 3.8129113924050636e-06, + "loss": 0.0069, + "step": 24950 + }, + { + "epoch": 14.01, + "learning_rate": 3.806582278481013e-06, + "loss": 0.0111, + "step": 24975 + }, + { + "epoch": 14.01, + "learning_rate": 3.8002531645569625e-06, + "loss": 0.0149, + "step": 25000 + }, + { + "epoch": 14.01, + "eval_loss": 0.2640444040298462, + "eval_runtime": 1478.4977, + "eval_samples_per_second": 7.042, + "eval_steps_per_second": 0.44, + "eval_wer": 24.063805915543345, + "step": 25000 + }, + { + "epoch": 14.01, + "learning_rate": 3.7939240506329115e-06, + "loss": 0.0176, + "step": 25025 + }, + { + "epoch": 14.01, + "learning_rate": 3.787594936708861e-06, + "loss": 0.0104, + "step": 25050 + }, + { + "epoch": 14.01, + "learning_rate": 3.7812658227848103e-06, + "loss": 0.0084, + "step": 25075 + }, + { + "epoch": 14.01, + "learning_rate": 3.7749367088607598e-06, + "loss": 0.0118, + "step": 25100 + }, + { + "epoch": 14.02, + "learning_rate": 3.768607594936709e-06, + "loss": 0.0099, + "step": 25125 + }, + { + "epoch": 14.02, + "learning_rate": 3.762278481012658e-06, + "loss": 0.0089, + "step": 25150 + }, + { + "epoch": 14.02, + "learning_rate": 3.7559493670886076e-06, + "loss": 0.0119, + "step": 25175 + }, + { + "epoch": 14.02, + "learning_rate": 3.749620253164557e-06, + "loss": 0.0118, + "step": 25200 + }, + { + "epoch": 14.02, + "learning_rate": 3.743291139240507e-06, + "loss": 0.0108, + "step": 25225 + }, + { + "epoch": 14.02, + "learning_rate": 3.7369620253164563e-06, + "loss": 0.0101, + "step": 25250 + }, + { + "epoch": 14.02, + "learning_rate": 3.7306329113924058e-06, + "loss": 0.0095, + "step": 25275 + }, + { + "epoch": 14.02, + "learning_rate": 3.7243037974683548e-06, + "loss": 0.0106, + "step": 25300 + }, + { + "epoch": 14.02, + "learning_rate": 3.717974683544304e-06, + "loss": 0.0135, + "step": 25325 + }, + { + "epoch": 14.02, + "learning_rate": 3.7116455696202536e-06, + "loss": 0.0115, + "step": 25350 + }, + { + "epoch": 14.02, + "learning_rate": 3.705316455696203e-06, + "loss": 0.0114, + "step": 25375 + }, + { + "epoch": 14.02, + "learning_rate": 3.698987341772152e-06, + "loss": 0.0119, + "step": 25400 + }, + { + "epoch": 14.02, + "learning_rate": 3.6926582278481015e-06, + "loss": 0.0123, + "step": 25425 + }, + { + "epoch": 14.02, + "learning_rate": 3.686329113924051e-06, + "loss": 0.0138, + "step": 25450 + }, + { + "epoch": 14.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 0.0133, + "step": 25475 + }, + { + "epoch": 14.02, + "learning_rate": 3.6736708860759498e-06, + "loss": 0.0106, + "step": 25500 + }, + { + "epoch": 14.03, + "learning_rate": 3.6673417721518988e-06, + "loss": 0.0099, + "step": 25525 + }, + { + "epoch": 14.03, + "learning_rate": 3.661012658227848e-06, + "loss": 0.0104, + "step": 25550 + }, + { + "epoch": 14.03, + "learning_rate": 3.6546835443037976e-06, + "loss": 0.0143, + "step": 25575 + }, + { + "epoch": 14.03, + "learning_rate": 3.648354430379747e-06, + "loss": 0.0096, + "step": 25600 + }, + { + "epoch": 14.03, + "learning_rate": 3.642025316455696e-06, + "loss": 0.0094, + "step": 25625 + }, + { + "epoch": 14.03, + "learning_rate": 3.6356962025316463e-06, + "loss": 0.0162, + "step": 25650 + }, + { + "epoch": 14.03, + "learning_rate": 3.6293670886075953e-06, + "loss": 0.008, + "step": 25675 + }, + { + "epoch": 14.03, + "learning_rate": 3.6230379746835448e-06, + "loss": 0.0094, + "step": 25700 + }, + { + "epoch": 14.03, + "learning_rate": 3.616708860759494e-06, + "loss": 0.0122, + "step": 25725 + }, + { + "epoch": 14.03, + "learning_rate": 3.6103797468354436e-06, + "loss": 0.0128, + "step": 25750 + }, + { + "epoch": 14.03, + "learning_rate": 3.6040506329113926e-06, + "loss": 0.0117, + "step": 25775 + }, + { + "epoch": 14.03, + "learning_rate": 3.597721518987342e-06, + "loss": 0.0067, + "step": 25800 + }, + { + "epoch": 14.03, + "learning_rate": 3.5913924050632915e-06, + "loss": 0.0078, + "step": 25825 + }, + { + "epoch": 14.03, + "learning_rate": 3.585063291139241e-06, + "loss": 0.0071, + "step": 25850 + }, + { + "epoch": 14.03, + "learning_rate": 3.57873417721519e-06, + "loss": 0.0093, + "step": 25875 + }, + { + "epoch": 14.03, + "learning_rate": 3.5724050632911393e-06, + "loss": 0.0064, + "step": 25900 + }, + { + "epoch": 14.04, + "learning_rate": 3.5660759493670888e-06, + "loss": 0.0067, + "step": 25925 + }, + { + "epoch": 14.04, + "learning_rate": 3.559746835443038e-06, + "loss": 0.0076, + "step": 25950 + }, + { + "epoch": 14.04, + "learning_rate": 3.5534177215189876e-06, + "loss": 0.0099, + "step": 25975 + }, + { + "epoch": 14.04, + "learning_rate": 3.5470886075949366e-06, + "loss": 0.0111, + "step": 26000 + }, + { + "epoch": 14.04, + "eval_loss": 0.2734878659248352, + "eval_runtime": 1456.7487, + "eval_samples_per_second": 7.147, + "eval_steps_per_second": 0.447, + "eval_wer": 24.359745686921737, + "step": 26000 + }, + { + "epoch": 14.04, + "learning_rate": 3.540759493670886e-06, + "loss": 0.0134, + "step": 26025 + }, + { + "epoch": 14.04, + "learning_rate": 3.534430379746836e-06, + "loss": 0.008, + "step": 26050 + }, + { + "epoch": 14.04, + "learning_rate": 3.5281012658227853e-06, + "loss": 0.0138, + "step": 26075 + }, + { + "epoch": 14.04, + "learning_rate": 3.5217721518987348e-06, + "loss": 0.0075, + "step": 26100 + }, + { + "epoch": 14.04, + "learning_rate": 3.515443037974684e-06, + "loss": 0.0097, + "step": 26125 + }, + { + "epoch": 14.04, + "learning_rate": 3.509113924050633e-06, + "loss": 0.0139, + "step": 26150 + }, + { + "epoch": 14.04, + "learning_rate": 3.5027848101265826e-06, + "loss": 0.0099, + "step": 26175 + }, + { + "epoch": 14.04, + "learning_rate": 3.496455696202532e-06, + "loss": 0.0148, + "step": 26200 + }, + { + "epoch": 14.04, + "learning_rate": 3.4901265822784815e-06, + "loss": 0.0119, + "step": 26225 + }, + { + "epoch": 14.04, + "learning_rate": 3.4837974683544305e-06, + "loss": 0.0086, + "step": 26250 + }, + { + "epoch": 15.0, + "learning_rate": 3.47746835443038e-06, + "loss": 0.0091, + "step": 26275 + }, + { + "epoch": 15.0, + "learning_rate": 3.4711392405063293e-06, + "loss": 0.0105, + "step": 26300 + }, + { + "epoch": 15.0, + "learning_rate": 3.465063291139241e-06, + "loss": 0.0138, + "step": 26325 + }, + { + "epoch": 15.0, + "learning_rate": 3.45873417721519e-06, + "loss": 0.0185, + "step": 26350 + }, + { + "epoch": 15.0, + "learning_rate": 3.4524050632911392e-06, + "loss": 0.015, + "step": 26375 + }, + { + "epoch": 15.0, + "learning_rate": 3.4460759493670886e-06, + "loss": 0.0177, + "step": 26400 + }, + { + "epoch": 15.0, + "learning_rate": 3.439746835443038e-06, + "loss": 0.014, + "step": 26425 + }, + { + "epoch": 15.0, + "learning_rate": 3.433417721518988e-06, + "loss": 0.0127, + "step": 26450 + }, + { + "epoch": 15.01, + "learning_rate": 3.4270886075949374e-06, + "loss": 0.0135, + "step": 26475 + }, + { + "epoch": 15.01, + "learning_rate": 3.4207594936708864e-06, + "loss": 0.0171, + "step": 26500 + }, + { + "epoch": 15.01, + "learning_rate": 3.4144303797468358e-06, + "loss": 0.0114, + "step": 26525 + }, + { + "epoch": 15.01, + "learning_rate": 3.4081012658227852e-06, + "loss": 0.0099, + "step": 26550 + }, + { + "epoch": 15.01, + "learning_rate": 3.4017721518987346e-06, + "loss": 0.0126, + "step": 26575 + }, + { + "epoch": 15.01, + "learning_rate": 3.395443037974684e-06, + "loss": 0.0116, + "step": 26600 + }, + { + "epoch": 15.01, + "learning_rate": 3.389113924050633e-06, + "loss": 0.0087, + "step": 26625 + }, + { + "epoch": 15.01, + "learning_rate": 3.3827848101265825e-06, + "loss": 0.0073, + "step": 26650 + }, + { + "epoch": 15.01, + "learning_rate": 3.376455696202532e-06, + "loss": 0.0102, + "step": 26675 + }, + { + "epoch": 15.01, + "learning_rate": 3.3701265822784814e-06, + "loss": 0.0106, + "step": 26700 + }, + { + "epoch": 15.01, + "learning_rate": 3.3637974683544304e-06, + "loss": 0.0094, + "step": 26725 + }, + { + "epoch": 15.01, + "learning_rate": 3.35746835443038e-06, + "loss": 0.0113, + "step": 26750 + }, + { + "epoch": 15.01, + "learning_rate": 3.3511392405063292e-06, + "loss": 0.0169, + "step": 26775 + }, + { + "epoch": 15.01, + "learning_rate": 3.3448101265822786e-06, + "loss": 0.0084, + "step": 26800 + }, + { + "epoch": 15.01, + "learning_rate": 3.338481012658228e-06, + "loss": 0.0153, + "step": 26825 + }, + { + "epoch": 15.01, + "learning_rate": 3.332151898734177e-06, + "loss": 0.012, + "step": 26850 + }, + { + "epoch": 15.02, + "learning_rate": 3.325822784810127e-06, + "loss": 0.0106, + "step": 26875 + }, + { + "epoch": 15.02, + "learning_rate": 3.3194936708860764e-06, + "loss": 0.0099, + "step": 26900 + }, + { + "epoch": 15.02, + "learning_rate": 3.3131645569620258e-06, + "loss": 0.008, + "step": 26925 + }, + { + "epoch": 15.02, + "learning_rate": 3.306835443037975e-06, + "loss": 0.0076, + "step": 26950 + }, + { + "epoch": 15.02, + "learning_rate": 3.3005063291139246e-06, + "loss": 0.0088, + "step": 26975 + }, + { + "epoch": 15.02, + "learning_rate": 3.2941772151898736e-06, + "loss": 0.009, + "step": 27000 + }, + { + "epoch": 15.02, + "eval_loss": 0.2693029046058655, + "eval_runtime": 1479.1374, + "eval_samples_per_second": 7.039, + "eval_steps_per_second": 0.44, + "eval_wer": 24.28819980812696, + "step": 27000 + }, + { + "epoch": 15.02, + "learning_rate": 3.287848101265823e-06, + "loss": 0.0115, + "step": 27025 + }, + { + "epoch": 15.02, + "learning_rate": 3.2815189873417725e-06, + "loss": 0.0099, + "step": 27050 + }, + { + "epoch": 15.02, + "learning_rate": 3.275189873417722e-06, + "loss": 0.0073, + "step": 27075 + }, + { + "epoch": 15.02, + "learning_rate": 3.268860759493671e-06, + "loss": 0.0061, + "step": 27100 + }, + { + "epoch": 15.02, + "learning_rate": 3.2625316455696204e-06, + "loss": 0.007, + "step": 27125 + }, + { + "epoch": 15.02, + "learning_rate": 3.2562025316455698e-06, + "loss": 0.0109, + "step": 27150 + }, + { + "epoch": 15.02, + "learning_rate": 3.2498734177215192e-06, + "loss": 0.0123, + "step": 27175 + }, + { + "epoch": 15.02, + "learning_rate": 3.2435443037974686e-06, + "loss": 0.0121, + "step": 27200 + }, + { + "epoch": 15.02, + "learning_rate": 3.2372151898734176e-06, + "loss": 0.0129, + "step": 27225 + }, + { + "epoch": 15.02, + "learning_rate": 3.230886075949367e-06, + "loss": 0.0128, + "step": 27250 + }, + { + "epoch": 15.03, + "learning_rate": 3.224556962025317e-06, + "loss": 0.0094, + "step": 27275 + }, + { + "epoch": 15.03, + "learning_rate": 3.2182278481012664e-06, + "loss": 0.0081, + "step": 27300 + }, + { + "epoch": 15.03, + "learning_rate": 3.2118987341772158e-06, + "loss": 0.0087, + "step": 27325 + }, + { + "epoch": 15.03, + "learning_rate": 3.2055696202531648e-06, + "loss": 0.0131, + "step": 27350 + }, + { + "epoch": 15.03, + "learning_rate": 3.199240506329114e-06, + "loss": 0.0073, + "step": 27375 + }, + { + "epoch": 15.03, + "learning_rate": 3.1929113924050636e-06, + "loss": 0.0086, + "step": 27400 + }, + { + "epoch": 15.03, + "learning_rate": 3.186582278481013e-06, + "loss": 0.0108, + "step": 27425 + }, + { + "epoch": 15.03, + "learning_rate": 3.1802531645569625e-06, + "loss": 0.01, + "step": 27450 + }, + { + "epoch": 15.03, + "learning_rate": 3.1739240506329115e-06, + "loss": 0.0123, + "step": 27475 + }, + { + "epoch": 15.03, + "learning_rate": 3.167594936708861e-06, + "loss": 0.0081, + "step": 27500 + }, + { + "epoch": 15.03, + "learning_rate": 3.1612658227848104e-06, + "loss": 0.0123, + "step": 27525 + }, + { + "epoch": 15.03, + "learning_rate": 3.1549367088607598e-06, + "loss": 0.0096, + "step": 27550 + }, + { + "epoch": 15.03, + "learning_rate": 3.1486075949367088e-06, + "loss": 0.0054, + "step": 27575 + }, + { + "epoch": 15.03, + "learning_rate": 3.1422784810126582e-06, + "loss": 0.006, + "step": 27600 + }, + { + "epoch": 15.03, + "learning_rate": 3.1359493670886076e-06, + "loss": 0.0089, + "step": 27625 + }, + { + "epoch": 15.03, + "learning_rate": 3.129620253164557e-06, + "loss": 0.0109, + "step": 27650 + }, + { + "epoch": 15.04, + "learning_rate": 3.1232911392405065e-06, + "loss": 0.0089, + "step": 27675 + }, + { + "epoch": 15.04, + "learning_rate": 3.1169620253164563e-06, + "loss": 0.0094, + "step": 27700 + }, + { + "epoch": 15.04, + "learning_rate": 3.1106329113924054e-06, + "loss": 0.0102, + "step": 27725 + }, + { + "epoch": 15.04, + "learning_rate": 3.1043037974683548e-06, + "loss": 0.0097, + "step": 27750 + }, + { + "epoch": 15.04, + "learning_rate": 3.097974683544304e-06, + "loss": 0.0078, + "step": 27775 + }, + { + "epoch": 15.04, + "learning_rate": 3.0916455696202536e-06, + "loss": 0.0073, + "step": 27800 + }, + { + "epoch": 15.04, + "learning_rate": 3.085316455696203e-06, + "loss": 0.0105, + "step": 27825 + }, + { + "epoch": 15.04, + "learning_rate": 3.078987341772152e-06, + "loss": 0.0083, + "step": 27850 + }, + { + "epoch": 15.04, + "learning_rate": 3.0726582278481015e-06, + "loss": 0.0138, + "step": 27875 + }, + { + "epoch": 15.04, + "learning_rate": 3.066329113924051e-06, + "loss": 0.0049, + "step": 27900 + }, + { + "epoch": 15.04, + "learning_rate": 3.0600000000000003e-06, + "loss": 0.0107, + "step": 27925 + }, + { + "epoch": 15.04, + "learning_rate": 3.0536708860759494e-06, + "loss": 0.0105, + "step": 27950 + }, + { + "epoch": 15.04, + "learning_rate": 3.0473417721518988e-06, + "loss": 0.0134, + "step": 27975 + }, + { + "epoch": 15.04, + "learning_rate": 3.041012658227848e-06, + "loss": 0.0137, + "step": 28000 + }, + { + "epoch": 15.04, + "eval_loss": 0.2780563235282898, + "eval_runtime": 1465.9542, + "eval_samples_per_second": 7.102, + "eval_steps_per_second": 0.444, + "eval_wer": 25.010162766874256, + "step": 28000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.29242971533312e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-28000/training_args.bin b/checkpoint-28000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-28000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-29000/config.json b/checkpoint-29000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-29000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-29000/generation_config.json b/checkpoint-29000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-29000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-29000/optimizer.pt b/checkpoint-29000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..24ce11a6b160c4e057267533839714c0aa399d99 --- /dev/null +++ b/checkpoint-29000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce7939557a16c1c25ce84579857e602528fcd4ea02af2cd5b731f8a13b459064 +size 1934161093 diff --git a/checkpoint-29000/preprocessor_config.json b/checkpoint-29000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-29000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-29000/pytorch_model.bin b/checkpoint-29000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a2d00c62587615d9fae9507ff2b7a6d8e396775a --- /dev/null +++ b/checkpoint-29000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7444df663a546d6c2120fe65babae482f684ab0558fe3915cce190cab1477d56 +size 967102601 diff --git a/checkpoint-29000/rng_state.pth b/checkpoint-29000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e28afb61443d9b2ff5d4a7a89340c4de0277f6c2 --- /dev/null +++ b/checkpoint-29000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2fd54c298eb4d4c6c24b4b89087007e708bdaedc9d99ecdf4cd2674b9bed6bb +size 14575 diff --git a/checkpoint-29000/scaler.pt b/checkpoint-29000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7f43d70c64fcb637a900d46aa583f9ba3d75882 --- /dev/null +++ b/checkpoint-29000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:720323e548863674bafe89e3dce30a258b668cfadf00060f926d478530329bc7 +size 557 diff --git a/checkpoint-29000/scheduler.pt b/checkpoint-29000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a351c8018966dcbcdbe1a9a5156cc38396392c87 --- /dev/null +++ b/checkpoint-29000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5dcb743a0a9a515c460142ba56c7dee612bee3100e88f44b04f67d36f13cc1b +size 627 diff --git a/checkpoint-29000/trainer_state.json b/checkpoint-29000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d6769064b5900567dfef89acd6d5999ed73156e5 --- /dev/null +++ b/checkpoint-29000/trainer_state.json @@ -0,0 +1,7237 @@ +{ + "best_metric": 23.278427291500677, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-29000", + "epoch": 16.0246, + "global_step": 29000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + }, + { + "epoch": 13.03, + "learning_rate": 4.046835443037975e-06, + "loss": 0.0128, + "step": 24025 + }, + { + "epoch": 13.03, + "learning_rate": 4.040506329113924e-06, + "loss": 0.012, + "step": 24050 + }, + { + "epoch": 13.03, + "learning_rate": 4.034177215189874e-06, + "loss": 0.0098, + "step": 24075 + }, + { + "epoch": 13.03, + "learning_rate": 4.027848101265823e-06, + "loss": 0.0091, + "step": 24100 + }, + { + "epoch": 13.03, + "learning_rate": 4.021518987341773e-06, + "loss": 0.0077, + "step": 24125 + }, + { + "epoch": 13.03, + "learning_rate": 4.015189873417722e-06, + "loss": 0.0135, + "step": 24150 + }, + { + "epoch": 13.04, + "learning_rate": 4.0088607594936715e-06, + "loss": 0.0124, + "step": 24175 + }, + { + "epoch": 13.04, + "learning_rate": 4.002531645569621e-06, + "loss": 0.0152, + "step": 24200 + }, + { + "epoch": 13.04, + "learning_rate": 3.99620253164557e-06, + "loss": 0.0127, + "step": 24225 + }, + { + "epoch": 13.04, + "learning_rate": 3.989873417721519e-06, + "loss": 0.0066, + "step": 24250 + }, + { + "epoch": 13.04, + "learning_rate": 3.983544303797468e-06, + "loss": 0.0163, + "step": 24275 + }, + { + "epoch": 13.04, + "learning_rate": 3.977215189873418e-06, + "loss": 0.0095, + "step": 24300 + }, + { + "epoch": 13.04, + "learning_rate": 3.970886075949367e-06, + "loss": 0.0103, + "step": 24325 + }, + { + "epoch": 13.04, + "learning_rate": 3.964556962025317e-06, + "loss": 0.0112, + "step": 24350 + }, + { + "epoch": 13.04, + "learning_rate": 3.958227848101266e-06, + "loss": 0.01, + "step": 24375 + }, + { + "epoch": 13.04, + "learning_rate": 3.9518987341772155e-06, + "loss": 0.0131, + "step": 24400 + }, + { + "epoch": 13.04, + "learning_rate": 3.945569620253165e-06, + "loss": 0.0143, + "step": 24425 + }, + { + "epoch": 13.04, + "learning_rate": 3.939240506329114e-06, + "loss": 0.0151, + "step": 24450 + }, + { + "epoch": 13.04, + "learning_rate": 3.932911392405064e-06, + "loss": 0.0129, + "step": 24475 + }, + { + "epoch": 13.04, + "learning_rate": 3.926582278481013e-06, + "loss": 0.0125, + "step": 24500 + }, + { + "epoch": 14.0, + "learning_rate": 3.920506329113925e-06, + "loss": 0.0165, + "step": 24525 + }, + { + "epoch": 14.0, + "learning_rate": 3.914177215189874e-06, + "loss": 0.0114, + "step": 24550 + }, + { + "epoch": 14.0, + "learning_rate": 3.9078481012658235e-06, + "loss": 0.0111, + "step": 24575 + }, + { + "epoch": 14.0, + "learning_rate": 3.901518987341772e-06, + "loss": 0.0134, + "step": 24600 + }, + { + "epoch": 14.0, + "learning_rate": 3.8951898734177215e-06, + "loss": 0.0138, + "step": 24625 + }, + { + "epoch": 14.0, + "learning_rate": 3.888860759493671e-06, + "loss": 0.0151, + "step": 24650 + }, + { + "epoch": 14.0, + "learning_rate": 3.88253164556962e-06, + "loss": 0.0101, + "step": 24675 + }, + { + "epoch": 14.0, + "learning_rate": 3.87620253164557e-06, + "loss": 0.0108, + "step": 24700 + }, + { + "epoch": 14.01, + "learning_rate": 3.869873417721519e-06, + "loss": 0.0151, + "step": 24725 + }, + { + "epoch": 14.01, + "learning_rate": 3.863544303797469e-06, + "loss": 0.0132, + "step": 24750 + }, + { + "epoch": 14.01, + "learning_rate": 3.857215189873418e-06, + "loss": 0.012, + "step": 24775 + }, + { + "epoch": 14.01, + "learning_rate": 3.8508860759493675e-06, + "loss": 0.017, + "step": 24800 + }, + { + "epoch": 14.01, + "learning_rate": 3.844556962025317e-06, + "loss": 0.0141, + "step": 24825 + }, + { + "epoch": 14.01, + "learning_rate": 3.838227848101266e-06, + "loss": 0.0166, + "step": 24850 + }, + { + "epoch": 14.01, + "learning_rate": 3.831898734177216e-06, + "loss": 0.0112, + "step": 24875 + }, + { + "epoch": 14.01, + "learning_rate": 3.825569620253165e-06, + "loss": 0.0097, + "step": 24900 + }, + { + "epoch": 14.01, + "learning_rate": 3.819240506329115e-06, + "loss": 0.0138, + "step": 24925 + }, + { + "epoch": 14.01, + "learning_rate": 3.8129113924050636e-06, + "loss": 0.0069, + "step": 24950 + }, + { + "epoch": 14.01, + "learning_rate": 3.806582278481013e-06, + "loss": 0.0111, + "step": 24975 + }, + { + "epoch": 14.01, + "learning_rate": 3.8002531645569625e-06, + "loss": 0.0149, + "step": 25000 + }, + { + "epoch": 14.01, + "eval_loss": 0.2640444040298462, + "eval_runtime": 1478.4977, + "eval_samples_per_second": 7.042, + "eval_steps_per_second": 0.44, + "eval_wer": 24.063805915543345, + "step": 25000 + }, + { + "epoch": 14.01, + "learning_rate": 3.7939240506329115e-06, + "loss": 0.0176, + "step": 25025 + }, + { + "epoch": 14.01, + "learning_rate": 3.787594936708861e-06, + "loss": 0.0104, + "step": 25050 + }, + { + "epoch": 14.01, + "learning_rate": 3.7812658227848103e-06, + "loss": 0.0084, + "step": 25075 + }, + { + "epoch": 14.01, + "learning_rate": 3.7749367088607598e-06, + "loss": 0.0118, + "step": 25100 + }, + { + "epoch": 14.02, + "learning_rate": 3.768607594936709e-06, + "loss": 0.0099, + "step": 25125 + }, + { + "epoch": 14.02, + "learning_rate": 3.762278481012658e-06, + "loss": 0.0089, + "step": 25150 + }, + { + "epoch": 14.02, + "learning_rate": 3.7559493670886076e-06, + "loss": 0.0119, + "step": 25175 + }, + { + "epoch": 14.02, + "learning_rate": 3.749620253164557e-06, + "loss": 0.0118, + "step": 25200 + }, + { + "epoch": 14.02, + "learning_rate": 3.743291139240507e-06, + "loss": 0.0108, + "step": 25225 + }, + { + "epoch": 14.02, + "learning_rate": 3.7369620253164563e-06, + "loss": 0.0101, + "step": 25250 + }, + { + "epoch": 14.02, + "learning_rate": 3.7306329113924058e-06, + "loss": 0.0095, + "step": 25275 + }, + { + "epoch": 14.02, + "learning_rate": 3.7243037974683548e-06, + "loss": 0.0106, + "step": 25300 + }, + { + "epoch": 14.02, + "learning_rate": 3.717974683544304e-06, + "loss": 0.0135, + "step": 25325 + }, + { + "epoch": 14.02, + "learning_rate": 3.7116455696202536e-06, + "loss": 0.0115, + "step": 25350 + }, + { + "epoch": 14.02, + "learning_rate": 3.705316455696203e-06, + "loss": 0.0114, + "step": 25375 + }, + { + "epoch": 14.02, + "learning_rate": 3.698987341772152e-06, + "loss": 0.0119, + "step": 25400 + }, + { + "epoch": 14.02, + "learning_rate": 3.6926582278481015e-06, + "loss": 0.0123, + "step": 25425 + }, + { + "epoch": 14.02, + "learning_rate": 3.686329113924051e-06, + "loss": 0.0138, + "step": 25450 + }, + { + "epoch": 14.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 0.0133, + "step": 25475 + }, + { + "epoch": 14.02, + "learning_rate": 3.6736708860759498e-06, + "loss": 0.0106, + "step": 25500 + }, + { + "epoch": 14.03, + "learning_rate": 3.6673417721518988e-06, + "loss": 0.0099, + "step": 25525 + }, + { + "epoch": 14.03, + "learning_rate": 3.661012658227848e-06, + "loss": 0.0104, + "step": 25550 + }, + { + "epoch": 14.03, + "learning_rate": 3.6546835443037976e-06, + "loss": 0.0143, + "step": 25575 + }, + { + "epoch": 14.03, + "learning_rate": 3.648354430379747e-06, + "loss": 0.0096, + "step": 25600 + }, + { + "epoch": 14.03, + "learning_rate": 3.642025316455696e-06, + "loss": 0.0094, + "step": 25625 + }, + { + "epoch": 14.03, + "learning_rate": 3.6356962025316463e-06, + "loss": 0.0162, + "step": 25650 + }, + { + "epoch": 14.03, + "learning_rate": 3.6293670886075953e-06, + "loss": 0.008, + "step": 25675 + }, + { + "epoch": 14.03, + "learning_rate": 3.6230379746835448e-06, + "loss": 0.0094, + "step": 25700 + }, + { + "epoch": 14.03, + "learning_rate": 3.616708860759494e-06, + "loss": 0.0122, + "step": 25725 + }, + { + "epoch": 14.03, + "learning_rate": 3.6103797468354436e-06, + "loss": 0.0128, + "step": 25750 + }, + { + "epoch": 14.03, + "learning_rate": 3.6040506329113926e-06, + "loss": 0.0117, + "step": 25775 + }, + { + "epoch": 14.03, + "learning_rate": 3.597721518987342e-06, + "loss": 0.0067, + "step": 25800 + }, + { + "epoch": 14.03, + "learning_rate": 3.5913924050632915e-06, + "loss": 0.0078, + "step": 25825 + }, + { + "epoch": 14.03, + "learning_rate": 3.585063291139241e-06, + "loss": 0.0071, + "step": 25850 + }, + { + "epoch": 14.03, + "learning_rate": 3.57873417721519e-06, + "loss": 0.0093, + "step": 25875 + }, + { + "epoch": 14.03, + "learning_rate": 3.5724050632911393e-06, + "loss": 0.0064, + "step": 25900 + }, + { + "epoch": 14.04, + "learning_rate": 3.5660759493670888e-06, + "loss": 0.0067, + "step": 25925 + }, + { + "epoch": 14.04, + "learning_rate": 3.559746835443038e-06, + "loss": 0.0076, + "step": 25950 + }, + { + "epoch": 14.04, + "learning_rate": 3.5534177215189876e-06, + "loss": 0.0099, + "step": 25975 + }, + { + "epoch": 14.04, + "learning_rate": 3.5470886075949366e-06, + "loss": 0.0111, + "step": 26000 + }, + { + "epoch": 14.04, + "eval_loss": 0.2734878659248352, + "eval_runtime": 1456.7487, + "eval_samples_per_second": 7.147, + "eval_steps_per_second": 0.447, + "eval_wer": 24.359745686921737, + "step": 26000 + }, + { + "epoch": 14.04, + "learning_rate": 3.540759493670886e-06, + "loss": 0.0134, + "step": 26025 + }, + { + "epoch": 14.04, + "learning_rate": 3.534430379746836e-06, + "loss": 0.008, + "step": 26050 + }, + { + "epoch": 14.04, + "learning_rate": 3.5281012658227853e-06, + "loss": 0.0138, + "step": 26075 + }, + { + "epoch": 14.04, + "learning_rate": 3.5217721518987348e-06, + "loss": 0.0075, + "step": 26100 + }, + { + "epoch": 14.04, + "learning_rate": 3.515443037974684e-06, + "loss": 0.0097, + "step": 26125 + }, + { + "epoch": 14.04, + "learning_rate": 3.509113924050633e-06, + "loss": 0.0139, + "step": 26150 + }, + { + "epoch": 14.04, + "learning_rate": 3.5027848101265826e-06, + "loss": 0.0099, + "step": 26175 + }, + { + "epoch": 14.04, + "learning_rate": 3.496455696202532e-06, + "loss": 0.0148, + "step": 26200 + }, + { + "epoch": 14.04, + "learning_rate": 3.4901265822784815e-06, + "loss": 0.0119, + "step": 26225 + }, + { + "epoch": 14.04, + "learning_rate": 3.4837974683544305e-06, + "loss": 0.0086, + "step": 26250 + }, + { + "epoch": 15.0, + "learning_rate": 3.47746835443038e-06, + "loss": 0.0091, + "step": 26275 + }, + { + "epoch": 15.0, + "learning_rate": 3.4711392405063293e-06, + "loss": 0.0105, + "step": 26300 + }, + { + "epoch": 15.0, + "learning_rate": 3.465063291139241e-06, + "loss": 0.0138, + "step": 26325 + }, + { + "epoch": 15.0, + "learning_rate": 3.45873417721519e-06, + "loss": 0.0185, + "step": 26350 + }, + { + "epoch": 15.0, + "learning_rate": 3.4524050632911392e-06, + "loss": 0.015, + "step": 26375 + }, + { + "epoch": 15.0, + "learning_rate": 3.4460759493670886e-06, + "loss": 0.0177, + "step": 26400 + }, + { + "epoch": 15.0, + "learning_rate": 3.439746835443038e-06, + "loss": 0.014, + "step": 26425 + }, + { + "epoch": 15.0, + "learning_rate": 3.433417721518988e-06, + "loss": 0.0127, + "step": 26450 + }, + { + "epoch": 15.01, + "learning_rate": 3.4270886075949374e-06, + "loss": 0.0135, + "step": 26475 + }, + { + "epoch": 15.01, + "learning_rate": 3.4207594936708864e-06, + "loss": 0.0171, + "step": 26500 + }, + { + "epoch": 15.01, + "learning_rate": 3.4144303797468358e-06, + "loss": 0.0114, + "step": 26525 + }, + { + "epoch": 15.01, + "learning_rate": 3.4081012658227852e-06, + "loss": 0.0099, + "step": 26550 + }, + { + "epoch": 15.01, + "learning_rate": 3.4017721518987346e-06, + "loss": 0.0126, + "step": 26575 + }, + { + "epoch": 15.01, + "learning_rate": 3.395443037974684e-06, + "loss": 0.0116, + "step": 26600 + }, + { + "epoch": 15.01, + "learning_rate": 3.389113924050633e-06, + "loss": 0.0087, + "step": 26625 + }, + { + "epoch": 15.01, + "learning_rate": 3.3827848101265825e-06, + "loss": 0.0073, + "step": 26650 + }, + { + "epoch": 15.01, + "learning_rate": 3.376455696202532e-06, + "loss": 0.0102, + "step": 26675 + }, + { + "epoch": 15.01, + "learning_rate": 3.3701265822784814e-06, + "loss": 0.0106, + "step": 26700 + }, + { + "epoch": 15.01, + "learning_rate": 3.3637974683544304e-06, + "loss": 0.0094, + "step": 26725 + }, + { + "epoch": 15.01, + "learning_rate": 3.35746835443038e-06, + "loss": 0.0113, + "step": 26750 + }, + { + "epoch": 15.01, + "learning_rate": 3.3511392405063292e-06, + "loss": 0.0169, + "step": 26775 + }, + { + "epoch": 15.01, + "learning_rate": 3.3448101265822786e-06, + "loss": 0.0084, + "step": 26800 + }, + { + "epoch": 15.01, + "learning_rate": 3.338481012658228e-06, + "loss": 0.0153, + "step": 26825 + }, + { + "epoch": 15.01, + "learning_rate": 3.332151898734177e-06, + "loss": 0.012, + "step": 26850 + }, + { + "epoch": 15.02, + "learning_rate": 3.325822784810127e-06, + "loss": 0.0106, + "step": 26875 + }, + { + "epoch": 15.02, + "learning_rate": 3.3194936708860764e-06, + "loss": 0.0099, + "step": 26900 + }, + { + "epoch": 15.02, + "learning_rate": 3.3131645569620258e-06, + "loss": 0.008, + "step": 26925 + }, + { + "epoch": 15.02, + "learning_rate": 3.306835443037975e-06, + "loss": 0.0076, + "step": 26950 + }, + { + "epoch": 15.02, + "learning_rate": 3.3005063291139246e-06, + "loss": 0.0088, + "step": 26975 + }, + { + "epoch": 15.02, + "learning_rate": 3.2941772151898736e-06, + "loss": 0.009, + "step": 27000 + }, + { + "epoch": 15.02, + "eval_loss": 0.2693029046058655, + "eval_runtime": 1479.1374, + "eval_samples_per_second": 7.039, + "eval_steps_per_second": 0.44, + "eval_wer": 24.28819980812696, + "step": 27000 + }, + { + "epoch": 15.02, + "learning_rate": 3.287848101265823e-06, + "loss": 0.0115, + "step": 27025 + }, + { + "epoch": 15.02, + "learning_rate": 3.2815189873417725e-06, + "loss": 0.0099, + "step": 27050 + }, + { + "epoch": 15.02, + "learning_rate": 3.275189873417722e-06, + "loss": 0.0073, + "step": 27075 + }, + { + "epoch": 15.02, + "learning_rate": 3.268860759493671e-06, + "loss": 0.0061, + "step": 27100 + }, + { + "epoch": 15.02, + "learning_rate": 3.2625316455696204e-06, + "loss": 0.007, + "step": 27125 + }, + { + "epoch": 15.02, + "learning_rate": 3.2562025316455698e-06, + "loss": 0.0109, + "step": 27150 + }, + { + "epoch": 15.02, + "learning_rate": 3.2498734177215192e-06, + "loss": 0.0123, + "step": 27175 + }, + { + "epoch": 15.02, + "learning_rate": 3.2435443037974686e-06, + "loss": 0.0121, + "step": 27200 + }, + { + "epoch": 15.02, + "learning_rate": 3.2372151898734176e-06, + "loss": 0.0129, + "step": 27225 + }, + { + "epoch": 15.02, + "learning_rate": 3.230886075949367e-06, + "loss": 0.0128, + "step": 27250 + }, + { + "epoch": 15.03, + "learning_rate": 3.224556962025317e-06, + "loss": 0.0094, + "step": 27275 + }, + { + "epoch": 15.03, + "learning_rate": 3.2182278481012664e-06, + "loss": 0.0081, + "step": 27300 + }, + { + "epoch": 15.03, + "learning_rate": 3.2118987341772158e-06, + "loss": 0.0087, + "step": 27325 + }, + { + "epoch": 15.03, + "learning_rate": 3.2055696202531648e-06, + "loss": 0.0131, + "step": 27350 + }, + { + "epoch": 15.03, + "learning_rate": 3.199240506329114e-06, + "loss": 0.0073, + "step": 27375 + }, + { + "epoch": 15.03, + "learning_rate": 3.1929113924050636e-06, + "loss": 0.0086, + "step": 27400 + }, + { + "epoch": 15.03, + "learning_rate": 3.186582278481013e-06, + "loss": 0.0108, + "step": 27425 + }, + { + "epoch": 15.03, + "learning_rate": 3.1802531645569625e-06, + "loss": 0.01, + "step": 27450 + }, + { + "epoch": 15.03, + "learning_rate": 3.1739240506329115e-06, + "loss": 0.0123, + "step": 27475 + }, + { + "epoch": 15.03, + "learning_rate": 3.167594936708861e-06, + "loss": 0.0081, + "step": 27500 + }, + { + "epoch": 15.03, + "learning_rate": 3.1612658227848104e-06, + "loss": 0.0123, + "step": 27525 + }, + { + "epoch": 15.03, + "learning_rate": 3.1549367088607598e-06, + "loss": 0.0096, + "step": 27550 + }, + { + "epoch": 15.03, + "learning_rate": 3.1486075949367088e-06, + "loss": 0.0054, + "step": 27575 + }, + { + "epoch": 15.03, + "learning_rate": 3.1422784810126582e-06, + "loss": 0.006, + "step": 27600 + }, + { + "epoch": 15.03, + "learning_rate": 3.1359493670886076e-06, + "loss": 0.0089, + "step": 27625 + }, + { + "epoch": 15.03, + "learning_rate": 3.129620253164557e-06, + "loss": 0.0109, + "step": 27650 + }, + { + "epoch": 15.04, + "learning_rate": 3.1232911392405065e-06, + "loss": 0.0089, + "step": 27675 + }, + { + "epoch": 15.04, + "learning_rate": 3.1169620253164563e-06, + "loss": 0.0094, + "step": 27700 + }, + { + "epoch": 15.04, + "learning_rate": 3.1106329113924054e-06, + "loss": 0.0102, + "step": 27725 + }, + { + "epoch": 15.04, + "learning_rate": 3.1043037974683548e-06, + "loss": 0.0097, + "step": 27750 + }, + { + "epoch": 15.04, + "learning_rate": 3.097974683544304e-06, + "loss": 0.0078, + "step": 27775 + }, + { + "epoch": 15.04, + "learning_rate": 3.0916455696202536e-06, + "loss": 0.0073, + "step": 27800 + }, + { + "epoch": 15.04, + "learning_rate": 3.085316455696203e-06, + "loss": 0.0105, + "step": 27825 + }, + { + "epoch": 15.04, + "learning_rate": 3.078987341772152e-06, + "loss": 0.0083, + "step": 27850 + }, + { + "epoch": 15.04, + "learning_rate": 3.0726582278481015e-06, + "loss": 0.0138, + "step": 27875 + }, + { + "epoch": 15.04, + "learning_rate": 3.066329113924051e-06, + "loss": 0.0049, + "step": 27900 + }, + { + "epoch": 15.04, + "learning_rate": 3.0600000000000003e-06, + "loss": 0.0107, + "step": 27925 + }, + { + "epoch": 15.04, + "learning_rate": 3.0536708860759494e-06, + "loss": 0.0105, + "step": 27950 + }, + { + "epoch": 15.04, + "learning_rate": 3.0473417721518988e-06, + "loss": 0.0134, + "step": 27975 + }, + { + "epoch": 15.04, + "learning_rate": 3.041012658227848e-06, + "loss": 0.0137, + "step": 28000 + }, + { + "epoch": 15.04, + "eval_loss": 0.2780563235282898, + "eval_runtime": 1465.9542, + "eval_samples_per_second": 7.102, + "eval_steps_per_second": 0.444, + "eval_wer": 25.010162766874256, + "step": 28000 + }, + { + "epoch": 16.0, + "learning_rate": 3.0346835443037976e-06, + "loss": 0.009, + "step": 28025 + }, + { + "epoch": 16.0, + "learning_rate": 3.028354430379747e-06, + "loss": 0.0081, + "step": 28050 + }, + { + "epoch": 16.0, + "learning_rate": 3.022025316455696e-06, + "loss": 0.0109, + "step": 28075 + }, + { + "epoch": 16.0, + "learning_rate": 3.015696202531646e-06, + "loss": 0.0132, + "step": 28100 + }, + { + "epoch": 16.0, + "learning_rate": 3.0093670886075953e-06, + "loss": 0.0142, + "step": 28125 + }, + { + "epoch": 16.0, + "learning_rate": 3.0030379746835448e-06, + "loss": 0.0152, + "step": 28150 + }, + { + "epoch": 16.0, + "learning_rate": 2.996708860759494e-06, + "loss": 0.0074, + "step": 28175 + }, + { + "epoch": 16.0, + "learning_rate": 2.990379746835443e-06, + "loss": 0.0129, + "step": 28200 + }, + { + "epoch": 16.01, + "learning_rate": 2.9840506329113926e-06, + "loss": 0.0118, + "step": 28225 + }, + { + "epoch": 16.01, + "learning_rate": 2.977721518987342e-06, + "loss": 0.0153, + "step": 28250 + }, + { + "epoch": 16.01, + "learning_rate": 2.9713924050632915e-06, + "loss": 0.0121, + "step": 28275 + }, + { + "epoch": 16.01, + "learning_rate": 2.965063291139241e-06, + "loss": 0.0097, + "step": 28300 + }, + { + "epoch": 16.01, + "learning_rate": 2.95873417721519e-06, + "loss": 0.0083, + "step": 28325 + }, + { + "epoch": 16.01, + "learning_rate": 2.9524050632911393e-06, + "loss": 0.01, + "step": 28350 + }, + { + "epoch": 16.01, + "learning_rate": 2.9460759493670888e-06, + "loss": 0.0084, + "step": 28375 + }, + { + "epoch": 16.01, + "learning_rate": 2.939746835443038e-06, + "loss": 0.0124, + "step": 28400 + }, + { + "epoch": 16.01, + "learning_rate": 2.933417721518987e-06, + "loss": 0.0071, + "step": 28425 + }, + { + "epoch": 16.01, + "learning_rate": 2.9270886075949366e-06, + "loss": 0.0113, + "step": 28450 + }, + { + "epoch": 16.01, + "learning_rate": 2.920759493670886e-06, + "loss": 0.0085, + "step": 28475 + }, + { + "epoch": 16.01, + "learning_rate": 2.914430379746836e-06, + "loss": 0.0133, + "step": 28500 + }, + { + "epoch": 16.01, + "learning_rate": 2.9081012658227853e-06, + "loss": 0.014, + "step": 28525 + }, + { + "epoch": 16.01, + "learning_rate": 2.9017721518987348e-06, + "loss": 0.0105, + "step": 28550 + }, + { + "epoch": 16.01, + "learning_rate": 2.8954430379746838e-06, + "loss": 0.01, + "step": 28575 + }, + { + "epoch": 16.01, + "learning_rate": 2.889113924050633e-06, + "loss": 0.0135, + "step": 28600 + }, + { + "epoch": 16.02, + "learning_rate": 2.8827848101265826e-06, + "loss": 0.0118, + "step": 28625 + }, + { + "epoch": 16.02, + "learning_rate": 2.876455696202532e-06, + "loss": 0.0111, + "step": 28650 + }, + { + "epoch": 16.02, + "learning_rate": 2.8701265822784815e-06, + "loss": 0.008, + "step": 28675 + }, + { + "epoch": 16.02, + "learning_rate": 2.8637974683544305e-06, + "loss": 0.0103, + "step": 28700 + }, + { + "epoch": 16.02, + "learning_rate": 2.85746835443038e-06, + "loss": 0.0082, + "step": 28725 + }, + { + "epoch": 16.02, + "learning_rate": 2.8511392405063293e-06, + "loss": 0.0057, + "step": 28750 + }, + { + "epoch": 16.02, + "learning_rate": 2.8448101265822788e-06, + "loss": 0.0062, + "step": 28775 + }, + { + "epoch": 16.02, + "learning_rate": 2.8384810126582278e-06, + "loss": 0.0092, + "step": 28800 + }, + { + "epoch": 16.02, + "learning_rate": 2.832151898734177e-06, + "loss": 0.0084, + "step": 28825 + }, + { + "epoch": 16.02, + "learning_rate": 2.8258227848101266e-06, + "loss": 0.0099, + "step": 28850 + }, + { + "epoch": 16.02, + "learning_rate": 2.819493670886076e-06, + "loss": 0.0117, + "step": 28875 + }, + { + "epoch": 16.02, + "learning_rate": 2.8131645569620255e-06, + "loss": 0.0117, + "step": 28900 + }, + { + "epoch": 16.02, + "learning_rate": 2.8068354430379753e-06, + "loss": 0.0127, + "step": 28925 + }, + { + "epoch": 16.02, + "learning_rate": 2.8005063291139243e-06, + "loss": 0.0123, + "step": 28950 + }, + { + "epoch": 16.02, + "learning_rate": 2.7941772151898738e-06, + "loss": 0.0128, + "step": 28975 + }, + { + "epoch": 16.02, + "learning_rate": 2.787848101265823e-06, + "loss": 0.0073, + "step": 29000 + }, + { + "epoch": 16.02, + "eval_loss": 0.2685891091823578, + "eval_runtime": 1456.562, + "eval_samples_per_second": 7.148, + "eval_steps_per_second": 0.447, + "eval_wer": 23.278427291500677, + "step": 29000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.338574520844288e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-29000/training_args.bin b/checkpoint-29000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-29000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-3000/config.json b/checkpoint-3000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-3000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-3000/generation_config.json b/checkpoint-3000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-3000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-3000/optimizer.pt b/checkpoint-3000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..81c66ce403783997f541f3f4eca3f499f93697b1 --- /dev/null +++ b/checkpoint-3000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b62f9e1bbcb2b59f48192bb79273456e432aa0109d2c8c3169b6f1a4a1619c5 +size 1934161093 diff --git a/checkpoint-3000/preprocessor_config.json b/checkpoint-3000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-3000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-3000/pytorch_model.bin b/checkpoint-3000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..f7baccb2f00f773c54ac8c1d9cb9ebe46c1f6789 --- /dev/null +++ b/checkpoint-3000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95839ed3ea7668287f9a402b2eeb7bda7060b70c772cc72314306a7baeb06e9d +size 967102601 diff --git a/checkpoint-3000/rng_state.pth b/checkpoint-3000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..24faf58329fd18ff2bad42cc14cd9409816b65e7 --- /dev/null +++ b/checkpoint-3000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7201c85144279c594de0bc8e12b9fea5d8fbec4edbaac99af09f7c225489e0c4 +size 14575 diff --git a/checkpoint-3000/scaler.pt b/checkpoint-3000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..333528b5fb44076a5b473c9954e9cfd4bc1dca85 --- /dev/null +++ b/checkpoint-3000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ec9feb0f78b9af55c8d3da1e9b2f9c3f0e064df466379c94e97c1e0b5c64a34 +size 557 diff --git a/checkpoint-3000/scheduler.pt b/checkpoint-3000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7848cb52c97a51dd4e3c732cd1ac8f935cbfc10b --- /dev/null +++ b/checkpoint-3000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe4eb5b12174ab16ff34c25abcbc085eeaef4b637faa0ed42eadd934a36f87d8 +size 627 diff --git a/checkpoint-3000/trainer_state.json b/checkpoint-3000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6365e9b4183122b152d0f83961918bef446a71b2 --- /dev/null +++ b/checkpoint-3000/trainer_state.json @@ -0,0 +1,763 @@ +{ + "best_metric": 35.12252231743606, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-2000", + "epoch": 1.031225, + "global_step": 3000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.38492133613568e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3000/training_args.bin b/checkpoint-3000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-3000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-30000/config.json b/checkpoint-30000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-30000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-30000/generation_config.json b/checkpoint-30000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-30000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-30000/optimizer.pt b/checkpoint-30000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d27e29329f1c531c23557e97b00cc9310828d759 --- /dev/null +++ b/checkpoint-30000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cf454552dcfc8db9ede592b3bfa443c96eda5a596c3182e81be7c16426d44fd +size 1934161093 diff --git a/checkpoint-30000/preprocessor_config.json b/checkpoint-30000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-30000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-30000/pytorch_model.bin b/checkpoint-30000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..f0b095ce2811947bb116be19eabc99b4ce2c3302 --- /dev/null +++ b/checkpoint-30000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48da2266ad6031b30052f5dea900ce8970b8506ddc0cb54fd11d04f6688d3f98 +size 967102601 diff --git a/checkpoint-30000/rng_state.pth b/checkpoint-30000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..29fbae97f5620d002e62f542ca71d8dce0a94f22 --- /dev/null +++ b/checkpoint-30000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20cc2923c803afb46681098eaea0ba032e4d12abf825fbc58b649111255b7b3 +size 14575 diff --git a/checkpoint-30000/scaler.pt b/checkpoint-30000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a9099578d0034c3c67d35123c8de96fb4e5c6ac --- /dev/null +++ b/checkpoint-30000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc53ac474c59b2ed0b6c9b5a51ba406561a06f6957a15e89b9adc4c139f28825 +size 557 diff --git a/checkpoint-30000/scheduler.pt b/checkpoint-30000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b2560effe578d0f2e382b480391fa6ee9c57c4f --- /dev/null +++ b/checkpoint-30000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bce07583b5be8f6187f07143ced50bdc168e23f8666136b82af3e471fb297d2 +size 627 diff --git a/checkpoint-30000/trainer_state.json b/checkpoint-30000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..77b4d8050095412266882fed85c26b97f94bdd91 --- /dev/null +++ b/checkpoint-30000/trainer_state.json @@ -0,0 +1,7486 @@ +{ + "best_metric": 23.029642758418838, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-30000", + "epoch": 17.005825, + "global_step": 30000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + }, + { + "epoch": 13.03, + "learning_rate": 4.046835443037975e-06, + "loss": 0.0128, + "step": 24025 + }, + { + "epoch": 13.03, + "learning_rate": 4.040506329113924e-06, + "loss": 0.012, + "step": 24050 + }, + { + "epoch": 13.03, + "learning_rate": 4.034177215189874e-06, + "loss": 0.0098, + "step": 24075 + }, + { + "epoch": 13.03, + "learning_rate": 4.027848101265823e-06, + "loss": 0.0091, + "step": 24100 + }, + { + "epoch": 13.03, + "learning_rate": 4.021518987341773e-06, + "loss": 0.0077, + "step": 24125 + }, + { + "epoch": 13.03, + "learning_rate": 4.015189873417722e-06, + "loss": 0.0135, + "step": 24150 + }, + { + "epoch": 13.04, + "learning_rate": 4.0088607594936715e-06, + "loss": 0.0124, + "step": 24175 + }, + { + "epoch": 13.04, + "learning_rate": 4.002531645569621e-06, + "loss": 0.0152, + "step": 24200 + }, + { + "epoch": 13.04, + "learning_rate": 3.99620253164557e-06, + "loss": 0.0127, + "step": 24225 + }, + { + "epoch": 13.04, + "learning_rate": 3.989873417721519e-06, + "loss": 0.0066, + "step": 24250 + }, + { + "epoch": 13.04, + "learning_rate": 3.983544303797468e-06, + "loss": 0.0163, + "step": 24275 + }, + { + "epoch": 13.04, + "learning_rate": 3.977215189873418e-06, + "loss": 0.0095, + "step": 24300 + }, + { + "epoch": 13.04, + "learning_rate": 3.970886075949367e-06, + "loss": 0.0103, + "step": 24325 + }, + { + "epoch": 13.04, + "learning_rate": 3.964556962025317e-06, + "loss": 0.0112, + "step": 24350 + }, + { + "epoch": 13.04, + "learning_rate": 3.958227848101266e-06, + "loss": 0.01, + "step": 24375 + }, + { + "epoch": 13.04, + "learning_rate": 3.9518987341772155e-06, + "loss": 0.0131, + "step": 24400 + }, + { + "epoch": 13.04, + "learning_rate": 3.945569620253165e-06, + "loss": 0.0143, + "step": 24425 + }, + { + "epoch": 13.04, + "learning_rate": 3.939240506329114e-06, + "loss": 0.0151, + "step": 24450 + }, + { + "epoch": 13.04, + "learning_rate": 3.932911392405064e-06, + "loss": 0.0129, + "step": 24475 + }, + { + "epoch": 13.04, + "learning_rate": 3.926582278481013e-06, + "loss": 0.0125, + "step": 24500 + }, + { + "epoch": 14.0, + "learning_rate": 3.920506329113925e-06, + "loss": 0.0165, + "step": 24525 + }, + { + "epoch": 14.0, + "learning_rate": 3.914177215189874e-06, + "loss": 0.0114, + "step": 24550 + }, + { + "epoch": 14.0, + "learning_rate": 3.9078481012658235e-06, + "loss": 0.0111, + "step": 24575 + }, + { + "epoch": 14.0, + "learning_rate": 3.901518987341772e-06, + "loss": 0.0134, + "step": 24600 + }, + { + "epoch": 14.0, + "learning_rate": 3.8951898734177215e-06, + "loss": 0.0138, + "step": 24625 + }, + { + "epoch": 14.0, + "learning_rate": 3.888860759493671e-06, + "loss": 0.0151, + "step": 24650 + }, + { + "epoch": 14.0, + "learning_rate": 3.88253164556962e-06, + "loss": 0.0101, + "step": 24675 + }, + { + "epoch": 14.0, + "learning_rate": 3.87620253164557e-06, + "loss": 0.0108, + "step": 24700 + }, + { + "epoch": 14.01, + "learning_rate": 3.869873417721519e-06, + "loss": 0.0151, + "step": 24725 + }, + { + "epoch": 14.01, + "learning_rate": 3.863544303797469e-06, + "loss": 0.0132, + "step": 24750 + }, + { + "epoch": 14.01, + "learning_rate": 3.857215189873418e-06, + "loss": 0.012, + "step": 24775 + }, + { + "epoch": 14.01, + "learning_rate": 3.8508860759493675e-06, + "loss": 0.017, + "step": 24800 + }, + { + "epoch": 14.01, + "learning_rate": 3.844556962025317e-06, + "loss": 0.0141, + "step": 24825 + }, + { + "epoch": 14.01, + "learning_rate": 3.838227848101266e-06, + "loss": 0.0166, + "step": 24850 + }, + { + "epoch": 14.01, + "learning_rate": 3.831898734177216e-06, + "loss": 0.0112, + "step": 24875 + }, + { + "epoch": 14.01, + "learning_rate": 3.825569620253165e-06, + "loss": 0.0097, + "step": 24900 + }, + { + "epoch": 14.01, + "learning_rate": 3.819240506329115e-06, + "loss": 0.0138, + "step": 24925 + }, + { + "epoch": 14.01, + "learning_rate": 3.8129113924050636e-06, + "loss": 0.0069, + "step": 24950 + }, + { + "epoch": 14.01, + "learning_rate": 3.806582278481013e-06, + "loss": 0.0111, + "step": 24975 + }, + { + "epoch": 14.01, + "learning_rate": 3.8002531645569625e-06, + "loss": 0.0149, + "step": 25000 + }, + { + "epoch": 14.01, + "eval_loss": 0.2640444040298462, + "eval_runtime": 1478.4977, + "eval_samples_per_second": 7.042, + "eval_steps_per_second": 0.44, + "eval_wer": 24.063805915543345, + "step": 25000 + }, + { + "epoch": 14.01, + "learning_rate": 3.7939240506329115e-06, + "loss": 0.0176, + "step": 25025 + }, + { + "epoch": 14.01, + "learning_rate": 3.787594936708861e-06, + "loss": 0.0104, + "step": 25050 + }, + { + "epoch": 14.01, + "learning_rate": 3.7812658227848103e-06, + "loss": 0.0084, + "step": 25075 + }, + { + "epoch": 14.01, + "learning_rate": 3.7749367088607598e-06, + "loss": 0.0118, + "step": 25100 + }, + { + "epoch": 14.02, + "learning_rate": 3.768607594936709e-06, + "loss": 0.0099, + "step": 25125 + }, + { + "epoch": 14.02, + "learning_rate": 3.762278481012658e-06, + "loss": 0.0089, + "step": 25150 + }, + { + "epoch": 14.02, + "learning_rate": 3.7559493670886076e-06, + "loss": 0.0119, + "step": 25175 + }, + { + "epoch": 14.02, + "learning_rate": 3.749620253164557e-06, + "loss": 0.0118, + "step": 25200 + }, + { + "epoch": 14.02, + "learning_rate": 3.743291139240507e-06, + "loss": 0.0108, + "step": 25225 + }, + { + "epoch": 14.02, + "learning_rate": 3.7369620253164563e-06, + "loss": 0.0101, + "step": 25250 + }, + { + "epoch": 14.02, + "learning_rate": 3.7306329113924058e-06, + "loss": 0.0095, + "step": 25275 + }, + { + "epoch": 14.02, + "learning_rate": 3.7243037974683548e-06, + "loss": 0.0106, + "step": 25300 + }, + { + "epoch": 14.02, + "learning_rate": 3.717974683544304e-06, + "loss": 0.0135, + "step": 25325 + }, + { + "epoch": 14.02, + "learning_rate": 3.7116455696202536e-06, + "loss": 0.0115, + "step": 25350 + }, + { + "epoch": 14.02, + "learning_rate": 3.705316455696203e-06, + "loss": 0.0114, + "step": 25375 + }, + { + "epoch": 14.02, + "learning_rate": 3.698987341772152e-06, + "loss": 0.0119, + "step": 25400 + }, + { + "epoch": 14.02, + "learning_rate": 3.6926582278481015e-06, + "loss": 0.0123, + "step": 25425 + }, + { + "epoch": 14.02, + "learning_rate": 3.686329113924051e-06, + "loss": 0.0138, + "step": 25450 + }, + { + "epoch": 14.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 0.0133, + "step": 25475 + }, + { + "epoch": 14.02, + "learning_rate": 3.6736708860759498e-06, + "loss": 0.0106, + "step": 25500 + }, + { + "epoch": 14.03, + "learning_rate": 3.6673417721518988e-06, + "loss": 0.0099, + "step": 25525 + }, + { + "epoch": 14.03, + "learning_rate": 3.661012658227848e-06, + "loss": 0.0104, + "step": 25550 + }, + { + "epoch": 14.03, + "learning_rate": 3.6546835443037976e-06, + "loss": 0.0143, + "step": 25575 + }, + { + "epoch": 14.03, + "learning_rate": 3.648354430379747e-06, + "loss": 0.0096, + "step": 25600 + }, + { + "epoch": 14.03, + "learning_rate": 3.642025316455696e-06, + "loss": 0.0094, + "step": 25625 + }, + { + "epoch": 14.03, + "learning_rate": 3.6356962025316463e-06, + "loss": 0.0162, + "step": 25650 + }, + { + "epoch": 14.03, + "learning_rate": 3.6293670886075953e-06, + "loss": 0.008, + "step": 25675 + }, + { + "epoch": 14.03, + "learning_rate": 3.6230379746835448e-06, + "loss": 0.0094, + "step": 25700 + }, + { + "epoch": 14.03, + "learning_rate": 3.616708860759494e-06, + "loss": 0.0122, + "step": 25725 + }, + { + "epoch": 14.03, + "learning_rate": 3.6103797468354436e-06, + "loss": 0.0128, + "step": 25750 + }, + { + "epoch": 14.03, + "learning_rate": 3.6040506329113926e-06, + "loss": 0.0117, + "step": 25775 + }, + { + "epoch": 14.03, + "learning_rate": 3.597721518987342e-06, + "loss": 0.0067, + "step": 25800 + }, + { + "epoch": 14.03, + "learning_rate": 3.5913924050632915e-06, + "loss": 0.0078, + "step": 25825 + }, + { + "epoch": 14.03, + "learning_rate": 3.585063291139241e-06, + "loss": 0.0071, + "step": 25850 + }, + { + "epoch": 14.03, + "learning_rate": 3.57873417721519e-06, + "loss": 0.0093, + "step": 25875 + }, + { + "epoch": 14.03, + "learning_rate": 3.5724050632911393e-06, + "loss": 0.0064, + "step": 25900 + }, + { + "epoch": 14.04, + "learning_rate": 3.5660759493670888e-06, + "loss": 0.0067, + "step": 25925 + }, + { + "epoch": 14.04, + "learning_rate": 3.559746835443038e-06, + "loss": 0.0076, + "step": 25950 + }, + { + "epoch": 14.04, + "learning_rate": 3.5534177215189876e-06, + "loss": 0.0099, + "step": 25975 + }, + { + "epoch": 14.04, + "learning_rate": 3.5470886075949366e-06, + "loss": 0.0111, + "step": 26000 + }, + { + "epoch": 14.04, + "eval_loss": 0.2734878659248352, + "eval_runtime": 1456.7487, + "eval_samples_per_second": 7.147, + "eval_steps_per_second": 0.447, + "eval_wer": 24.359745686921737, + "step": 26000 + }, + { + "epoch": 14.04, + "learning_rate": 3.540759493670886e-06, + "loss": 0.0134, + "step": 26025 + }, + { + "epoch": 14.04, + "learning_rate": 3.534430379746836e-06, + "loss": 0.008, + "step": 26050 + }, + { + "epoch": 14.04, + "learning_rate": 3.5281012658227853e-06, + "loss": 0.0138, + "step": 26075 + }, + { + "epoch": 14.04, + "learning_rate": 3.5217721518987348e-06, + "loss": 0.0075, + "step": 26100 + }, + { + "epoch": 14.04, + "learning_rate": 3.515443037974684e-06, + "loss": 0.0097, + "step": 26125 + }, + { + "epoch": 14.04, + "learning_rate": 3.509113924050633e-06, + "loss": 0.0139, + "step": 26150 + }, + { + "epoch": 14.04, + "learning_rate": 3.5027848101265826e-06, + "loss": 0.0099, + "step": 26175 + }, + { + "epoch": 14.04, + "learning_rate": 3.496455696202532e-06, + "loss": 0.0148, + "step": 26200 + }, + { + "epoch": 14.04, + "learning_rate": 3.4901265822784815e-06, + "loss": 0.0119, + "step": 26225 + }, + { + "epoch": 14.04, + "learning_rate": 3.4837974683544305e-06, + "loss": 0.0086, + "step": 26250 + }, + { + "epoch": 15.0, + "learning_rate": 3.47746835443038e-06, + "loss": 0.0091, + "step": 26275 + }, + { + "epoch": 15.0, + "learning_rate": 3.4711392405063293e-06, + "loss": 0.0105, + "step": 26300 + }, + { + "epoch": 15.0, + "learning_rate": 3.465063291139241e-06, + "loss": 0.0138, + "step": 26325 + }, + { + "epoch": 15.0, + "learning_rate": 3.45873417721519e-06, + "loss": 0.0185, + "step": 26350 + }, + { + "epoch": 15.0, + "learning_rate": 3.4524050632911392e-06, + "loss": 0.015, + "step": 26375 + }, + { + "epoch": 15.0, + "learning_rate": 3.4460759493670886e-06, + "loss": 0.0177, + "step": 26400 + }, + { + "epoch": 15.0, + "learning_rate": 3.439746835443038e-06, + "loss": 0.014, + "step": 26425 + }, + { + "epoch": 15.0, + "learning_rate": 3.433417721518988e-06, + "loss": 0.0127, + "step": 26450 + }, + { + "epoch": 15.01, + "learning_rate": 3.4270886075949374e-06, + "loss": 0.0135, + "step": 26475 + }, + { + "epoch": 15.01, + "learning_rate": 3.4207594936708864e-06, + "loss": 0.0171, + "step": 26500 + }, + { + "epoch": 15.01, + "learning_rate": 3.4144303797468358e-06, + "loss": 0.0114, + "step": 26525 + }, + { + "epoch": 15.01, + "learning_rate": 3.4081012658227852e-06, + "loss": 0.0099, + "step": 26550 + }, + { + "epoch": 15.01, + "learning_rate": 3.4017721518987346e-06, + "loss": 0.0126, + "step": 26575 + }, + { + "epoch": 15.01, + "learning_rate": 3.395443037974684e-06, + "loss": 0.0116, + "step": 26600 + }, + { + "epoch": 15.01, + "learning_rate": 3.389113924050633e-06, + "loss": 0.0087, + "step": 26625 + }, + { + "epoch": 15.01, + "learning_rate": 3.3827848101265825e-06, + "loss": 0.0073, + "step": 26650 + }, + { + "epoch": 15.01, + "learning_rate": 3.376455696202532e-06, + "loss": 0.0102, + "step": 26675 + }, + { + "epoch": 15.01, + "learning_rate": 3.3701265822784814e-06, + "loss": 0.0106, + "step": 26700 + }, + { + "epoch": 15.01, + "learning_rate": 3.3637974683544304e-06, + "loss": 0.0094, + "step": 26725 + }, + { + "epoch": 15.01, + "learning_rate": 3.35746835443038e-06, + "loss": 0.0113, + "step": 26750 + }, + { + "epoch": 15.01, + "learning_rate": 3.3511392405063292e-06, + "loss": 0.0169, + "step": 26775 + }, + { + "epoch": 15.01, + "learning_rate": 3.3448101265822786e-06, + "loss": 0.0084, + "step": 26800 + }, + { + "epoch": 15.01, + "learning_rate": 3.338481012658228e-06, + "loss": 0.0153, + "step": 26825 + }, + { + "epoch": 15.01, + "learning_rate": 3.332151898734177e-06, + "loss": 0.012, + "step": 26850 + }, + { + "epoch": 15.02, + "learning_rate": 3.325822784810127e-06, + "loss": 0.0106, + "step": 26875 + }, + { + "epoch": 15.02, + "learning_rate": 3.3194936708860764e-06, + "loss": 0.0099, + "step": 26900 + }, + { + "epoch": 15.02, + "learning_rate": 3.3131645569620258e-06, + "loss": 0.008, + "step": 26925 + }, + { + "epoch": 15.02, + "learning_rate": 3.306835443037975e-06, + "loss": 0.0076, + "step": 26950 + }, + { + "epoch": 15.02, + "learning_rate": 3.3005063291139246e-06, + "loss": 0.0088, + "step": 26975 + }, + { + "epoch": 15.02, + "learning_rate": 3.2941772151898736e-06, + "loss": 0.009, + "step": 27000 + }, + { + "epoch": 15.02, + "eval_loss": 0.2693029046058655, + "eval_runtime": 1479.1374, + "eval_samples_per_second": 7.039, + "eval_steps_per_second": 0.44, + "eval_wer": 24.28819980812696, + "step": 27000 + }, + { + "epoch": 15.02, + "learning_rate": 3.287848101265823e-06, + "loss": 0.0115, + "step": 27025 + }, + { + "epoch": 15.02, + "learning_rate": 3.2815189873417725e-06, + "loss": 0.0099, + "step": 27050 + }, + { + "epoch": 15.02, + "learning_rate": 3.275189873417722e-06, + "loss": 0.0073, + "step": 27075 + }, + { + "epoch": 15.02, + "learning_rate": 3.268860759493671e-06, + "loss": 0.0061, + "step": 27100 + }, + { + "epoch": 15.02, + "learning_rate": 3.2625316455696204e-06, + "loss": 0.007, + "step": 27125 + }, + { + "epoch": 15.02, + "learning_rate": 3.2562025316455698e-06, + "loss": 0.0109, + "step": 27150 + }, + { + "epoch": 15.02, + "learning_rate": 3.2498734177215192e-06, + "loss": 0.0123, + "step": 27175 + }, + { + "epoch": 15.02, + "learning_rate": 3.2435443037974686e-06, + "loss": 0.0121, + "step": 27200 + }, + { + "epoch": 15.02, + "learning_rate": 3.2372151898734176e-06, + "loss": 0.0129, + "step": 27225 + }, + { + "epoch": 15.02, + "learning_rate": 3.230886075949367e-06, + "loss": 0.0128, + "step": 27250 + }, + { + "epoch": 15.03, + "learning_rate": 3.224556962025317e-06, + "loss": 0.0094, + "step": 27275 + }, + { + "epoch": 15.03, + "learning_rate": 3.2182278481012664e-06, + "loss": 0.0081, + "step": 27300 + }, + { + "epoch": 15.03, + "learning_rate": 3.2118987341772158e-06, + "loss": 0.0087, + "step": 27325 + }, + { + "epoch": 15.03, + "learning_rate": 3.2055696202531648e-06, + "loss": 0.0131, + "step": 27350 + }, + { + "epoch": 15.03, + "learning_rate": 3.199240506329114e-06, + "loss": 0.0073, + "step": 27375 + }, + { + "epoch": 15.03, + "learning_rate": 3.1929113924050636e-06, + "loss": 0.0086, + "step": 27400 + }, + { + "epoch": 15.03, + "learning_rate": 3.186582278481013e-06, + "loss": 0.0108, + "step": 27425 + }, + { + "epoch": 15.03, + "learning_rate": 3.1802531645569625e-06, + "loss": 0.01, + "step": 27450 + }, + { + "epoch": 15.03, + "learning_rate": 3.1739240506329115e-06, + "loss": 0.0123, + "step": 27475 + }, + { + "epoch": 15.03, + "learning_rate": 3.167594936708861e-06, + "loss": 0.0081, + "step": 27500 + }, + { + "epoch": 15.03, + "learning_rate": 3.1612658227848104e-06, + "loss": 0.0123, + "step": 27525 + }, + { + "epoch": 15.03, + "learning_rate": 3.1549367088607598e-06, + "loss": 0.0096, + "step": 27550 + }, + { + "epoch": 15.03, + "learning_rate": 3.1486075949367088e-06, + "loss": 0.0054, + "step": 27575 + }, + { + "epoch": 15.03, + "learning_rate": 3.1422784810126582e-06, + "loss": 0.006, + "step": 27600 + }, + { + "epoch": 15.03, + "learning_rate": 3.1359493670886076e-06, + "loss": 0.0089, + "step": 27625 + }, + { + "epoch": 15.03, + "learning_rate": 3.129620253164557e-06, + "loss": 0.0109, + "step": 27650 + }, + { + "epoch": 15.04, + "learning_rate": 3.1232911392405065e-06, + "loss": 0.0089, + "step": 27675 + }, + { + "epoch": 15.04, + "learning_rate": 3.1169620253164563e-06, + "loss": 0.0094, + "step": 27700 + }, + { + "epoch": 15.04, + "learning_rate": 3.1106329113924054e-06, + "loss": 0.0102, + "step": 27725 + }, + { + "epoch": 15.04, + "learning_rate": 3.1043037974683548e-06, + "loss": 0.0097, + "step": 27750 + }, + { + "epoch": 15.04, + "learning_rate": 3.097974683544304e-06, + "loss": 0.0078, + "step": 27775 + }, + { + "epoch": 15.04, + "learning_rate": 3.0916455696202536e-06, + "loss": 0.0073, + "step": 27800 + }, + { + "epoch": 15.04, + "learning_rate": 3.085316455696203e-06, + "loss": 0.0105, + "step": 27825 + }, + { + "epoch": 15.04, + "learning_rate": 3.078987341772152e-06, + "loss": 0.0083, + "step": 27850 + }, + { + "epoch": 15.04, + "learning_rate": 3.0726582278481015e-06, + "loss": 0.0138, + "step": 27875 + }, + { + "epoch": 15.04, + "learning_rate": 3.066329113924051e-06, + "loss": 0.0049, + "step": 27900 + }, + { + "epoch": 15.04, + "learning_rate": 3.0600000000000003e-06, + "loss": 0.0107, + "step": 27925 + }, + { + "epoch": 15.04, + "learning_rate": 3.0536708860759494e-06, + "loss": 0.0105, + "step": 27950 + }, + { + "epoch": 15.04, + "learning_rate": 3.0473417721518988e-06, + "loss": 0.0134, + "step": 27975 + }, + { + "epoch": 15.04, + "learning_rate": 3.041012658227848e-06, + "loss": 0.0137, + "step": 28000 + }, + { + "epoch": 15.04, + "eval_loss": 0.2780563235282898, + "eval_runtime": 1465.9542, + "eval_samples_per_second": 7.102, + "eval_steps_per_second": 0.444, + "eval_wer": 25.010162766874256, + "step": 28000 + }, + { + "epoch": 16.0, + "learning_rate": 3.0346835443037976e-06, + "loss": 0.009, + "step": 28025 + }, + { + "epoch": 16.0, + "learning_rate": 3.028354430379747e-06, + "loss": 0.0081, + "step": 28050 + }, + { + "epoch": 16.0, + "learning_rate": 3.022025316455696e-06, + "loss": 0.0109, + "step": 28075 + }, + { + "epoch": 16.0, + "learning_rate": 3.015696202531646e-06, + "loss": 0.0132, + "step": 28100 + }, + { + "epoch": 16.0, + "learning_rate": 3.0093670886075953e-06, + "loss": 0.0142, + "step": 28125 + }, + { + "epoch": 16.0, + "learning_rate": 3.0030379746835448e-06, + "loss": 0.0152, + "step": 28150 + }, + { + "epoch": 16.0, + "learning_rate": 2.996708860759494e-06, + "loss": 0.0074, + "step": 28175 + }, + { + "epoch": 16.0, + "learning_rate": 2.990379746835443e-06, + "loss": 0.0129, + "step": 28200 + }, + { + "epoch": 16.01, + "learning_rate": 2.9840506329113926e-06, + "loss": 0.0118, + "step": 28225 + }, + { + "epoch": 16.01, + "learning_rate": 2.977721518987342e-06, + "loss": 0.0153, + "step": 28250 + }, + { + "epoch": 16.01, + "learning_rate": 2.9713924050632915e-06, + "loss": 0.0121, + "step": 28275 + }, + { + "epoch": 16.01, + "learning_rate": 2.965063291139241e-06, + "loss": 0.0097, + "step": 28300 + }, + { + "epoch": 16.01, + "learning_rate": 2.95873417721519e-06, + "loss": 0.0083, + "step": 28325 + }, + { + "epoch": 16.01, + "learning_rate": 2.9524050632911393e-06, + "loss": 0.01, + "step": 28350 + }, + { + "epoch": 16.01, + "learning_rate": 2.9460759493670888e-06, + "loss": 0.0084, + "step": 28375 + }, + { + "epoch": 16.01, + "learning_rate": 2.939746835443038e-06, + "loss": 0.0124, + "step": 28400 + }, + { + "epoch": 16.01, + "learning_rate": 2.933417721518987e-06, + "loss": 0.0071, + "step": 28425 + }, + { + "epoch": 16.01, + "learning_rate": 2.9270886075949366e-06, + "loss": 0.0113, + "step": 28450 + }, + { + "epoch": 16.01, + "learning_rate": 2.920759493670886e-06, + "loss": 0.0085, + "step": 28475 + }, + { + "epoch": 16.01, + "learning_rate": 2.914430379746836e-06, + "loss": 0.0133, + "step": 28500 + }, + { + "epoch": 16.01, + "learning_rate": 2.9081012658227853e-06, + "loss": 0.014, + "step": 28525 + }, + { + "epoch": 16.01, + "learning_rate": 2.9017721518987348e-06, + "loss": 0.0105, + "step": 28550 + }, + { + "epoch": 16.01, + "learning_rate": 2.8954430379746838e-06, + "loss": 0.01, + "step": 28575 + }, + { + "epoch": 16.01, + "learning_rate": 2.889113924050633e-06, + "loss": 0.0135, + "step": 28600 + }, + { + "epoch": 16.02, + "learning_rate": 2.8827848101265826e-06, + "loss": 0.0118, + "step": 28625 + }, + { + "epoch": 16.02, + "learning_rate": 2.876455696202532e-06, + "loss": 0.0111, + "step": 28650 + }, + { + "epoch": 16.02, + "learning_rate": 2.8701265822784815e-06, + "loss": 0.008, + "step": 28675 + }, + { + "epoch": 16.02, + "learning_rate": 2.8637974683544305e-06, + "loss": 0.0103, + "step": 28700 + }, + { + "epoch": 16.02, + "learning_rate": 2.85746835443038e-06, + "loss": 0.0082, + "step": 28725 + }, + { + "epoch": 16.02, + "learning_rate": 2.8511392405063293e-06, + "loss": 0.0057, + "step": 28750 + }, + { + "epoch": 16.02, + "learning_rate": 2.8448101265822788e-06, + "loss": 0.0062, + "step": 28775 + }, + { + "epoch": 16.02, + "learning_rate": 2.8384810126582278e-06, + "loss": 0.0092, + "step": 28800 + }, + { + "epoch": 16.02, + "learning_rate": 2.832151898734177e-06, + "loss": 0.0084, + "step": 28825 + }, + { + "epoch": 16.02, + "learning_rate": 2.8258227848101266e-06, + "loss": 0.0099, + "step": 28850 + }, + { + "epoch": 16.02, + "learning_rate": 2.819493670886076e-06, + "loss": 0.0117, + "step": 28875 + }, + { + "epoch": 16.02, + "learning_rate": 2.8131645569620255e-06, + "loss": 0.0117, + "step": 28900 + }, + { + "epoch": 16.02, + "learning_rate": 2.8068354430379753e-06, + "loss": 0.0127, + "step": 28925 + }, + { + "epoch": 16.02, + "learning_rate": 2.8005063291139243e-06, + "loss": 0.0123, + "step": 28950 + }, + { + "epoch": 16.02, + "learning_rate": 2.7941772151898738e-06, + "loss": 0.0128, + "step": 28975 + }, + { + "epoch": 16.02, + "learning_rate": 2.787848101265823e-06, + "loss": 0.0073, + "step": 29000 + }, + { + "epoch": 16.02, + "eval_loss": 0.2685891091823578, + "eval_runtime": 1456.562, + "eval_samples_per_second": 7.148, + "eval_steps_per_second": 0.447, + "eval_wer": 23.278427291500677, + "step": 29000 + }, + { + "epoch": 16.03, + "learning_rate": 2.7815189873417726e-06, + "loss": 0.0068, + "step": 29025 + }, + { + "epoch": 16.03, + "learning_rate": 2.7751898734177216e-06, + "loss": 0.0087, + "step": 29050 + }, + { + "epoch": 16.03, + "learning_rate": 2.768860759493671e-06, + "loss": 0.0084, + "step": 29075 + }, + { + "epoch": 16.03, + "learning_rate": 2.7625316455696205e-06, + "loss": 0.0039, + "step": 29100 + }, + { + "epoch": 16.03, + "learning_rate": 2.75620253164557e-06, + "loss": 0.0066, + "step": 29125 + }, + { + "epoch": 16.03, + "learning_rate": 2.7498734177215193e-06, + "loss": 0.0108, + "step": 29150 + }, + { + "epoch": 16.03, + "learning_rate": 2.7435443037974683e-06, + "loss": 0.0052, + "step": 29175 + }, + { + "epoch": 16.03, + "learning_rate": 2.7372151898734178e-06, + "loss": 0.0061, + "step": 29200 + }, + { + "epoch": 16.03, + "learning_rate": 2.730886075949367e-06, + "loss": 0.0083, + "step": 29225 + }, + { + "epoch": 16.03, + "learning_rate": 2.7245569620253166e-06, + "loss": 0.0062, + "step": 29250 + }, + { + "epoch": 16.03, + "learning_rate": 2.7182278481012656e-06, + "loss": 0.0078, + "step": 29275 + }, + { + "epoch": 16.03, + "learning_rate": 2.711898734177215e-06, + "loss": 0.0127, + "step": 29300 + }, + { + "epoch": 16.03, + "learning_rate": 2.705569620253165e-06, + "loss": 0.0092, + "step": 29325 + }, + { + "epoch": 16.03, + "learning_rate": 2.6992405063291143e-06, + "loss": 0.0087, + "step": 29350 + }, + { + "epoch": 16.03, + "learning_rate": 2.6929113924050638e-06, + "loss": 0.0062, + "step": 29375 + }, + { + "epoch": 16.03, + "learning_rate": 2.686582278481013e-06, + "loss": 0.0065, + "step": 29400 + }, + { + "epoch": 16.04, + "learning_rate": 2.680253164556962e-06, + "loss": 0.0057, + "step": 29425 + }, + { + "epoch": 16.04, + "learning_rate": 2.6739240506329116e-06, + "loss": 0.0071, + "step": 29450 + }, + { + "epoch": 16.04, + "learning_rate": 2.667594936708861e-06, + "loss": 0.01, + "step": 29475 + }, + { + "epoch": 16.04, + "learning_rate": 2.6612658227848105e-06, + "loss": 0.009, + "step": 29500 + }, + { + "epoch": 16.04, + "learning_rate": 2.65493670886076e-06, + "loss": 0.0091, + "step": 29525 + }, + { + "epoch": 16.04, + "learning_rate": 2.648607594936709e-06, + "loss": 0.0104, + "step": 29550 + }, + { + "epoch": 16.04, + "learning_rate": 2.6422784810126583e-06, + "loss": 0.0071, + "step": 29575 + }, + { + "epoch": 16.04, + "learning_rate": 2.6359493670886078e-06, + "loss": 0.0081, + "step": 29600 + }, + { + "epoch": 16.04, + "learning_rate": 2.629620253164557e-06, + "loss": 0.008, + "step": 29625 + }, + { + "epoch": 16.04, + "learning_rate": 2.623291139240506e-06, + "loss": 0.0067, + "step": 29650 + }, + { + "epoch": 16.04, + "learning_rate": 2.6169620253164556e-06, + "loss": 0.0066, + "step": 29675 + }, + { + "epoch": 16.04, + "learning_rate": 2.610632911392405e-06, + "loss": 0.0121, + "step": 29700 + }, + { + "epoch": 16.04, + "learning_rate": 2.6043037974683545e-06, + "loss": 0.0075, + "step": 29725 + }, + { + "epoch": 16.04, + "learning_rate": 2.5979746835443043e-06, + "loss": 0.0111, + "step": 29750 + }, + { + "epoch": 17.0, + "learning_rate": 2.5916455696202538e-06, + "loss": 0.0095, + "step": 29775 + }, + { + "epoch": 17.0, + "learning_rate": 2.5853164556962028e-06, + "loss": 0.0077, + "step": 29800 + }, + { + "epoch": 17.0, + "learning_rate": 2.578987341772152e-06, + "loss": 0.0047, + "step": 29825 + }, + { + "epoch": 17.0, + "learning_rate": 2.5726582278481016e-06, + "loss": 0.0092, + "step": 29850 + }, + { + "epoch": 17.0, + "learning_rate": 2.566329113924051e-06, + "loss": 0.0191, + "step": 29875 + }, + { + "epoch": 17.0, + "learning_rate": 2.56e-06, + "loss": 0.0102, + "step": 29900 + }, + { + "epoch": 17.0, + "learning_rate": 2.5536708860759495e-06, + "loss": 0.0146, + "step": 29925 + }, + { + "epoch": 17.0, + "learning_rate": 2.547341772151899e-06, + "loss": 0.0096, + "step": 29950 + }, + { + "epoch": 17.01, + "learning_rate": 2.5410126582278483e-06, + "loss": 0.0159, + "step": 29975 + }, + { + "epoch": 17.01, + "learning_rate": 2.5346835443037978e-06, + "loss": 0.0094, + "step": 30000 + }, + { + "epoch": 17.01, + "eval_loss": 0.2636851966381073, + "eval_runtime": 1508.7196, + "eval_samples_per_second": 6.901, + "eval_steps_per_second": 0.431, + "eval_wer": 23.029642758418838, + "step": 30000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.384719326355456e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-30000/training_args.bin b/checkpoint-30000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-30000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-31000/config.json b/checkpoint-31000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-31000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-31000/generation_config.json b/checkpoint-31000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-31000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-31000/optimizer.pt b/checkpoint-31000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5683aaa1ac6d9d72c9ce09def4ff9f54f26f7417 --- /dev/null +++ b/checkpoint-31000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba6ee4ad7d25b999d2d3a46ba9a0e9863689184a7d5b646c7cb49736163e8586 +size 1934161093 diff --git a/checkpoint-31000/preprocessor_config.json b/checkpoint-31000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-31000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-31000/pytorch_model.bin b/checkpoint-31000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..dee09939ecb66fef9d6b2a33cb7da35940bcbb32 --- /dev/null +++ b/checkpoint-31000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3e05402cbfe178e32ca90185e7723cc4d3b29822ccd6a7636f1659c8621390d +size 967102601 diff --git a/checkpoint-31000/rng_state.pth b/checkpoint-31000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bbc0ad4e5e2a5ea9dd9660b4d62b90e666c472ec --- /dev/null +++ b/checkpoint-31000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f03e532ba4ccf42d0ee7964d5cc2baa7eaaf0a92fe8cf7b8564d5bf486a8b5b4 +size 14575 diff --git a/checkpoint-31000/scaler.pt b/checkpoint-31000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6bbbdb4a39d6e845736b118ef74f265ce99676f0 --- /dev/null +++ b/checkpoint-31000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ce107ac39f7269501d8304c86cf3be2864d6b0d3e9fc9c276994e9e4981c6d4 +size 557 diff --git a/checkpoint-31000/scheduler.pt b/checkpoint-31000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..896438354101fdb9f3ef26da165d2319adcb696d --- /dev/null +++ b/checkpoint-31000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5a966cffa40d6503e9afc7d57a787678f1a183611408a6b45cd44ce723138e2 +size 627 diff --git a/checkpoint-31000/trainer_state.json b/checkpoint-31000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..45e4c1ac93bf53fc7d2943e767781de16fcdd466 --- /dev/null +++ b/checkpoint-31000/trainer_state.json @@ -0,0 +1,7735 @@ +{ + "best_metric": 23.029642758418838, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-30000", + "epoch": 17.030825, + "global_step": 31000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + }, + { + "epoch": 13.03, + "learning_rate": 4.046835443037975e-06, + "loss": 0.0128, + "step": 24025 + }, + { + "epoch": 13.03, + "learning_rate": 4.040506329113924e-06, + "loss": 0.012, + "step": 24050 + }, + { + "epoch": 13.03, + "learning_rate": 4.034177215189874e-06, + "loss": 0.0098, + "step": 24075 + }, + { + "epoch": 13.03, + "learning_rate": 4.027848101265823e-06, + "loss": 0.0091, + "step": 24100 + }, + { + "epoch": 13.03, + "learning_rate": 4.021518987341773e-06, + "loss": 0.0077, + "step": 24125 + }, + { + "epoch": 13.03, + "learning_rate": 4.015189873417722e-06, + "loss": 0.0135, + "step": 24150 + }, + { + "epoch": 13.04, + "learning_rate": 4.0088607594936715e-06, + "loss": 0.0124, + "step": 24175 + }, + { + "epoch": 13.04, + "learning_rate": 4.002531645569621e-06, + "loss": 0.0152, + "step": 24200 + }, + { + "epoch": 13.04, + "learning_rate": 3.99620253164557e-06, + "loss": 0.0127, + "step": 24225 + }, + { + "epoch": 13.04, + "learning_rate": 3.989873417721519e-06, + "loss": 0.0066, + "step": 24250 + }, + { + "epoch": 13.04, + "learning_rate": 3.983544303797468e-06, + "loss": 0.0163, + "step": 24275 + }, + { + "epoch": 13.04, + "learning_rate": 3.977215189873418e-06, + "loss": 0.0095, + "step": 24300 + }, + { + "epoch": 13.04, + "learning_rate": 3.970886075949367e-06, + "loss": 0.0103, + "step": 24325 + }, + { + "epoch": 13.04, + "learning_rate": 3.964556962025317e-06, + "loss": 0.0112, + "step": 24350 + }, + { + "epoch": 13.04, + "learning_rate": 3.958227848101266e-06, + "loss": 0.01, + "step": 24375 + }, + { + "epoch": 13.04, + "learning_rate": 3.9518987341772155e-06, + "loss": 0.0131, + "step": 24400 + }, + { + "epoch": 13.04, + "learning_rate": 3.945569620253165e-06, + "loss": 0.0143, + "step": 24425 + }, + { + "epoch": 13.04, + "learning_rate": 3.939240506329114e-06, + "loss": 0.0151, + "step": 24450 + }, + { + "epoch": 13.04, + "learning_rate": 3.932911392405064e-06, + "loss": 0.0129, + "step": 24475 + }, + { + "epoch": 13.04, + "learning_rate": 3.926582278481013e-06, + "loss": 0.0125, + "step": 24500 + }, + { + "epoch": 14.0, + "learning_rate": 3.920506329113925e-06, + "loss": 0.0165, + "step": 24525 + }, + { + "epoch": 14.0, + "learning_rate": 3.914177215189874e-06, + "loss": 0.0114, + "step": 24550 + }, + { + "epoch": 14.0, + "learning_rate": 3.9078481012658235e-06, + "loss": 0.0111, + "step": 24575 + }, + { + "epoch": 14.0, + "learning_rate": 3.901518987341772e-06, + "loss": 0.0134, + "step": 24600 + }, + { + "epoch": 14.0, + "learning_rate": 3.8951898734177215e-06, + "loss": 0.0138, + "step": 24625 + }, + { + "epoch": 14.0, + "learning_rate": 3.888860759493671e-06, + "loss": 0.0151, + "step": 24650 + }, + { + "epoch": 14.0, + "learning_rate": 3.88253164556962e-06, + "loss": 0.0101, + "step": 24675 + }, + { + "epoch": 14.0, + "learning_rate": 3.87620253164557e-06, + "loss": 0.0108, + "step": 24700 + }, + { + "epoch": 14.01, + "learning_rate": 3.869873417721519e-06, + "loss": 0.0151, + "step": 24725 + }, + { + "epoch": 14.01, + "learning_rate": 3.863544303797469e-06, + "loss": 0.0132, + "step": 24750 + }, + { + "epoch": 14.01, + "learning_rate": 3.857215189873418e-06, + "loss": 0.012, + "step": 24775 + }, + { + "epoch": 14.01, + "learning_rate": 3.8508860759493675e-06, + "loss": 0.017, + "step": 24800 + }, + { + "epoch": 14.01, + "learning_rate": 3.844556962025317e-06, + "loss": 0.0141, + "step": 24825 + }, + { + "epoch": 14.01, + "learning_rate": 3.838227848101266e-06, + "loss": 0.0166, + "step": 24850 + }, + { + "epoch": 14.01, + "learning_rate": 3.831898734177216e-06, + "loss": 0.0112, + "step": 24875 + }, + { + "epoch": 14.01, + "learning_rate": 3.825569620253165e-06, + "loss": 0.0097, + "step": 24900 + }, + { + "epoch": 14.01, + "learning_rate": 3.819240506329115e-06, + "loss": 0.0138, + "step": 24925 + }, + { + "epoch": 14.01, + "learning_rate": 3.8129113924050636e-06, + "loss": 0.0069, + "step": 24950 + }, + { + "epoch": 14.01, + "learning_rate": 3.806582278481013e-06, + "loss": 0.0111, + "step": 24975 + }, + { + "epoch": 14.01, + "learning_rate": 3.8002531645569625e-06, + "loss": 0.0149, + "step": 25000 + }, + { + "epoch": 14.01, + "eval_loss": 0.2640444040298462, + "eval_runtime": 1478.4977, + "eval_samples_per_second": 7.042, + "eval_steps_per_second": 0.44, + "eval_wer": 24.063805915543345, + "step": 25000 + }, + { + "epoch": 14.01, + "learning_rate": 3.7939240506329115e-06, + "loss": 0.0176, + "step": 25025 + }, + { + "epoch": 14.01, + "learning_rate": 3.787594936708861e-06, + "loss": 0.0104, + "step": 25050 + }, + { + "epoch": 14.01, + "learning_rate": 3.7812658227848103e-06, + "loss": 0.0084, + "step": 25075 + }, + { + "epoch": 14.01, + "learning_rate": 3.7749367088607598e-06, + "loss": 0.0118, + "step": 25100 + }, + { + "epoch": 14.02, + "learning_rate": 3.768607594936709e-06, + "loss": 0.0099, + "step": 25125 + }, + { + "epoch": 14.02, + "learning_rate": 3.762278481012658e-06, + "loss": 0.0089, + "step": 25150 + }, + { + "epoch": 14.02, + "learning_rate": 3.7559493670886076e-06, + "loss": 0.0119, + "step": 25175 + }, + { + "epoch": 14.02, + "learning_rate": 3.749620253164557e-06, + "loss": 0.0118, + "step": 25200 + }, + { + "epoch": 14.02, + "learning_rate": 3.743291139240507e-06, + "loss": 0.0108, + "step": 25225 + }, + { + "epoch": 14.02, + "learning_rate": 3.7369620253164563e-06, + "loss": 0.0101, + "step": 25250 + }, + { + "epoch": 14.02, + "learning_rate": 3.7306329113924058e-06, + "loss": 0.0095, + "step": 25275 + }, + { + "epoch": 14.02, + "learning_rate": 3.7243037974683548e-06, + "loss": 0.0106, + "step": 25300 + }, + { + "epoch": 14.02, + "learning_rate": 3.717974683544304e-06, + "loss": 0.0135, + "step": 25325 + }, + { + "epoch": 14.02, + "learning_rate": 3.7116455696202536e-06, + "loss": 0.0115, + "step": 25350 + }, + { + "epoch": 14.02, + "learning_rate": 3.705316455696203e-06, + "loss": 0.0114, + "step": 25375 + }, + { + "epoch": 14.02, + "learning_rate": 3.698987341772152e-06, + "loss": 0.0119, + "step": 25400 + }, + { + "epoch": 14.02, + "learning_rate": 3.6926582278481015e-06, + "loss": 0.0123, + "step": 25425 + }, + { + "epoch": 14.02, + "learning_rate": 3.686329113924051e-06, + "loss": 0.0138, + "step": 25450 + }, + { + "epoch": 14.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 0.0133, + "step": 25475 + }, + { + "epoch": 14.02, + "learning_rate": 3.6736708860759498e-06, + "loss": 0.0106, + "step": 25500 + }, + { + "epoch": 14.03, + "learning_rate": 3.6673417721518988e-06, + "loss": 0.0099, + "step": 25525 + }, + { + "epoch": 14.03, + "learning_rate": 3.661012658227848e-06, + "loss": 0.0104, + "step": 25550 + }, + { + "epoch": 14.03, + "learning_rate": 3.6546835443037976e-06, + "loss": 0.0143, + "step": 25575 + }, + { + "epoch": 14.03, + "learning_rate": 3.648354430379747e-06, + "loss": 0.0096, + "step": 25600 + }, + { + "epoch": 14.03, + "learning_rate": 3.642025316455696e-06, + "loss": 0.0094, + "step": 25625 + }, + { + "epoch": 14.03, + "learning_rate": 3.6356962025316463e-06, + "loss": 0.0162, + "step": 25650 + }, + { + "epoch": 14.03, + "learning_rate": 3.6293670886075953e-06, + "loss": 0.008, + "step": 25675 + }, + { + "epoch": 14.03, + "learning_rate": 3.6230379746835448e-06, + "loss": 0.0094, + "step": 25700 + }, + { + "epoch": 14.03, + "learning_rate": 3.616708860759494e-06, + "loss": 0.0122, + "step": 25725 + }, + { + "epoch": 14.03, + "learning_rate": 3.6103797468354436e-06, + "loss": 0.0128, + "step": 25750 + }, + { + "epoch": 14.03, + "learning_rate": 3.6040506329113926e-06, + "loss": 0.0117, + "step": 25775 + }, + { + "epoch": 14.03, + "learning_rate": 3.597721518987342e-06, + "loss": 0.0067, + "step": 25800 + }, + { + "epoch": 14.03, + "learning_rate": 3.5913924050632915e-06, + "loss": 0.0078, + "step": 25825 + }, + { + "epoch": 14.03, + "learning_rate": 3.585063291139241e-06, + "loss": 0.0071, + "step": 25850 + }, + { + "epoch": 14.03, + "learning_rate": 3.57873417721519e-06, + "loss": 0.0093, + "step": 25875 + }, + { + "epoch": 14.03, + "learning_rate": 3.5724050632911393e-06, + "loss": 0.0064, + "step": 25900 + }, + { + "epoch": 14.04, + "learning_rate": 3.5660759493670888e-06, + "loss": 0.0067, + "step": 25925 + }, + { + "epoch": 14.04, + "learning_rate": 3.559746835443038e-06, + "loss": 0.0076, + "step": 25950 + }, + { + "epoch": 14.04, + "learning_rate": 3.5534177215189876e-06, + "loss": 0.0099, + "step": 25975 + }, + { + "epoch": 14.04, + "learning_rate": 3.5470886075949366e-06, + "loss": 0.0111, + "step": 26000 + }, + { + "epoch": 14.04, + "eval_loss": 0.2734878659248352, + "eval_runtime": 1456.7487, + "eval_samples_per_second": 7.147, + "eval_steps_per_second": 0.447, + "eval_wer": 24.359745686921737, + "step": 26000 + }, + { + "epoch": 14.04, + "learning_rate": 3.540759493670886e-06, + "loss": 0.0134, + "step": 26025 + }, + { + "epoch": 14.04, + "learning_rate": 3.534430379746836e-06, + "loss": 0.008, + "step": 26050 + }, + { + "epoch": 14.04, + "learning_rate": 3.5281012658227853e-06, + "loss": 0.0138, + "step": 26075 + }, + { + "epoch": 14.04, + "learning_rate": 3.5217721518987348e-06, + "loss": 0.0075, + "step": 26100 + }, + { + "epoch": 14.04, + "learning_rate": 3.515443037974684e-06, + "loss": 0.0097, + "step": 26125 + }, + { + "epoch": 14.04, + "learning_rate": 3.509113924050633e-06, + "loss": 0.0139, + "step": 26150 + }, + { + "epoch": 14.04, + "learning_rate": 3.5027848101265826e-06, + "loss": 0.0099, + "step": 26175 + }, + { + "epoch": 14.04, + "learning_rate": 3.496455696202532e-06, + "loss": 0.0148, + "step": 26200 + }, + { + "epoch": 14.04, + "learning_rate": 3.4901265822784815e-06, + "loss": 0.0119, + "step": 26225 + }, + { + "epoch": 14.04, + "learning_rate": 3.4837974683544305e-06, + "loss": 0.0086, + "step": 26250 + }, + { + "epoch": 15.0, + "learning_rate": 3.47746835443038e-06, + "loss": 0.0091, + "step": 26275 + }, + { + "epoch": 15.0, + "learning_rate": 3.4711392405063293e-06, + "loss": 0.0105, + "step": 26300 + }, + { + "epoch": 15.0, + "learning_rate": 3.465063291139241e-06, + "loss": 0.0138, + "step": 26325 + }, + { + "epoch": 15.0, + "learning_rate": 3.45873417721519e-06, + "loss": 0.0185, + "step": 26350 + }, + { + "epoch": 15.0, + "learning_rate": 3.4524050632911392e-06, + "loss": 0.015, + "step": 26375 + }, + { + "epoch": 15.0, + "learning_rate": 3.4460759493670886e-06, + "loss": 0.0177, + "step": 26400 + }, + { + "epoch": 15.0, + "learning_rate": 3.439746835443038e-06, + "loss": 0.014, + "step": 26425 + }, + { + "epoch": 15.0, + "learning_rate": 3.433417721518988e-06, + "loss": 0.0127, + "step": 26450 + }, + { + "epoch": 15.01, + "learning_rate": 3.4270886075949374e-06, + "loss": 0.0135, + "step": 26475 + }, + { + "epoch": 15.01, + "learning_rate": 3.4207594936708864e-06, + "loss": 0.0171, + "step": 26500 + }, + { + "epoch": 15.01, + "learning_rate": 3.4144303797468358e-06, + "loss": 0.0114, + "step": 26525 + }, + { + "epoch": 15.01, + "learning_rate": 3.4081012658227852e-06, + "loss": 0.0099, + "step": 26550 + }, + { + "epoch": 15.01, + "learning_rate": 3.4017721518987346e-06, + "loss": 0.0126, + "step": 26575 + }, + { + "epoch": 15.01, + "learning_rate": 3.395443037974684e-06, + "loss": 0.0116, + "step": 26600 + }, + { + "epoch": 15.01, + "learning_rate": 3.389113924050633e-06, + "loss": 0.0087, + "step": 26625 + }, + { + "epoch": 15.01, + "learning_rate": 3.3827848101265825e-06, + "loss": 0.0073, + "step": 26650 + }, + { + "epoch": 15.01, + "learning_rate": 3.376455696202532e-06, + "loss": 0.0102, + "step": 26675 + }, + { + "epoch": 15.01, + "learning_rate": 3.3701265822784814e-06, + "loss": 0.0106, + "step": 26700 + }, + { + "epoch": 15.01, + "learning_rate": 3.3637974683544304e-06, + "loss": 0.0094, + "step": 26725 + }, + { + "epoch": 15.01, + "learning_rate": 3.35746835443038e-06, + "loss": 0.0113, + "step": 26750 + }, + { + "epoch": 15.01, + "learning_rate": 3.3511392405063292e-06, + "loss": 0.0169, + "step": 26775 + }, + { + "epoch": 15.01, + "learning_rate": 3.3448101265822786e-06, + "loss": 0.0084, + "step": 26800 + }, + { + "epoch": 15.01, + "learning_rate": 3.338481012658228e-06, + "loss": 0.0153, + "step": 26825 + }, + { + "epoch": 15.01, + "learning_rate": 3.332151898734177e-06, + "loss": 0.012, + "step": 26850 + }, + { + "epoch": 15.02, + "learning_rate": 3.325822784810127e-06, + "loss": 0.0106, + "step": 26875 + }, + { + "epoch": 15.02, + "learning_rate": 3.3194936708860764e-06, + "loss": 0.0099, + "step": 26900 + }, + { + "epoch": 15.02, + "learning_rate": 3.3131645569620258e-06, + "loss": 0.008, + "step": 26925 + }, + { + "epoch": 15.02, + "learning_rate": 3.306835443037975e-06, + "loss": 0.0076, + "step": 26950 + }, + { + "epoch": 15.02, + "learning_rate": 3.3005063291139246e-06, + "loss": 0.0088, + "step": 26975 + }, + { + "epoch": 15.02, + "learning_rate": 3.2941772151898736e-06, + "loss": 0.009, + "step": 27000 + }, + { + "epoch": 15.02, + "eval_loss": 0.2693029046058655, + "eval_runtime": 1479.1374, + "eval_samples_per_second": 7.039, + "eval_steps_per_second": 0.44, + "eval_wer": 24.28819980812696, + "step": 27000 + }, + { + "epoch": 15.02, + "learning_rate": 3.287848101265823e-06, + "loss": 0.0115, + "step": 27025 + }, + { + "epoch": 15.02, + "learning_rate": 3.2815189873417725e-06, + "loss": 0.0099, + "step": 27050 + }, + { + "epoch": 15.02, + "learning_rate": 3.275189873417722e-06, + "loss": 0.0073, + "step": 27075 + }, + { + "epoch": 15.02, + "learning_rate": 3.268860759493671e-06, + "loss": 0.0061, + "step": 27100 + }, + { + "epoch": 15.02, + "learning_rate": 3.2625316455696204e-06, + "loss": 0.007, + "step": 27125 + }, + { + "epoch": 15.02, + "learning_rate": 3.2562025316455698e-06, + "loss": 0.0109, + "step": 27150 + }, + { + "epoch": 15.02, + "learning_rate": 3.2498734177215192e-06, + "loss": 0.0123, + "step": 27175 + }, + { + "epoch": 15.02, + "learning_rate": 3.2435443037974686e-06, + "loss": 0.0121, + "step": 27200 + }, + { + "epoch": 15.02, + "learning_rate": 3.2372151898734176e-06, + "loss": 0.0129, + "step": 27225 + }, + { + "epoch": 15.02, + "learning_rate": 3.230886075949367e-06, + "loss": 0.0128, + "step": 27250 + }, + { + "epoch": 15.03, + "learning_rate": 3.224556962025317e-06, + "loss": 0.0094, + "step": 27275 + }, + { + "epoch": 15.03, + "learning_rate": 3.2182278481012664e-06, + "loss": 0.0081, + "step": 27300 + }, + { + "epoch": 15.03, + "learning_rate": 3.2118987341772158e-06, + "loss": 0.0087, + "step": 27325 + }, + { + "epoch": 15.03, + "learning_rate": 3.2055696202531648e-06, + "loss": 0.0131, + "step": 27350 + }, + { + "epoch": 15.03, + "learning_rate": 3.199240506329114e-06, + "loss": 0.0073, + "step": 27375 + }, + { + "epoch": 15.03, + "learning_rate": 3.1929113924050636e-06, + "loss": 0.0086, + "step": 27400 + }, + { + "epoch": 15.03, + "learning_rate": 3.186582278481013e-06, + "loss": 0.0108, + "step": 27425 + }, + { + "epoch": 15.03, + "learning_rate": 3.1802531645569625e-06, + "loss": 0.01, + "step": 27450 + }, + { + "epoch": 15.03, + "learning_rate": 3.1739240506329115e-06, + "loss": 0.0123, + "step": 27475 + }, + { + "epoch": 15.03, + "learning_rate": 3.167594936708861e-06, + "loss": 0.0081, + "step": 27500 + }, + { + "epoch": 15.03, + "learning_rate": 3.1612658227848104e-06, + "loss": 0.0123, + "step": 27525 + }, + { + "epoch": 15.03, + "learning_rate": 3.1549367088607598e-06, + "loss": 0.0096, + "step": 27550 + }, + { + "epoch": 15.03, + "learning_rate": 3.1486075949367088e-06, + "loss": 0.0054, + "step": 27575 + }, + { + "epoch": 15.03, + "learning_rate": 3.1422784810126582e-06, + "loss": 0.006, + "step": 27600 + }, + { + "epoch": 15.03, + "learning_rate": 3.1359493670886076e-06, + "loss": 0.0089, + "step": 27625 + }, + { + "epoch": 15.03, + "learning_rate": 3.129620253164557e-06, + "loss": 0.0109, + "step": 27650 + }, + { + "epoch": 15.04, + "learning_rate": 3.1232911392405065e-06, + "loss": 0.0089, + "step": 27675 + }, + { + "epoch": 15.04, + "learning_rate": 3.1169620253164563e-06, + "loss": 0.0094, + "step": 27700 + }, + { + "epoch": 15.04, + "learning_rate": 3.1106329113924054e-06, + "loss": 0.0102, + "step": 27725 + }, + { + "epoch": 15.04, + "learning_rate": 3.1043037974683548e-06, + "loss": 0.0097, + "step": 27750 + }, + { + "epoch": 15.04, + "learning_rate": 3.097974683544304e-06, + "loss": 0.0078, + "step": 27775 + }, + { + "epoch": 15.04, + "learning_rate": 3.0916455696202536e-06, + "loss": 0.0073, + "step": 27800 + }, + { + "epoch": 15.04, + "learning_rate": 3.085316455696203e-06, + "loss": 0.0105, + "step": 27825 + }, + { + "epoch": 15.04, + "learning_rate": 3.078987341772152e-06, + "loss": 0.0083, + "step": 27850 + }, + { + "epoch": 15.04, + "learning_rate": 3.0726582278481015e-06, + "loss": 0.0138, + "step": 27875 + }, + { + "epoch": 15.04, + "learning_rate": 3.066329113924051e-06, + "loss": 0.0049, + "step": 27900 + }, + { + "epoch": 15.04, + "learning_rate": 3.0600000000000003e-06, + "loss": 0.0107, + "step": 27925 + }, + { + "epoch": 15.04, + "learning_rate": 3.0536708860759494e-06, + "loss": 0.0105, + "step": 27950 + }, + { + "epoch": 15.04, + "learning_rate": 3.0473417721518988e-06, + "loss": 0.0134, + "step": 27975 + }, + { + "epoch": 15.04, + "learning_rate": 3.041012658227848e-06, + "loss": 0.0137, + "step": 28000 + }, + { + "epoch": 15.04, + "eval_loss": 0.2780563235282898, + "eval_runtime": 1465.9542, + "eval_samples_per_second": 7.102, + "eval_steps_per_second": 0.444, + "eval_wer": 25.010162766874256, + "step": 28000 + }, + { + "epoch": 16.0, + "learning_rate": 3.0346835443037976e-06, + "loss": 0.009, + "step": 28025 + }, + { + "epoch": 16.0, + "learning_rate": 3.028354430379747e-06, + "loss": 0.0081, + "step": 28050 + }, + { + "epoch": 16.0, + "learning_rate": 3.022025316455696e-06, + "loss": 0.0109, + "step": 28075 + }, + { + "epoch": 16.0, + "learning_rate": 3.015696202531646e-06, + "loss": 0.0132, + "step": 28100 + }, + { + "epoch": 16.0, + "learning_rate": 3.0093670886075953e-06, + "loss": 0.0142, + "step": 28125 + }, + { + "epoch": 16.0, + "learning_rate": 3.0030379746835448e-06, + "loss": 0.0152, + "step": 28150 + }, + { + "epoch": 16.0, + "learning_rate": 2.996708860759494e-06, + "loss": 0.0074, + "step": 28175 + }, + { + "epoch": 16.0, + "learning_rate": 2.990379746835443e-06, + "loss": 0.0129, + "step": 28200 + }, + { + "epoch": 16.01, + "learning_rate": 2.9840506329113926e-06, + "loss": 0.0118, + "step": 28225 + }, + { + "epoch": 16.01, + "learning_rate": 2.977721518987342e-06, + "loss": 0.0153, + "step": 28250 + }, + { + "epoch": 16.01, + "learning_rate": 2.9713924050632915e-06, + "loss": 0.0121, + "step": 28275 + }, + { + "epoch": 16.01, + "learning_rate": 2.965063291139241e-06, + "loss": 0.0097, + "step": 28300 + }, + { + "epoch": 16.01, + "learning_rate": 2.95873417721519e-06, + "loss": 0.0083, + "step": 28325 + }, + { + "epoch": 16.01, + "learning_rate": 2.9524050632911393e-06, + "loss": 0.01, + "step": 28350 + }, + { + "epoch": 16.01, + "learning_rate": 2.9460759493670888e-06, + "loss": 0.0084, + "step": 28375 + }, + { + "epoch": 16.01, + "learning_rate": 2.939746835443038e-06, + "loss": 0.0124, + "step": 28400 + }, + { + "epoch": 16.01, + "learning_rate": 2.933417721518987e-06, + "loss": 0.0071, + "step": 28425 + }, + { + "epoch": 16.01, + "learning_rate": 2.9270886075949366e-06, + "loss": 0.0113, + "step": 28450 + }, + { + "epoch": 16.01, + "learning_rate": 2.920759493670886e-06, + "loss": 0.0085, + "step": 28475 + }, + { + "epoch": 16.01, + "learning_rate": 2.914430379746836e-06, + "loss": 0.0133, + "step": 28500 + }, + { + "epoch": 16.01, + "learning_rate": 2.9081012658227853e-06, + "loss": 0.014, + "step": 28525 + }, + { + "epoch": 16.01, + "learning_rate": 2.9017721518987348e-06, + "loss": 0.0105, + "step": 28550 + }, + { + "epoch": 16.01, + "learning_rate": 2.8954430379746838e-06, + "loss": 0.01, + "step": 28575 + }, + { + "epoch": 16.01, + "learning_rate": 2.889113924050633e-06, + "loss": 0.0135, + "step": 28600 + }, + { + "epoch": 16.02, + "learning_rate": 2.8827848101265826e-06, + "loss": 0.0118, + "step": 28625 + }, + { + "epoch": 16.02, + "learning_rate": 2.876455696202532e-06, + "loss": 0.0111, + "step": 28650 + }, + { + "epoch": 16.02, + "learning_rate": 2.8701265822784815e-06, + "loss": 0.008, + "step": 28675 + }, + { + "epoch": 16.02, + "learning_rate": 2.8637974683544305e-06, + "loss": 0.0103, + "step": 28700 + }, + { + "epoch": 16.02, + "learning_rate": 2.85746835443038e-06, + "loss": 0.0082, + "step": 28725 + }, + { + "epoch": 16.02, + "learning_rate": 2.8511392405063293e-06, + "loss": 0.0057, + "step": 28750 + }, + { + "epoch": 16.02, + "learning_rate": 2.8448101265822788e-06, + "loss": 0.0062, + "step": 28775 + }, + { + "epoch": 16.02, + "learning_rate": 2.8384810126582278e-06, + "loss": 0.0092, + "step": 28800 + }, + { + "epoch": 16.02, + "learning_rate": 2.832151898734177e-06, + "loss": 0.0084, + "step": 28825 + }, + { + "epoch": 16.02, + "learning_rate": 2.8258227848101266e-06, + "loss": 0.0099, + "step": 28850 + }, + { + "epoch": 16.02, + "learning_rate": 2.819493670886076e-06, + "loss": 0.0117, + "step": 28875 + }, + { + "epoch": 16.02, + "learning_rate": 2.8131645569620255e-06, + "loss": 0.0117, + "step": 28900 + }, + { + "epoch": 16.02, + "learning_rate": 2.8068354430379753e-06, + "loss": 0.0127, + "step": 28925 + }, + { + "epoch": 16.02, + "learning_rate": 2.8005063291139243e-06, + "loss": 0.0123, + "step": 28950 + }, + { + "epoch": 16.02, + "learning_rate": 2.7941772151898738e-06, + "loss": 0.0128, + "step": 28975 + }, + { + "epoch": 16.02, + "learning_rate": 2.787848101265823e-06, + "loss": 0.0073, + "step": 29000 + }, + { + "epoch": 16.02, + "eval_loss": 0.2685891091823578, + "eval_runtime": 1456.562, + "eval_samples_per_second": 7.148, + "eval_steps_per_second": 0.447, + "eval_wer": 23.278427291500677, + "step": 29000 + }, + { + "epoch": 16.03, + "learning_rate": 2.7815189873417726e-06, + "loss": 0.0068, + "step": 29025 + }, + { + "epoch": 16.03, + "learning_rate": 2.7751898734177216e-06, + "loss": 0.0087, + "step": 29050 + }, + { + "epoch": 16.03, + "learning_rate": 2.768860759493671e-06, + "loss": 0.0084, + "step": 29075 + }, + { + "epoch": 16.03, + "learning_rate": 2.7625316455696205e-06, + "loss": 0.0039, + "step": 29100 + }, + { + "epoch": 16.03, + "learning_rate": 2.75620253164557e-06, + "loss": 0.0066, + "step": 29125 + }, + { + "epoch": 16.03, + "learning_rate": 2.7498734177215193e-06, + "loss": 0.0108, + "step": 29150 + }, + { + "epoch": 16.03, + "learning_rate": 2.7435443037974683e-06, + "loss": 0.0052, + "step": 29175 + }, + { + "epoch": 16.03, + "learning_rate": 2.7372151898734178e-06, + "loss": 0.0061, + "step": 29200 + }, + { + "epoch": 16.03, + "learning_rate": 2.730886075949367e-06, + "loss": 0.0083, + "step": 29225 + }, + { + "epoch": 16.03, + "learning_rate": 2.7245569620253166e-06, + "loss": 0.0062, + "step": 29250 + }, + { + "epoch": 16.03, + "learning_rate": 2.7182278481012656e-06, + "loss": 0.0078, + "step": 29275 + }, + { + "epoch": 16.03, + "learning_rate": 2.711898734177215e-06, + "loss": 0.0127, + "step": 29300 + }, + { + "epoch": 16.03, + "learning_rate": 2.705569620253165e-06, + "loss": 0.0092, + "step": 29325 + }, + { + "epoch": 16.03, + "learning_rate": 2.6992405063291143e-06, + "loss": 0.0087, + "step": 29350 + }, + { + "epoch": 16.03, + "learning_rate": 2.6929113924050638e-06, + "loss": 0.0062, + "step": 29375 + }, + { + "epoch": 16.03, + "learning_rate": 2.686582278481013e-06, + "loss": 0.0065, + "step": 29400 + }, + { + "epoch": 16.04, + "learning_rate": 2.680253164556962e-06, + "loss": 0.0057, + "step": 29425 + }, + { + "epoch": 16.04, + "learning_rate": 2.6739240506329116e-06, + "loss": 0.0071, + "step": 29450 + }, + { + "epoch": 16.04, + "learning_rate": 2.667594936708861e-06, + "loss": 0.01, + "step": 29475 + }, + { + "epoch": 16.04, + "learning_rate": 2.6612658227848105e-06, + "loss": 0.009, + "step": 29500 + }, + { + "epoch": 16.04, + "learning_rate": 2.65493670886076e-06, + "loss": 0.0091, + "step": 29525 + }, + { + "epoch": 16.04, + "learning_rate": 2.648607594936709e-06, + "loss": 0.0104, + "step": 29550 + }, + { + "epoch": 16.04, + "learning_rate": 2.6422784810126583e-06, + "loss": 0.0071, + "step": 29575 + }, + { + "epoch": 16.04, + "learning_rate": 2.6359493670886078e-06, + "loss": 0.0081, + "step": 29600 + }, + { + "epoch": 16.04, + "learning_rate": 2.629620253164557e-06, + "loss": 0.008, + "step": 29625 + }, + { + "epoch": 16.04, + "learning_rate": 2.623291139240506e-06, + "loss": 0.0067, + "step": 29650 + }, + { + "epoch": 16.04, + "learning_rate": 2.6169620253164556e-06, + "loss": 0.0066, + "step": 29675 + }, + { + "epoch": 16.04, + "learning_rate": 2.610632911392405e-06, + "loss": 0.0121, + "step": 29700 + }, + { + "epoch": 16.04, + "learning_rate": 2.6043037974683545e-06, + "loss": 0.0075, + "step": 29725 + }, + { + "epoch": 16.04, + "learning_rate": 2.5979746835443043e-06, + "loss": 0.0111, + "step": 29750 + }, + { + "epoch": 17.0, + "learning_rate": 2.5916455696202538e-06, + "loss": 0.0095, + "step": 29775 + }, + { + "epoch": 17.0, + "learning_rate": 2.5853164556962028e-06, + "loss": 0.0077, + "step": 29800 + }, + { + "epoch": 17.0, + "learning_rate": 2.578987341772152e-06, + "loss": 0.0047, + "step": 29825 + }, + { + "epoch": 17.0, + "learning_rate": 2.5726582278481016e-06, + "loss": 0.0092, + "step": 29850 + }, + { + "epoch": 17.0, + "learning_rate": 2.566329113924051e-06, + "loss": 0.0191, + "step": 29875 + }, + { + "epoch": 17.0, + "learning_rate": 2.56e-06, + "loss": 0.0102, + "step": 29900 + }, + { + "epoch": 17.0, + "learning_rate": 2.5536708860759495e-06, + "loss": 0.0146, + "step": 29925 + }, + { + "epoch": 17.0, + "learning_rate": 2.547341772151899e-06, + "loss": 0.0096, + "step": 29950 + }, + { + "epoch": 17.01, + "learning_rate": 2.5410126582278483e-06, + "loss": 0.0159, + "step": 29975 + }, + { + "epoch": 17.01, + "learning_rate": 2.5346835443037978e-06, + "loss": 0.0094, + "step": 30000 + }, + { + "epoch": 17.01, + "eval_loss": 0.2636851966381073, + "eval_runtime": 1508.7196, + "eval_samples_per_second": 6.901, + "eval_steps_per_second": 0.431, + "eval_wer": 23.029642758418838, + "step": 30000 + }, + { + "epoch": 17.01, + "learning_rate": 2.5283544303797468e-06, + "loss": 0.0078, + "step": 30025 + }, + { + "epoch": 17.01, + "learning_rate": 2.522025316455696e-06, + "loss": 0.01, + "step": 30050 + }, + { + "epoch": 17.01, + "learning_rate": 2.5156962025316456e-06, + "loss": 0.0058, + "step": 30075 + }, + { + "epoch": 17.01, + "learning_rate": 2.509367088607595e-06, + "loss": 0.0092, + "step": 30100 + }, + { + "epoch": 17.01, + "learning_rate": 2.503037974683544e-06, + "loss": 0.0084, + "step": 30125 + }, + { + "epoch": 17.01, + "learning_rate": 2.496708860759494e-06, + "loss": 0.0069, + "step": 30150 + }, + { + "epoch": 17.01, + "learning_rate": 2.4903797468354433e-06, + "loss": 0.0059, + "step": 30175 + }, + { + "epoch": 17.01, + "learning_rate": 2.4840506329113923e-06, + "loss": 0.0097, + "step": 30200 + }, + { + "epoch": 17.01, + "learning_rate": 2.4777215189873418e-06, + "loss": 0.0126, + "step": 30225 + }, + { + "epoch": 17.01, + "learning_rate": 2.4713924050632916e-06, + "loss": 0.0096, + "step": 30250 + }, + { + "epoch": 17.01, + "learning_rate": 2.4650632911392406e-06, + "loss": 0.0094, + "step": 30275 + }, + { + "epoch": 17.01, + "learning_rate": 2.45873417721519e-06, + "loss": 0.0111, + "step": 30300 + }, + { + "epoch": 17.01, + "learning_rate": 2.4524050632911395e-06, + "loss": 0.0062, + "step": 30325 + }, + { + "epoch": 17.01, + "learning_rate": 2.446075949367089e-06, + "loss": 0.0078, + "step": 30350 + }, + { + "epoch": 17.02, + "learning_rate": 2.4397468354430383e-06, + "loss": 0.0043, + "step": 30375 + }, + { + "epoch": 17.02, + "learning_rate": 2.4336708860759494e-06, + "loss": 0.0102, + "step": 30400 + }, + { + "epoch": 17.02, + "learning_rate": 2.4273417721518988e-06, + "loss": 0.0101, + "step": 30425 + }, + { + "epoch": 17.02, + "learning_rate": 2.4210126582278482e-06, + "loss": 0.0066, + "step": 30450 + }, + { + "epoch": 17.02, + "learning_rate": 2.4146835443037976e-06, + "loss": 0.0078, + "step": 30475 + }, + { + "epoch": 17.02, + "learning_rate": 2.408354430379747e-06, + "loss": 0.0074, + "step": 30500 + }, + { + "epoch": 17.02, + "learning_rate": 2.4020253164556965e-06, + "loss": 0.0072, + "step": 30525 + }, + { + "epoch": 17.02, + "learning_rate": 2.395696202531646e-06, + "loss": 0.0109, + "step": 30550 + }, + { + "epoch": 17.02, + "learning_rate": 2.389367088607595e-06, + "loss": 0.0113, + "step": 30575 + }, + { + "epoch": 17.02, + "learning_rate": 2.3830379746835444e-06, + "loss": 0.0109, + "step": 30600 + }, + { + "epoch": 17.02, + "learning_rate": 2.3767088607594938e-06, + "loss": 0.0089, + "step": 30625 + }, + { + "epoch": 17.02, + "learning_rate": 2.3703797468354432e-06, + "loss": 0.0062, + "step": 30650 + }, + { + "epoch": 17.02, + "learning_rate": 2.3640506329113926e-06, + "loss": 0.0101, + "step": 30675 + }, + { + "epoch": 17.02, + "learning_rate": 2.357721518987342e-06, + "loss": 0.0096, + "step": 30700 + }, + { + "epoch": 17.02, + "learning_rate": 2.3513924050632915e-06, + "loss": 0.0093, + "step": 30725 + }, + { + "epoch": 17.02, + "learning_rate": 2.3450632911392405e-06, + "loss": 0.0113, + "step": 30750 + }, + { + "epoch": 17.03, + "learning_rate": 2.33873417721519e-06, + "loss": 0.0094, + "step": 30775 + }, + { + "epoch": 17.03, + "learning_rate": 2.3324050632911394e-06, + "loss": 0.0086, + "step": 30800 + }, + { + "epoch": 17.03, + "learning_rate": 2.3260759493670888e-06, + "loss": 0.0096, + "step": 30825 + }, + { + "epoch": 17.03, + "learning_rate": 2.319746835443038e-06, + "loss": 0.0086, + "step": 30850 + }, + { + "epoch": 17.03, + "learning_rate": 2.3134177215189876e-06, + "loss": 0.0064, + "step": 30875 + }, + { + "epoch": 17.03, + "learning_rate": 2.307088607594937e-06, + "loss": 0.0107, + "step": 30900 + }, + { + "epoch": 17.03, + "learning_rate": 2.3007594936708865e-06, + "loss": 0.0073, + "step": 30925 + }, + { + "epoch": 17.03, + "learning_rate": 2.2944303797468355e-06, + "loss": 0.005, + "step": 30950 + }, + { + "epoch": 17.03, + "learning_rate": 2.288101265822785e-06, + "loss": 0.0055, + "step": 30975 + }, + { + "epoch": 17.03, + "learning_rate": 2.2817721518987344e-06, + "loss": 0.006, + "step": 31000 + }, + { + "epoch": 17.03, + "eval_loss": 0.2709895372390747, + "eval_runtime": 1461.5544, + "eval_samples_per_second": 7.123, + "eval_steps_per_second": 0.445, + "eval_wer": 23.29468771849949, + "step": 31000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.430892990406656e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-31000/training_args.bin b/checkpoint-31000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-31000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-32000/config.json b/checkpoint-32000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-32000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-32000/generation_config.json b/checkpoint-32000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-32000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-32000/optimizer.pt b/checkpoint-32000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bdee2acd13c852c49e61694241c2d883e8621e7d --- /dev/null +++ b/checkpoint-32000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50317e89b9d05321a72f4f7033b28074605dc98d2c97f3b6bae183ee71cfe7ae +size 1934161093 diff --git a/checkpoint-32000/preprocessor_config.json b/checkpoint-32000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-32000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-32000/pytorch_model.bin b/checkpoint-32000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..97a378383f97dbbde8c3871eeb572b2d6eda0918 --- /dev/null +++ b/checkpoint-32000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:225d0f76763e5d7dfce27ff5a82a1f0748c178e5f797d787aa1d990ef91a3489 +size 967102601 diff --git a/checkpoint-32000/rng_state.pth b/checkpoint-32000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3028553ad60cd2aceda83b466e5c57751f0b1329 --- /dev/null +++ b/checkpoint-32000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a6ae5df124172e08c7c0a2aff08539b9e705ae83bdff3f540c9ff8812f29482 +size 14575 diff --git a/checkpoint-32000/scaler.pt b/checkpoint-32000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..281138439703b0febda2ebf4a88656377ae71ba0 --- /dev/null +++ b/checkpoint-32000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e78a038f2aea69f1dd38dd93823473a69f6abcfb9ee8ac6ff06e0718faa7508 +size 557 diff --git a/checkpoint-32000/scheduler.pt b/checkpoint-32000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..319a383cd20b583f3a926af344a6ab287dd4227c --- /dev/null +++ b/checkpoint-32000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab48111e5affb700ba40331dcc106cd9d29125d5ecdfb4f31aa70e630d751720 +size 627 diff --git a/checkpoint-32000/trainer_state.json b/checkpoint-32000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8abd8448a76d84028567ee606c3149067a2a396f --- /dev/null +++ b/checkpoint-32000/trainer_state.json @@ -0,0 +1,7984 @@ +{ + "best_metric": 23.029642758418838, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-30000", + "epoch": 18.01205, + "global_step": 32000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + }, + { + "epoch": 13.03, + "learning_rate": 4.046835443037975e-06, + "loss": 0.0128, + "step": 24025 + }, + { + "epoch": 13.03, + "learning_rate": 4.040506329113924e-06, + "loss": 0.012, + "step": 24050 + }, + { + "epoch": 13.03, + "learning_rate": 4.034177215189874e-06, + "loss": 0.0098, + "step": 24075 + }, + { + "epoch": 13.03, + "learning_rate": 4.027848101265823e-06, + "loss": 0.0091, + "step": 24100 + }, + { + "epoch": 13.03, + "learning_rate": 4.021518987341773e-06, + "loss": 0.0077, + "step": 24125 + }, + { + "epoch": 13.03, + "learning_rate": 4.015189873417722e-06, + "loss": 0.0135, + "step": 24150 + }, + { + "epoch": 13.04, + "learning_rate": 4.0088607594936715e-06, + "loss": 0.0124, + "step": 24175 + }, + { + "epoch": 13.04, + "learning_rate": 4.002531645569621e-06, + "loss": 0.0152, + "step": 24200 + }, + { + "epoch": 13.04, + "learning_rate": 3.99620253164557e-06, + "loss": 0.0127, + "step": 24225 + }, + { + "epoch": 13.04, + "learning_rate": 3.989873417721519e-06, + "loss": 0.0066, + "step": 24250 + }, + { + "epoch": 13.04, + "learning_rate": 3.983544303797468e-06, + "loss": 0.0163, + "step": 24275 + }, + { + "epoch": 13.04, + "learning_rate": 3.977215189873418e-06, + "loss": 0.0095, + "step": 24300 + }, + { + "epoch": 13.04, + "learning_rate": 3.970886075949367e-06, + "loss": 0.0103, + "step": 24325 + }, + { + "epoch": 13.04, + "learning_rate": 3.964556962025317e-06, + "loss": 0.0112, + "step": 24350 + }, + { + "epoch": 13.04, + "learning_rate": 3.958227848101266e-06, + "loss": 0.01, + "step": 24375 + }, + { + "epoch": 13.04, + "learning_rate": 3.9518987341772155e-06, + "loss": 0.0131, + "step": 24400 + }, + { + "epoch": 13.04, + "learning_rate": 3.945569620253165e-06, + "loss": 0.0143, + "step": 24425 + }, + { + "epoch": 13.04, + "learning_rate": 3.939240506329114e-06, + "loss": 0.0151, + "step": 24450 + }, + { + "epoch": 13.04, + "learning_rate": 3.932911392405064e-06, + "loss": 0.0129, + "step": 24475 + }, + { + "epoch": 13.04, + "learning_rate": 3.926582278481013e-06, + "loss": 0.0125, + "step": 24500 + }, + { + "epoch": 14.0, + "learning_rate": 3.920506329113925e-06, + "loss": 0.0165, + "step": 24525 + }, + { + "epoch": 14.0, + "learning_rate": 3.914177215189874e-06, + "loss": 0.0114, + "step": 24550 + }, + { + "epoch": 14.0, + "learning_rate": 3.9078481012658235e-06, + "loss": 0.0111, + "step": 24575 + }, + { + "epoch": 14.0, + "learning_rate": 3.901518987341772e-06, + "loss": 0.0134, + "step": 24600 + }, + { + "epoch": 14.0, + "learning_rate": 3.8951898734177215e-06, + "loss": 0.0138, + "step": 24625 + }, + { + "epoch": 14.0, + "learning_rate": 3.888860759493671e-06, + "loss": 0.0151, + "step": 24650 + }, + { + "epoch": 14.0, + "learning_rate": 3.88253164556962e-06, + "loss": 0.0101, + "step": 24675 + }, + { + "epoch": 14.0, + "learning_rate": 3.87620253164557e-06, + "loss": 0.0108, + "step": 24700 + }, + { + "epoch": 14.01, + "learning_rate": 3.869873417721519e-06, + "loss": 0.0151, + "step": 24725 + }, + { + "epoch": 14.01, + "learning_rate": 3.863544303797469e-06, + "loss": 0.0132, + "step": 24750 + }, + { + "epoch": 14.01, + "learning_rate": 3.857215189873418e-06, + "loss": 0.012, + "step": 24775 + }, + { + "epoch": 14.01, + "learning_rate": 3.8508860759493675e-06, + "loss": 0.017, + "step": 24800 + }, + { + "epoch": 14.01, + "learning_rate": 3.844556962025317e-06, + "loss": 0.0141, + "step": 24825 + }, + { + "epoch": 14.01, + "learning_rate": 3.838227848101266e-06, + "loss": 0.0166, + "step": 24850 + }, + { + "epoch": 14.01, + "learning_rate": 3.831898734177216e-06, + "loss": 0.0112, + "step": 24875 + }, + { + "epoch": 14.01, + "learning_rate": 3.825569620253165e-06, + "loss": 0.0097, + "step": 24900 + }, + { + "epoch": 14.01, + "learning_rate": 3.819240506329115e-06, + "loss": 0.0138, + "step": 24925 + }, + { + "epoch": 14.01, + "learning_rate": 3.8129113924050636e-06, + "loss": 0.0069, + "step": 24950 + }, + { + "epoch": 14.01, + "learning_rate": 3.806582278481013e-06, + "loss": 0.0111, + "step": 24975 + }, + { + "epoch": 14.01, + "learning_rate": 3.8002531645569625e-06, + "loss": 0.0149, + "step": 25000 + }, + { + "epoch": 14.01, + "eval_loss": 0.2640444040298462, + "eval_runtime": 1478.4977, + "eval_samples_per_second": 7.042, + "eval_steps_per_second": 0.44, + "eval_wer": 24.063805915543345, + "step": 25000 + }, + { + "epoch": 14.01, + "learning_rate": 3.7939240506329115e-06, + "loss": 0.0176, + "step": 25025 + }, + { + "epoch": 14.01, + "learning_rate": 3.787594936708861e-06, + "loss": 0.0104, + "step": 25050 + }, + { + "epoch": 14.01, + "learning_rate": 3.7812658227848103e-06, + "loss": 0.0084, + "step": 25075 + }, + { + "epoch": 14.01, + "learning_rate": 3.7749367088607598e-06, + "loss": 0.0118, + "step": 25100 + }, + { + "epoch": 14.02, + "learning_rate": 3.768607594936709e-06, + "loss": 0.0099, + "step": 25125 + }, + { + "epoch": 14.02, + "learning_rate": 3.762278481012658e-06, + "loss": 0.0089, + "step": 25150 + }, + { + "epoch": 14.02, + "learning_rate": 3.7559493670886076e-06, + "loss": 0.0119, + "step": 25175 + }, + { + "epoch": 14.02, + "learning_rate": 3.749620253164557e-06, + "loss": 0.0118, + "step": 25200 + }, + { + "epoch": 14.02, + "learning_rate": 3.743291139240507e-06, + "loss": 0.0108, + "step": 25225 + }, + { + "epoch": 14.02, + "learning_rate": 3.7369620253164563e-06, + "loss": 0.0101, + "step": 25250 + }, + { + "epoch": 14.02, + "learning_rate": 3.7306329113924058e-06, + "loss": 0.0095, + "step": 25275 + }, + { + "epoch": 14.02, + "learning_rate": 3.7243037974683548e-06, + "loss": 0.0106, + "step": 25300 + }, + { + "epoch": 14.02, + "learning_rate": 3.717974683544304e-06, + "loss": 0.0135, + "step": 25325 + }, + { + "epoch": 14.02, + "learning_rate": 3.7116455696202536e-06, + "loss": 0.0115, + "step": 25350 + }, + { + "epoch": 14.02, + "learning_rate": 3.705316455696203e-06, + "loss": 0.0114, + "step": 25375 + }, + { + "epoch": 14.02, + "learning_rate": 3.698987341772152e-06, + "loss": 0.0119, + "step": 25400 + }, + { + "epoch": 14.02, + "learning_rate": 3.6926582278481015e-06, + "loss": 0.0123, + "step": 25425 + }, + { + "epoch": 14.02, + "learning_rate": 3.686329113924051e-06, + "loss": 0.0138, + "step": 25450 + }, + { + "epoch": 14.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 0.0133, + "step": 25475 + }, + { + "epoch": 14.02, + "learning_rate": 3.6736708860759498e-06, + "loss": 0.0106, + "step": 25500 + }, + { + "epoch": 14.03, + "learning_rate": 3.6673417721518988e-06, + "loss": 0.0099, + "step": 25525 + }, + { + "epoch": 14.03, + "learning_rate": 3.661012658227848e-06, + "loss": 0.0104, + "step": 25550 + }, + { + "epoch": 14.03, + "learning_rate": 3.6546835443037976e-06, + "loss": 0.0143, + "step": 25575 + }, + { + "epoch": 14.03, + "learning_rate": 3.648354430379747e-06, + "loss": 0.0096, + "step": 25600 + }, + { + "epoch": 14.03, + "learning_rate": 3.642025316455696e-06, + "loss": 0.0094, + "step": 25625 + }, + { + "epoch": 14.03, + "learning_rate": 3.6356962025316463e-06, + "loss": 0.0162, + "step": 25650 + }, + { + "epoch": 14.03, + "learning_rate": 3.6293670886075953e-06, + "loss": 0.008, + "step": 25675 + }, + { + "epoch": 14.03, + "learning_rate": 3.6230379746835448e-06, + "loss": 0.0094, + "step": 25700 + }, + { + "epoch": 14.03, + "learning_rate": 3.616708860759494e-06, + "loss": 0.0122, + "step": 25725 + }, + { + "epoch": 14.03, + "learning_rate": 3.6103797468354436e-06, + "loss": 0.0128, + "step": 25750 + }, + { + "epoch": 14.03, + "learning_rate": 3.6040506329113926e-06, + "loss": 0.0117, + "step": 25775 + }, + { + "epoch": 14.03, + "learning_rate": 3.597721518987342e-06, + "loss": 0.0067, + "step": 25800 + }, + { + "epoch": 14.03, + "learning_rate": 3.5913924050632915e-06, + "loss": 0.0078, + "step": 25825 + }, + { + "epoch": 14.03, + "learning_rate": 3.585063291139241e-06, + "loss": 0.0071, + "step": 25850 + }, + { + "epoch": 14.03, + "learning_rate": 3.57873417721519e-06, + "loss": 0.0093, + "step": 25875 + }, + { + "epoch": 14.03, + "learning_rate": 3.5724050632911393e-06, + "loss": 0.0064, + "step": 25900 + }, + { + "epoch": 14.04, + "learning_rate": 3.5660759493670888e-06, + "loss": 0.0067, + "step": 25925 + }, + { + "epoch": 14.04, + "learning_rate": 3.559746835443038e-06, + "loss": 0.0076, + "step": 25950 + }, + { + "epoch": 14.04, + "learning_rate": 3.5534177215189876e-06, + "loss": 0.0099, + "step": 25975 + }, + { + "epoch": 14.04, + "learning_rate": 3.5470886075949366e-06, + "loss": 0.0111, + "step": 26000 + }, + { + "epoch": 14.04, + "eval_loss": 0.2734878659248352, + "eval_runtime": 1456.7487, + "eval_samples_per_second": 7.147, + "eval_steps_per_second": 0.447, + "eval_wer": 24.359745686921737, + "step": 26000 + }, + { + "epoch": 14.04, + "learning_rate": 3.540759493670886e-06, + "loss": 0.0134, + "step": 26025 + }, + { + "epoch": 14.04, + "learning_rate": 3.534430379746836e-06, + "loss": 0.008, + "step": 26050 + }, + { + "epoch": 14.04, + "learning_rate": 3.5281012658227853e-06, + "loss": 0.0138, + "step": 26075 + }, + { + "epoch": 14.04, + "learning_rate": 3.5217721518987348e-06, + "loss": 0.0075, + "step": 26100 + }, + { + "epoch": 14.04, + "learning_rate": 3.515443037974684e-06, + "loss": 0.0097, + "step": 26125 + }, + { + "epoch": 14.04, + "learning_rate": 3.509113924050633e-06, + "loss": 0.0139, + "step": 26150 + }, + { + "epoch": 14.04, + "learning_rate": 3.5027848101265826e-06, + "loss": 0.0099, + "step": 26175 + }, + { + "epoch": 14.04, + "learning_rate": 3.496455696202532e-06, + "loss": 0.0148, + "step": 26200 + }, + { + "epoch": 14.04, + "learning_rate": 3.4901265822784815e-06, + "loss": 0.0119, + "step": 26225 + }, + { + "epoch": 14.04, + "learning_rate": 3.4837974683544305e-06, + "loss": 0.0086, + "step": 26250 + }, + { + "epoch": 15.0, + "learning_rate": 3.47746835443038e-06, + "loss": 0.0091, + "step": 26275 + }, + { + "epoch": 15.0, + "learning_rate": 3.4711392405063293e-06, + "loss": 0.0105, + "step": 26300 + }, + { + "epoch": 15.0, + "learning_rate": 3.465063291139241e-06, + "loss": 0.0138, + "step": 26325 + }, + { + "epoch": 15.0, + "learning_rate": 3.45873417721519e-06, + "loss": 0.0185, + "step": 26350 + }, + { + "epoch": 15.0, + "learning_rate": 3.4524050632911392e-06, + "loss": 0.015, + "step": 26375 + }, + { + "epoch": 15.0, + "learning_rate": 3.4460759493670886e-06, + "loss": 0.0177, + "step": 26400 + }, + { + "epoch": 15.0, + "learning_rate": 3.439746835443038e-06, + "loss": 0.014, + "step": 26425 + }, + { + "epoch": 15.0, + "learning_rate": 3.433417721518988e-06, + "loss": 0.0127, + "step": 26450 + }, + { + "epoch": 15.01, + "learning_rate": 3.4270886075949374e-06, + "loss": 0.0135, + "step": 26475 + }, + { + "epoch": 15.01, + "learning_rate": 3.4207594936708864e-06, + "loss": 0.0171, + "step": 26500 + }, + { + "epoch": 15.01, + "learning_rate": 3.4144303797468358e-06, + "loss": 0.0114, + "step": 26525 + }, + { + "epoch": 15.01, + "learning_rate": 3.4081012658227852e-06, + "loss": 0.0099, + "step": 26550 + }, + { + "epoch": 15.01, + "learning_rate": 3.4017721518987346e-06, + "loss": 0.0126, + "step": 26575 + }, + { + "epoch": 15.01, + "learning_rate": 3.395443037974684e-06, + "loss": 0.0116, + "step": 26600 + }, + { + "epoch": 15.01, + "learning_rate": 3.389113924050633e-06, + "loss": 0.0087, + "step": 26625 + }, + { + "epoch": 15.01, + "learning_rate": 3.3827848101265825e-06, + "loss": 0.0073, + "step": 26650 + }, + { + "epoch": 15.01, + "learning_rate": 3.376455696202532e-06, + "loss": 0.0102, + "step": 26675 + }, + { + "epoch": 15.01, + "learning_rate": 3.3701265822784814e-06, + "loss": 0.0106, + "step": 26700 + }, + { + "epoch": 15.01, + "learning_rate": 3.3637974683544304e-06, + "loss": 0.0094, + "step": 26725 + }, + { + "epoch": 15.01, + "learning_rate": 3.35746835443038e-06, + "loss": 0.0113, + "step": 26750 + }, + { + "epoch": 15.01, + "learning_rate": 3.3511392405063292e-06, + "loss": 0.0169, + "step": 26775 + }, + { + "epoch": 15.01, + "learning_rate": 3.3448101265822786e-06, + "loss": 0.0084, + "step": 26800 + }, + { + "epoch": 15.01, + "learning_rate": 3.338481012658228e-06, + "loss": 0.0153, + "step": 26825 + }, + { + "epoch": 15.01, + "learning_rate": 3.332151898734177e-06, + "loss": 0.012, + "step": 26850 + }, + { + "epoch": 15.02, + "learning_rate": 3.325822784810127e-06, + "loss": 0.0106, + "step": 26875 + }, + { + "epoch": 15.02, + "learning_rate": 3.3194936708860764e-06, + "loss": 0.0099, + "step": 26900 + }, + { + "epoch": 15.02, + "learning_rate": 3.3131645569620258e-06, + "loss": 0.008, + "step": 26925 + }, + { + "epoch": 15.02, + "learning_rate": 3.306835443037975e-06, + "loss": 0.0076, + "step": 26950 + }, + { + "epoch": 15.02, + "learning_rate": 3.3005063291139246e-06, + "loss": 0.0088, + "step": 26975 + }, + { + "epoch": 15.02, + "learning_rate": 3.2941772151898736e-06, + "loss": 0.009, + "step": 27000 + }, + { + "epoch": 15.02, + "eval_loss": 0.2693029046058655, + "eval_runtime": 1479.1374, + "eval_samples_per_second": 7.039, + "eval_steps_per_second": 0.44, + "eval_wer": 24.28819980812696, + "step": 27000 + }, + { + "epoch": 15.02, + "learning_rate": 3.287848101265823e-06, + "loss": 0.0115, + "step": 27025 + }, + { + "epoch": 15.02, + "learning_rate": 3.2815189873417725e-06, + "loss": 0.0099, + "step": 27050 + }, + { + "epoch": 15.02, + "learning_rate": 3.275189873417722e-06, + "loss": 0.0073, + "step": 27075 + }, + { + "epoch": 15.02, + "learning_rate": 3.268860759493671e-06, + "loss": 0.0061, + "step": 27100 + }, + { + "epoch": 15.02, + "learning_rate": 3.2625316455696204e-06, + "loss": 0.007, + "step": 27125 + }, + { + "epoch": 15.02, + "learning_rate": 3.2562025316455698e-06, + "loss": 0.0109, + "step": 27150 + }, + { + "epoch": 15.02, + "learning_rate": 3.2498734177215192e-06, + "loss": 0.0123, + "step": 27175 + }, + { + "epoch": 15.02, + "learning_rate": 3.2435443037974686e-06, + "loss": 0.0121, + "step": 27200 + }, + { + "epoch": 15.02, + "learning_rate": 3.2372151898734176e-06, + "loss": 0.0129, + "step": 27225 + }, + { + "epoch": 15.02, + "learning_rate": 3.230886075949367e-06, + "loss": 0.0128, + "step": 27250 + }, + { + "epoch": 15.03, + "learning_rate": 3.224556962025317e-06, + "loss": 0.0094, + "step": 27275 + }, + { + "epoch": 15.03, + "learning_rate": 3.2182278481012664e-06, + "loss": 0.0081, + "step": 27300 + }, + { + "epoch": 15.03, + "learning_rate": 3.2118987341772158e-06, + "loss": 0.0087, + "step": 27325 + }, + { + "epoch": 15.03, + "learning_rate": 3.2055696202531648e-06, + "loss": 0.0131, + "step": 27350 + }, + { + "epoch": 15.03, + "learning_rate": 3.199240506329114e-06, + "loss": 0.0073, + "step": 27375 + }, + { + "epoch": 15.03, + "learning_rate": 3.1929113924050636e-06, + "loss": 0.0086, + "step": 27400 + }, + { + "epoch": 15.03, + "learning_rate": 3.186582278481013e-06, + "loss": 0.0108, + "step": 27425 + }, + { + "epoch": 15.03, + "learning_rate": 3.1802531645569625e-06, + "loss": 0.01, + "step": 27450 + }, + { + "epoch": 15.03, + "learning_rate": 3.1739240506329115e-06, + "loss": 0.0123, + "step": 27475 + }, + { + "epoch": 15.03, + "learning_rate": 3.167594936708861e-06, + "loss": 0.0081, + "step": 27500 + }, + { + "epoch": 15.03, + "learning_rate": 3.1612658227848104e-06, + "loss": 0.0123, + "step": 27525 + }, + { + "epoch": 15.03, + "learning_rate": 3.1549367088607598e-06, + "loss": 0.0096, + "step": 27550 + }, + { + "epoch": 15.03, + "learning_rate": 3.1486075949367088e-06, + "loss": 0.0054, + "step": 27575 + }, + { + "epoch": 15.03, + "learning_rate": 3.1422784810126582e-06, + "loss": 0.006, + "step": 27600 + }, + { + "epoch": 15.03, + "learning_rate": 3.1359493670886076e-06, + "loss": 0.0089, + "step": 27625 + }, + { + "epoch": 15.03, + "learning_rate": 3.129620253164557e-06, + "loss": 0.0109, + "step": 27650 + }, + { + "epoch": 15.04, + "learning_rate": 3.1232911392405065e-06, + "loss": 0.0089, + "step": 27675 + }, + { + "epoch": 15.04, + "learning_rate": 3.1169620253164563e-06, + "loss": 0.0094, + "step": 27700 + }, + { + "epoch": 15.04, + "learning_rate": 3.1106329113924054e-06, + "loss": 0.0102, + "step": 27725 + }, + { + "epoch": 15.04, + "learning_rate": 3.1043037974683548e-06, + "loss": 0.0097, + "step": 27750 + }, + { + "epoch": 15.04, + "learning_rate": 3.097974683544304e-06, + "loss": 0.0078, + "step": 27775 + }, + { + "epoch": 15.04, + "learning_rate": 3.0916455696202536e-06, + "loss": 0.0073, + "step": 27800 + }, + { + "epoch": 15.04, + "learning_rate": 3.085316455696203e-06, + "loss": 0.0105, + "step": 27825 + }, + { + "epoch": 15.04, + "learning_rate": 3.078987341772152e-06, + "loss": 0.0083, + "step": 27850 + }, + { + "epoch": 15.04, + "learning_rate": 3.0726582278481015e-06, + "loss": 0.0138, + "step": 27875 + }, + { + "epoch": 15.04, + "learning_rate": 3.066329113924051e-06, + "loss": 0.0049, + "step": 27900 + }, + { + "epoch": 15.04, + "learning_rate": 3.0600000000000003e-06, + "loss": 0.0107, + "step": 27925 + }, + { + "epoch": 15.04, + "learning_rate": 3.0536708860759494e-06, + "loss": 0.0105, + "step": 27950 + }, + { + "epoch": 15.04, + "learning_rate": 3.0473417721518988e-06, + "loss": 0.0134, + "step": 27975 + }, + { + "epoch": 15.04, + "learning_rate": 3.041012658227848e-06, + "loss": 0.0137, + "step": 28000 + }, + { + "epoch": 15.04, + "eval_loss": 0.2780563235282898, + "eval_runtime": 1465.9542, + "eval_samples_per_second": 7.102, + "eval_steps_per_second": 0.444, + "eval_wer": 25.010162766874256, + "step": 28000 + }, + { + "epoch": 16.0, + "learning_rate": 3.0346835443037976e-06, + "loss": 0.009, + "step": 28025 + }, + { + "epoch": 16.0, + "learning_rate": 3.028354430379747e-06, + "loss": 0.0081, + "step": 28050 + }, + { + "epoch": 16.0, + "learning_rate": 3.022025316455696e-06, + "loss": 0.0109, + "step": 28075 + }, + { + "epoch": 16.0, + "learning_rate": 3.015696202531646e-06, + "loss": 0.0132, + "step": 28100 + }, + { + "epoch": 16.0, + "learning_rate": 3.0093670886075953e-06, + "loss": 0.0142, + "step": 28125 + }, + { + "epoch": 16.0, + "learning_rate": 3.0030379746835448e-06, + "loss": 0.0152, + "step": 28150 + }, + { + "epoch": 16.0, + "learning_rate": 2.996708860759494e-06, + "loss": 0.0074, + "step": 28175 + }, + { + "epoch": 16.0, + "learning_rate": 2.990379746835443e-06, + "loss": 0.0129, + "step": 28200 + }, + { + "epoch": 16.01, + "learning_rate": 2.9840506329113926e-06, + "loss": 0.0118, + "step": 28225 + }, + { + "epoch": 16.01, + "learning_rate": 2.977721518987342e-06, + "loss": 0.0153, + "step": 28250 + }, + { + "epoch": 16.01, + "learning_rate": 2.9713924050632915e-06, + "loss": 0.0121, + "step": 28275 + }, + { + "epoch": 16.01, + "learning_rate": 2.965063291139241e-06, + "loss": 0.0097, + "step": 28300 + }, + { + "epoch": 16.01, + "learning_rate": 2.95873417721519e-06, + "loss": 0.0083, + "step": 28325 + }, + { + "epoch": 16.01, + "learning_rate": 2.9524050632911393e-06, + "loss": 0.01, + "step": 28350 + }, + { + "epoch": 16.01, + "learning_rate": 2.9460759493670888e-06, + "loss": 0.0084, + "step": 28375 + }, + { + "epoch": 16.01, + "learning_rate": 2.939746835443038e-06, + "loss": 0.0124, + "step": 28400 + }, + { + "epoch": 16.01, + "learning_rate": 2.933417721518987e-06, + "loss": 0.0071, + "step": 28425 + }, + { + "epoch": 16.01, + "learning_rate": 2.9270886075949366e-06, + "loss": 0.0113, + "step": 28450 + }, + { + "epoch": 16.01, + "learning_rate": 2.920759493670886e-06, + "loss": 0.0085, + "step": 28475 + }, + { + "epoch": 16.01, + "learning_rate": 2.914430379746836e-06, + "loss": 0.0133, + "step": 28500 + }, + { + "epoch": 16.01, + "learning_rate": 2.9081012658227853e-06, + "loss": 0.014, + "step": 28525 + }, + { + "epoch": 16.01, + "learning_rate": 2.9017721518987348e-06, + "loss": 0.0105, + "step": 28550 + }, + { + "epoch": 16.01, + "learning_rate": 2.8954430379746838e-06, + "loss": 0.01, + "step": 28575 + }, + { + "epoch": 16.01, + "learning_rate": 2.889113924050633e-06, + "loss": 0.0135, + "step": 28600 + }, + { + "epoch": 16.02, + "learning_rate": 2.8827848101265826e-06, + "loss": 0.0118, + "step": 28625 + }, + { + "epoch": 16.02, + "learning_rate": 2.876455696202532e-06, + "loss": 0.0111, + "step": 28650 + }, + { + "epoch": 16.02, + "learning_rate": 2.8701265822784815e-06, + "loss": 0.008, + "step": 28675 + }, + { + "epoch": 16.02, + "learning_rate": 2.8637974683544305e-06, + "loss": 0.0103, + "step": 28700 + }, + { + "epoch": 16.02, + "learning_rate": 2.85746835443038e-06, + "loss": 0.0082, + "step": 28725 + }, + { + "epoch": 16.02, + "learning_rate": 2.8511392405063293e-06, + "loss": 0.0057, + "step": 28750 + }, + { + "epoch": 16.02, + "learning_rate": 2.8448101265822788e-06, + "loss": 0.0062, + "step": 28775 + }, + { + "epoch": 16.02, + "learning_rate": 2.8384810126582278e-06, + "loss": 0.0092, + "step": 28800 + }, + { + "epoch": 16.02, + "learning_rate": 2.832151898734177e-06, + "loss": 0.0084, + "step": 28825 + }, + { + "epoch": 16.02, + "learning_rate": 2.8258227848101266e-06, + "loss": 0.0099, + "step": 28850 + }, + { + "epoch": 16.02, + "learning_rate": 2.819493670886076e-06, + "loss": 0.0117, + "step": 28875 + }, + { + "epoch": 16.02, + "learning_rate": 2.8131645569620255e-06, + "loss": 0.0117, + "step": 28900 + }, + { + "epoch": 16.02, + "learning_rate": 2.8068354430379753e-06, + "loss": 0.0127, + "step": 28925 + }, + { + "epoch": 16.02, + "learning_rate": 2.8005063291139243e-06, + "loss": 0.0123, + "step": 28950 + }, + { + "epoch": 16.02, + "learning_rate": 2.7941772151898738e-06, + "loss": 0.0128, + "step": 28975 + }, + { + "epoch": 16.02, + "learning_rate": 2.787848101265823e-06, + "loss": 0.0073, + "step": 29000 + }, + { + "epoch": 16.02, + "eval_loss": 0.2685891091823578, + "eval_runtime": 1456.562, + "eval_samples_per_second": 7.148, + "eval_steps_per_second": 0.447, + "eval_wer": 23.278427291500677, + "step": 29000 + }, + { + "epoch": 16.03, + "learning_rate": 2.7815189873417726e-06, + "loss": 0.0068, + "step": 29025 + }, + { + "epoch": 16.03, + "learning_rate": 2.7751898734177216e-06, + "loss": 0.0087, + "step": 29050 + }, + { + "epoch": 16.03, + "learning_rate": 2.768860759493671e-06, + "loss": 0.0084, + "step": 29075 + }, + { + "epoch": 16.03, + "learning_rate": 2.7625316455696205e-06, + "loss": 0.0039, + "step": 29100 + }, + { + "epoch": 16.03, + "learning_rate": 2.75620253164557e-06, + "loss": 0.0066, + "step": 29125 + }, + { + "epoch": 16.03, + "learning_rate": 2.7498734177215193e-06, + "loss": 0.0108, + "step": 29150 + }, + { + "epoch": 16.03, + "learning_rate": 2.7435443037974683e-06, + "loss": 0.0052, + "step": 29175 + }, + { + "epoch": 16.03, + "learning_rate": 2.7372151898734178e-06, + "loss": 0.0061, + "step": 29200 + }, + { + "epoch": 16.03, + "learning_rate": 2.730886075949367e-06, + "loss": 0.0083, + "step": 29225 + }, + { + "epoch": 16.03, + "learning_rate": 2.7245569620253166e-06, + "loss": 0.0062, + "step": 29250 + }, + { + "epoch": 16.03, + "learning_rate": 2.7182278481012656e-06, + "loss": 0.0078, + "step": 29275 + }, + { + "epoch": 16.03, + "learning_rate": 2.711898734177215e-06, + "loss": 0.0127, + "step": 29300 + }, + { + "epoch": 16.03, + "learning_rate": 2.705569620253165e-06, + "loss": 0.0092, + "step": 29325 + }, + { + "epoch": 16.03, + "learning_rate": 2.6992405063291143e-06, + "loss": 0.0087, + "step": 29350 + }, + { + "epoch": 16.03, + "learning_rate": 2.6929113924050638e-06, + "loss": 0.0062, + "step": 29375 + }, + { + "epoch": 16.03, + "learning_rate": 2.686582278481013e-06, + "loss": 0.0065, + "step": 29400 + }, + { + "epoch": 16.04, + "learning_rate": 2.680253164556962e-06, + "loss": 0.0057, + "step": 29425 + }, + { + "epoch": 16.04, + "learning_rate": 2.6739240506329116e-06, + "loss": 0.0071, + "step": 29450 + }, + { + "epoch": 16.04, + "learning_rate": 2.667594936708861e-06, + "loss": 0.01, + "step": 29475 + }, + { + "epoch": 16.04, + "learning_rate": 2.6612658227848105e-06, + "loss": 0.009, + "step": 29500 + }, + { + "epoch": 16.04, + "learning_rate": 2.65493670886076e-06, + "loss": 0.0091, + "step": 29525 + }, + { + "epoch": 16.04, + "learning_rate": 2.648607594936709e-06, + "loss": 0.0104, + "step": 29550 + }, + { + "epoch": 16.04, + "learning_rate": 2.6422784810126583e-06, + "loss": 0.0071, + "step": 29575 + }, + { + "epoch": 16.04, + "learning_rate": 2.6359493670886078e-06, + "loss": 0.0081, + "step": 29600 + }, + { + "epoch": 16.04, + "learning_rate": 2.629620253164557e-06, + "loss": 0.008, + "step": 29625 + }, + { + "epoch": 16.04, + "learning_rate": 2.623291139240506e-06, + "loss": 0.0067, + "step": 29650 + }, + { + "epoch": 16.04, + "learning_rate": 2.6169620253164556e-06, + "loss": 0.0066, + "step": 29675 + }, + { + "epoch": 16.04, + "learning_rate": 2.610632911392405e-06, + "loss": 0.0121, + "step": 29700 + }, + { + "epoch": 16.04, + "learning_rate": 2.6043037974683545e-06, + "loss": 0.0075, + "step": 29725 + }, + { + "epoch": 16.04, + "learning_rate": 2.5979746835443043e-06, + "loss": 0.0111, + "step": 29750 + }, + { + "epoch": 17.0, + "learning_rate": 2.5916455696202538e-06, + "loss": 0.0095, + "step": 29775 + }, + { + "epoch": 17.0, + "learning_rate": 2.5853164556962028e-06, + "loss": 0.0077, + "step": 29800 + }, + { + "epoch": 17.0, + "learning_rate": 2.578987341772152e-06, + "loss": 0.0047, + "step": 29825 + }, + { + "epoch": 17.0, + "learning_rate": 2.5726582278481016e-06, + "loss": 0.0092, + "step": 29850 + }, + { + "epoch": 17.0, + "learning_rate": 2.566329113924051e-06, + "loss": 0.0191, + "step": 29875 + }, + { + "epoch": 17.0, + "learning_rate": 2.56e-06, + "loss": 0.0102, + "step": 29900 + }, + { + "epoch": 17.0, + "learning_rate": 2.5536708860759495e-06, + "loss": 0.0146, + "step": 29925 + }, + { + "epoch": 17.0, + "learning_rate": 2.547341772151899e-06, + "loss": 0.0096, + "step": 29950 + }, + { + "epoch": 17.01, + "learning_rate": 2.5410126582278483e-06, + "loss": 0.0159, + "step": 29975 + }, + { + "epoch": 17.01, + "learning_rate": 2.5346835443037978e-06, + "loss": 0.0094, + "step": 30000 + }, + { + "epoch": 17.01, + "eval_loss": 0.2636851966381073, + "eval_runtime": 1508.7196, + "eval_samples_per_second": 6.901, + "eval_steps_per_second": 0.431, + "eval_wer": 23.029642758418838, + "step": 30000 + }, + { + "epoch": 17.01, + "learning_rate": 2.5283544303797468e-06, + "loss": 0.0078, + "step": 30025 + }, + { + "epoch": 17.01, + "learning_rate": 2.522025316455696e-06, + "loss": 0.01, + "step": 30050 + }, + { + "epoch": 17.01, + "learning_rate": 2.5156962025316456e-06, + "loss": 0.0058, + "step": 30075 + }, + { + "epoch": 17.01, + "learning_rate": 2.509367088607595e-06, + "loss": 0.0092, + "step": 30100 + }, + { + "epoch": 17.01, + "learning_rate": 2.503037974683544e-06, + "loss": 0.0084, + "step": 30125 + }, + { + "epoch": 17.01, + "learning_rate": 2.496708860759494e-06, + "loss": 0.0069, + "step": 30150 + }, + { + "epoch": 17.01, + "learning_rate": 2.4903797468354433e-06, + "loss": 0.0059, + "step": 30175 + }, + { + "epoch": 17.01, + "learning_rate": 2.4840506329113923e-06, + "loss": 0.0097, + "step": 30200 + }, + { + "epoch": 17.01, + "learning_rate": 2.4777215189873418e-06, + "loss": 0.0126, + "step": 30225 + }, + { + "epoch": 17.01, + "learning_rate": 2.4713924050632916e-06, + "loss": 0.0096, + "step": 30250 + }, + { + "epoch": 17.01, + "learning_rate": 2.4650632911392406e-06, + "loss": 0.0094, + "step": 30275 + }, + { + "epoch": 17.01, + "learning_rate": 2.45873417721519e-06, + "loss": 0.0111, + "step": 30300 + }, + { + "epoch": 17.01, + "learning_rate": 2.4524050632911395e-06, + "loss": 0.0062, + "step": 30325 + }, + { + "epoch": 17.01, + "learning_rate": 2.446075949367089e-06, + "loss": 0.0078, + "step": 30350 + }, + { + "epoch": 17.02, + "learning_rate": 2.4397468354430383e-06, + "loss": 0.0043, + "step": 30375 + }, + { + "epoch": 17.02, + "learning_rate": 2.4336708860759494e-06, + "loss": 0.0102, + "step": 30400 + }, + { + "epoch": 17.02, + "learning_rate": 2.4273417721518988e-06, + "loss": 0.0101, + "step": 30425 + }, + { + "epoch": 17.02, + "learning_rate": 2.4210126582278482e-06, + "loss": 0.0066, + "step": 30450 + }, + { + "epoch": 17.02, + "learning_rate": 2.4146835443037976e-06, + "loss": 0.0078, + "step": 30475 + }, + { + "epoch": 17.02, + "learning_rate": 2.408354430379747e-06, + "loss": 0.0074, + "step": 30500 + }, + { + "epoch": 17.02, + "learning_rate": 2.4020253164556965e-06, + "loss": 0.0072, + "step": 30525 + }, + { + "epoch": 17.02, + "learning_rate": 2.395696202531646e-06, + "loss": 0.0109, + "step": 30550 + }, + { + "epoch": 17.02, + "learning_rate": 2.389367088607595e-06, + "loss": 0.0113, + "step": 30575 + }, + { + "epoch": 17.02, + "learning_rate": 2.3830379746835444e-06, + "loss": 0.0109, + "step": 30600 + }, + { + "epoch": 17.02, + "learning_rate": 2.3767088607594938e-06, + "loss": 0.0089, + "step": 30625 + }, + { + "epoch": 17.02, + "learning_rate": 2.3703797468354432e-06, + "loss": 0.0062, + "step": 30650 + }, + { + "epoch": 17.02, + "learning_rate": 2.3640506329113926e-06, + "loss": 0.0101, + "step": 30675 + }, + { + "epoch": 17.02, + "learning_rate": 2.357721518987342e-06, + "loss": 0.0096, + "step": 30700 + }, + { + "epoch": 17.02, + "learning_rate": 2.3513924050632915e-06, + "loss": 0.0093, + "step": 30725 + }, + { + "epoch": 17.02, + "learning_rate": 2.3450632911392405e-06, + "loss": 0.0113, + "step": 30750 + }, + { + "epoch": 17.03, + "learning_rate": 2.33873417721519e-06, + "loss": 0.0094, + "step": 30775 + }, + { + "epoch": 17.03, + "learning_rate": 2.3324050632911394e-06, + "loss": 0.0086, + "step": 30800 + }, + { + "epoch": 17.03, + "learning_rate": 2.3260759493670888e-06, + "loss": 0.0096, + "step": 30825 + }, + { + "epoch": 17.03, + "learning_rate": 2.319746835443038e-06, + "loss": 0.0086, + "step": 30850 + }, + { + "epoch": 17.03, + "learning_rate": 2.3134177215189876e-06, + "loss": 0.0064, + "step": 30875 + }, + { + "epoch": 17.03, + "learning_rate": 2.307088607594937e-06, + "loss": 0.0107, + "step": 30900 + }, + { + "epoch": 17.03, + "learning_rate": 2.3007594936708865e-06, + "loss": 0.0073, + "step": 30925 + }, + { + "epoch": 17.03, + "learning_rate": 2.2944303797468355e-06, + "loss": 0.005, + "step": 30950 + }, + { + "epoch": 17.03, + "learning_rate": 2.288101265822785e-06, + "loss": 0.0055, + "step": 30975 + }, + { + "epoch": 17.03, + "learning_rate": 2.2817721518987344e-06, + "loss": 0.006, + "step": 31000 + }, + { + "epoch": 17.03, + "eval_loss": 0.2709895372390747, + "eval_runtime": 1461.5544, + "eval_samples_per_second": 7.123, + "eval_steps_per_second": 0.445, + "eval_wer": 23.29468771849949, + "step": 31000 + }, + { + "epoch": 17.03, + "learning_rate": 2.2754430379746838e-06, + "loss": 0.0094, + "step": 31025 + }, + { + "epoch": 17.03, + "learning_rate": 2.2691139240506328e-06, + "loss": 0.0088, + "step": 31050 + }, + { + "epoch": 17.03, + "learning_rate": 2.2627848101265826e-06, + "loss": 0.0057, + "step": 31075 + }, + { + "epoch": 17.03, + "learning_rate": 2.256455696202532e-06, + "loss": 0.0066, + "step": 31100 + }, + { + "epoch": 17.03, + "learning_rate": 2.250126582278481e-06, + "loss": 0.008, + "step": 31125 + }, + { + "epoch": 17.03, + "learning_rate": 2.2437974683544305e-06, + "loss": 0.0048, + "step": 31150 + }, + { + "epoch": 17.04, + "learning_rate": 2.23746835443038e-06, + "loss": 0.0074, + "step": 31175 + }, + { + "epoch": 17.04, + "learning_rate": 2.2311392405063294e-06, + "loss": 0.0061, + "step": 31200 + }, + { + "epoch": 17.04, + "learning_rate": 2.2248101265822788e-06, + "loss": 0.0088, + "step": 31225 + }, + { + "epoch": 17.04, + "learning_rate": 2.2184810126582278e-06, + "loss": 0.0087, + "step": 31250 + }, + { + "epoch": 17.04, + "learning_rate": 2.2121518987341776e-06, + "loss": 0.0089, + "step": 31275 + }, + { + "epoch": 17.04, + "learning_rate": 2.2058227848101266e-06, + "loss": 0.0061, + "step": 31300 + }, + { + "epoch": 17.04, + "learning_rate": 2.199493670886076e-06, + "loss": 0.0097, + "step": 31325 + }, + { + "epoch": 17.04, + "learning_rate": 2.1931645569620255e-06, + "loss": 0.0063, + "step": 31350 + }, + { + "epoch": 17.04, + "learning_rate": 2.186835443037975e-06, + "loss": 0.0084, + "step": 31375 + }, + { + "epoch": 17.04, + "learning_rate": 2.1805063291139243e-06, + "loss": 0.0089, + "step": 31400 + }, + { + "epoch": 17.04, + "learning_rate": 2.1741772151898734e-06, + "loss": 0.0077, + "step": 31425 + }, + { + "epoch": 17.04, + "learning_rate": 2.1678481012658228e-06, + "loss": 0.0153, + "step": 31450 + }, + { + "epoch": 17.04, + "learning_rate": 2.1615189873417726e-06, + "loss": 0.0088, + "step": 31475 + }, + { + "epoch": 17.04, + "learning_rate": 2.1551898734177216e-06, + "loss": 0.0095, + "step": 31500 + }, + { + "epoch": 18.0, + "learning_rate": 2.148860759493671e-06, + "loss": 0.0084, + "step": 31525 + }, + { + "epoch": 18.0, + "learning_rate": 2.1425316455696205e-06, + "loss": 0.0054, + "step": 31550 + }, + { + "epoch": 18.0, + "learning_rate": 2.13620253164557e-06, + "loss": 0.013, + "step": 31575 + }, + { + "epoch": 18.0, + "learning_rate": 2.129873417721519e-06, + "loss": 0.0074, + "step": 31600 + }, + { + "epoch": 18.0, + "learning_rate": 2.1235443037974684e-06, + "loss": 0.0083, + "step": 31625 + }, + { + "epoch": 18.0, + "learning_rate": 2.1172151898734178e-06, + "loss": 0.0118, + "step": 31650 + }, + { + "epoch": 18.0, + "learning_rate": 2.110886075949367e-06, + "loss": 0.0095, + "step": 31675 + }, + { + "epoch": 18.0, + "learning_rate": 2.1045569620253166e-06, + "loss": 0.0075, + "step": 31700 + }, + { + "epoch": 18.01, + "learning_rate": 2.098227848101266e-06, + "loss": 0.0149, + "step": 31725 + }, + { + "epoch": 18.01, + "learning_rate": 2.0918987341772155e-06, + "loss": 0.0095, + "step": 31750 + }, + { + "epoch": 18.01, + "learning_rate": 2.085569620253165e-06, + "loss": 0.0098, + "step": 31775 + }, + { + "epoch": 18.01, + "learning_rate": 2.079240506329114e-06, + "loss": 0.0058, + "step": 31800 + }, + { + "epoch": 18.01, + "learning_rate": 2.0729113924050633e-06, + "loss": 0.0072, + "step": 31825 + }, + { + "epoch": 18.01, + "learning_rate": 2.0665822784810128e-06, + "loss": 0.0103, + "step": 31850 + }, + { + "epoch": 18.01, + "learning_rate": 2.060253164556962e-06, + "loss": 0.0085, + "step": 31875 + }, + { + "epoch": 18.01, + "learning_rate": 2.0539240506329116e-06, + "loss": 0.0064, + "step": 31900 + }, + { + "epoch": 18.01, + "learning_rate": 2.047594936708861e-06, + "loss": 0.0077, + "step": 31925 + }, + { + "epoch": 18.01, + "learning_rate": 2.0412658227848105e-06, + "loss": 0.0086, + "step": 31950 + }, + { + "epoch": 18.01, + "learning_rate": 2.0349367088607595e-06, + "loss": 0.0136, + "step": 31975 + }, + { + "epoch": 18.01, + "learning_rate": 2.028607594936709e-06, + "loss": 0.0085, + "step": 32000 + }, + { + "epoch": 18.01, + "eval_loss": 0.26488059759140015, + "eval_runtime": 1466.953, + "eval_samples_per_second": 7.097, + "eval_steps_per_second": 0.444, + "eval_wer": 23.089806338314446, + "step": 32000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.477037795917824e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-32000/training_args.bin b/checkpoint-32000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-32000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-33000/config.json b/checkpoint-33000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-33000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-33000/generation_config.json b/checkpoint-33000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-33000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-33000/optimizer.pt b/checkpoint-33000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..664bc866d071465ff857f98d60a643a9d93fa952 --- /dev/null +++ b/checkpoint-33000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff3e8c242e053712c3e1b76c1b417cebb6ff0f8771d01bcb4e376f6ebae5dd26 +size 1934161093 diff --git a/checkpoint-33000/preprocessor_config.json b/checkpoint-33000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-33000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-33000/pytorch_model.bin b/checkpoint-33000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..fb12948d004ccb27ba177b1573dd67c841cc8fee --- /dev/null +++ b/checkpoint-33000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c08b65c3c977f8b45beb9e3773e28f59aca8d53a905194bf2eeece0bd6d5d311 +size 967102601 diff --git a/checkpoint-33000/rng_state.pth b/checkpoint-33000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..02a3ee92bf19fe3b2bf4a1fd7b3efa593b317608 --- /dev/null +++ b/checkpoint-33000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8baa45e71fac06e826b87e3d3de5dcbf9b9b2ab7b293ca0b37e5afd921b8ff0 +size 14639 diff --git a/checkpoint-33000/scaler.pt b/checkpoint-33000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3ba32df925fa5cd1eda61993dfcb077336a3c00 --- /dev/null +++ b/checkpoint-33000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f3247035787340edc9d864411f6a02deebfe9b8832451670f4357d659f8e5bc +size 557 diff --git a/checkpoint-33000/scheduler.pt b/checkpoint-33000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..53d757000d4aeb084f4b6b8401f3b69d8fa3919c --- /dev/null +++ b/checkpoint-33000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b982dcf8d977faeb833c8a852300f701931f8ddd719b208d3393893a7afa42cf +size 627 diff --git a/checkpoint-33000/trainer_state.json b/checkpoint-33000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5adabaca61a3f07e5d77ee18968bf1ea7aa8bd12 --- /dev/null +++ b/checkpoint-33000/trainer_state.json @@ -0,0 +1,8233 @@ +{ + "best_metric": 23.029642758418838, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-30000", + "epoch": 18.03705, + "global_step": 33000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + }, + { + "epoch": 13.03, + "learning_rate": 4.046835443037975e-06, + "loss": 0.0128, + "step": 24025 + }, + { + "epoch": 13.03, + "learning_rate": 4.040506329113924e-06, + "loss": 0.012, + "step": 24050 + }, + { + "epoch": 13.03, + "learning_rate": 4.034177215189874e-06, + "loss": 0.0098, + "step": 24075 + }, + { + "epoch": 13.03, + "learning_rate": 4.027848101265823e-06, + "loss": 0.0091, + "step": 24100 + }, + { + "epoch": 13.03, + "learning_rate": 4.021518987341773e-06, + "loss": 0.0077, + "step": 24125 + }, + { + "epoch": 13.03, + "learning_rate": 4.015189873417722e-06, + "loss": 0.0135, + "step": 24150 + }, + { + "epoch": 13.04, + "learning_rate": 4.0088607594936715e-06, + "loss": 0.0124, + "step": 24175 + }, + { + "epoch": 13.04, + "learning_rate": 4.002531645569621e-06, + "loss": 0.0152, + "step": 24200 + }, + { + "epoch": 13.04, + "learning_rate": 3.99620253164557e-06, + "loss": 0.0127, + "step": 24225 + }, + { + "epoch": 13.04, + "learning_rate": 3.989873417721519e-06, + "loss": 0.0066, + "step": 24250 + }, + { + "epoch": 13.04, + "learning_rate": 3.983544303797468e-06, + "loss": 0.0163, + "step": 24275 + }, + { + "epoch": 13.04, + "learning_rate": 3.977215189873418e-06, + "loss": 0.0095, + "step": 24300 + }, + { + "epoch": 13.04, + "learning_rate": 3.970886075949367e-06, + "loss": 0.0103, + "step": 24325 + }, + { + "epoch": 13.04, + "learning_rate": 3.964556962025317e-06, + "loss": 0.0112, + "step": 24350 + }, + { + "epoch": 13.04, + "learning_rate": 3.958227848101266e-06, + "loss": 0.01, + "step": 24375 + }, + { + "epoch": 13.04, + "learning_rate": 3.9518987341772155e-06, + "loss": 0.0131, + "step": 24400 + }, + { + "epoch": 13.04, + "learning_rate": 3.945569620253165e-06, + "loss": 0.0143, + "step": 24425 + }, + { + "epoch": 13.04, + "learning_rate": 3.939240506329114e-06, + "loss": 0.0151, + "step": 24450 + }, + { + "epoch": 13.04, + "learning_rate": 3.932911392405064e-06, + "loss": 0.0129, + "step": 24475 + }, + { + "epoch": 13.04, + "learning_rate": 3.926582278481013e-06, + "loss": 0.0125, + "step": 24500 + }, + { + "epoch": 14.0, + "learning_rate": 3.920506329113925e-06, + "loss": 0.0165, + "step": 24525 + }, + { + "epoch": 14.0, + "learning_rate": 3.914177215189874e-06, + "loss": 0.0114, + "step": 24550 + }, + { + "epoch": 14.0, + "learning_rate": 3.9078481012658235e-06, + "loss": 0.0111, + "step": 24575 + }, + { + "epoch": 14.0, + "learning_rate": 3.901518987341772e-06, + "loss": 0.0134, + "step": 24600 + }, + { + "epoch": 14.0, + "learning_rate": 3.8951898734177215e-06, + "loss": 0.0138, + "step": 24625 + }, + { + "epoch": 14.0, + "learning_rate": 3.888860759493671e-06, + "loss": 0.0151, + "step": 24650 + }, + { + "epoch": 14.0, + "learning_rate": 3.88253164556962e-06, + "loss": 0.0101, + "step": 24675 + }, + { + "epoch": 14.0, + "learning_rate": 3.87620253164557e-06, + "loss": 0.0108, + "step": 24700 + }, + { + "epoch": 14.01, + "learning_rate": 3.869873417721519e-06, + "loss": 0.0151, + "step": 24725 + }, + { + "epoch": 14.01, + "learning_rate": 3.863544303797469e-06, + "loss": 0.0132, + "step": 24750 + }, + { + "epoch": 14.01, + "learning_rate": 3.857215189873418e-06, + "loss": 0.012, + "step": 24775 + }, + { + "epoch": 14.01, + "learning_rate": 3.8508860759493675e-06, + "loss": 0.017, + "step": 24800 + }, + { + "epoch": 14.01, + "learning_rate": 3.844556962025317e-06, + "loss": 0.0141, + "step": 24825 + }, + { + "epoch": 14.01, + "learning_rate": 3.838227848101266e-06, + "loss": 0.0166, + "step": 24850 + }, + { + "epoch": 14.01, + "learning_rate": 3.831898734177216e-06, + "loss": 0.0112, + "step": 24875 + }, + { + "epoch": 14.01, + "learning_rate": 3.825569620253165e-06, + "loss": 0.0097, + "step": 24900 + }, + { + "epoch": 14.01, + "learning_rate": 3.819240506329115e-06, + "loss": 0.0138, + "step": 24925 + }, + { + "epoch": 14.01, + "learning_rate": 3.8129113924050636e-06, + "loss": 0.0069, + "step": 24950 + }, + { + "epoch": 14.01, + "learning_rate": 3.806582278481013e-06, + "loss": 0.0111, + "step": 24975 + }, + { + "epoch": 14.01, + "learning_rate": 3.8002531645569625e-06, + "loss": 0.0149, + "step": 25000 + }, + { + "epoch": 14.01, + "eval_loss": 0.2640444040298462, + "eval_runtime": 1478.4977, + "eval_samples_per_second": 7.042, + "eval_steps_per_second": 0.44, + "eval_wer": 24.063805915543345, + "step": 25000 + }, + { + "epoch": 14.01, + "learning_rate": 3.7939240506329115e-06, + "loss": 0.0176, + "step": 25025 + }, + { + "epoch": 14.01, + "learning_rate": 3.787594936708861e-06, + "loss": 0.0104, + "step": 25050 + }, + { + "epoch": 14.01, + "learning_rate": 3.7812658227848103e-06, + "loss": 0.0084, + "step": 25075 + }, + { + "epoch": 14.01, + "learning_rate": 3.7749367088607598e-06, + "loss": 0.0118, + "step": 25100 + }, + { + "epoch": 14.02, + "learning_rate": 3.768607594936709e-06, + "loss": 0.0099, + "step": 25125 + }, + { + "epoch": 14.02, + "learning_rate": 3.762278481012658e-06, + "loss": 0.0089, + "step": 25150 + }, + { + "epoch": 14.02, + "learning_rate": 3.7559493670886076e-06, + "loss": 0.0119, + "step": 25175 + }, + { + "epoch": 14.02, + "learning_rate": 3.749620253164557e-06, + "loss": 0.0118, + "step": 25200 + }, + { + "epoch": 14.02, + "learning_rate": 3.743291139240507e-06, + "loss": 0.0108, + "step": 25225 + }, + { + "epoch": 14.02, + "learning_rate": 3.7369620253164563e-06, + "loss": 0.0101, + "step": 25250 + }, + { + "epoch": 14.02, + "learning_rate": 3.7306329113924058e-06, + "loss": 0.0095, + "step": 25275 + }, + { + "epoch": 14.02, + "learning_rate": 3.7243037974683548e-06, + "loss": 0.0106, + "step": 25300 + }, + { + "epoch": 14.02, + "learning_rate": 3.717974683544304e-06, + "loss": 0.0135, + "step": 25325 + }, + { + "epoch": 14.02, + "learning_rate": 3.7116455696202536e-06, + "loss": 0.0115, + "step": 25350 + }, + { + "epoch": 14.02, + "learning_rate": 3.705316455696203e-06, + "loss": 0.0114, + "step": 25375 + }, + { + "epoch": 14.02, + "learning_rate": 3.698987341772152e-06, + "loss": 0.0119, + "step": 25400 + }, + { + "epoch": 14.02, + "learning_rate": 3.6926582278481015e-06, + "loss": 0.0123, + "step": 25425 + }, + { + "epoch": 14.02, + "learning_rate": 3.686329113924051e-06, + "loss": 0.0138, + "step": 25450 + }, + { + "epoch": 14.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 0.0133, + "step": 25475 + }, + { + "epoch": 14.02, + "learning_rate": 3.6736708860759498e-06, + "loss": 0.0106, + "step": 25500 + }, + { + "epoch": 14.03, + "learning_rate": 3.6673417721518988e-06, + "loss": 0.0099, + "step": 25525 + }, + { + "epoch": 14.03, + "learning_rate": 3.661012658227848e-06, + "loss": 0.0104, + "step": 25550 + }, + { + "epoch": 14.03, + "learning_rate": 3.6546835443037976e-06, + "loss": 0.0143, + "step": 25575 + }, + { + "epoch": 14.03, + "learning_rate": 3.648354430379747e-06, + "loss": 0.0096, + "step": 25600 + }, + { + "epoch": 14.03, + "learning_rate": 3.642025316455696e-06, + "loss": 0.0094, + "step": 25625 + }, + { + "epoch": 14.03, + "learning_rate": 3.6356962025316463e-06, + "loss": 0.0162, + "step": 25650 + }, + { + "epoch": 14.03, + "learning_rate": 3.6293670886075953e-06, + "loss": 0.008, + "step": 25675 + }, + { + "epoch": 14.03, + "learning_rate": 3.6230379746835448e-06, + "loss": 0.0094, + "step": 25700 + }, + { + "epoch": 14.03, + "learning_rate": 3.616708860759494e-06, + "loss": 0.0122, + "step": 25725 + }, + { + "epoch": 14.03, + "learning_rate": 3.6103797468354436e-06, + "loss": 0.0128, + "step": 25750 + }, + { + "epoch": 14.03, + "learning_rate": 3.6040506329113926e-06, + "loss": 0.0117, + "step": 25775 + }, + { + "epoch": 14.03, + "learning_rate": 3.597721518987342e-06, + "loss": 0.0067, + "step": 25800 + }, + { + "epoch": 14.03, + "learning_rate": 3.5913924050632915e-06, + "loss": 0.0078, + "step": 25825 + }, + { + "epoch": 14.03, + "learning_rate": 3.585063291139241e-06, + "loss": 0.0071, + "step": 25850 + }, + { + "epoch": 14.03, + "learning_rate": 3.57873417721519e-06, + "loss": 0.0093, + "step": 25875 + }, + { + "epoch": 14.03, + "learning_rate": 3.5724050632911393e-06, + "loss": 0.0064, + "step": 25900 + }, + { + "epoch": 14.04, + "learning_rate": 3.5660759493670888e-06, + "loss": 0.0067, + "step": 25925 + }, + { + "epoch": 14.04, + "learning_rate": 3.559746835443038e-06, + "loss": 0.0076, + "step": 25950 + }, + { + "epoch": 14.04, + "learning_rate": 3.5534177215189876e-06, + "loss": 0.0099, + "step": 25975 + }, + { + "epoch": 14.04, + "learning_rate": 3.5470886075949366e-06, + "loss": 0.0111, + "step": 26000 + }, + { + "epoch": 14.04, + "eval_loss": 0.2734878659248352, + "eval_runtime": 1456.7487, + "eval_samples_per_second": 7.147, + "eval_steps_per_second": 0.447, + "eval_wer": 24.359745686921737, + "step": 26000 + }, + { + "epoch": 14.04, + "learning_rate": 3.540759493670886e-06, + "loss": 0.0134, + "step": 26025 + }, + { + "epoch": 14.04, + "learning_rate": 3.534430379746836e-06, + "loss": 0.008, + "step": 26050 + }, + { + "epoch": 14.04, + "learning_rate": 3.5281012658227853e-06, + "loss": 0.0138, + "step": 26075 + }, + { + "epoch": 14.04, + "learning_rate": 3.5217721518987348e-06, + "loss": 0.0075, + "step": 26100 + }, + { + "epoch": 14.04, + "learning_rate": 3.515443037974684e-06, + "loss": 0.0097, + "step": 26125 + }, + { + "epoch": 14.04, + "learning_rate": 3.509113924050633e-06, + "loss": 0.0139, + "step": 26150 + }, + { + "epoch": 14.04, + "learning_rate": 3.5027848101265826e-06, + "loss": 0.0099, + "step": 26175 + }, + { + "epoch": 14.04, + "learning_rate": 3.496455696202532e-06, + "loss": 0.0148, + "step": 26200 + }, + { + "epoch": 14.04, + "learning_rate": 3.4901265822784815e-06, + "loss": 0.0119, + "step": 26225 + }, + { + "epoch": 14.04, + "learning_rate": 3.4837974683544305e-06, + "loss": 0.0086, + "step": 26250 + }, + { + "epoch": 15.0, + "learning_rate": 3.47746835443038e-06, + "loss": 0.0091, + "step": 26275 + }, + { + "epoch": 15.0, + "learning_rate": 3.4711392405063293e-06, + "loss": 0.0105, + "step": 26300 + }, + { + "epoch": 15.0, + "learning_rate": 3.465063291139241e-06, + "loss": 0.0138, + "step": 26325 + }, + { + "epoch": 15.0, + "learning_rate": 3.45873417721519e-06, + "loss": 0.0185, + "step": 26350 + }, + { + "epoch": 15.0, + "learning_rate": 3.4524050632911392e-06, + "loss": 0.015, + "step": 26375 + }, + { + "epoch": 15.0, + "learning_rate": 3.4460759493670886e-06, + "loss": 0.0177, + "step": 26400 + }, + { + "epoch": 15.0, + "learning_rate": 3.439746835443038e-06, + "loss": 0.014, + "step": 26425 + }, + { + "epoch": 15.0, + "learning_rate": 3.433417721518988e-06, + "loss": 0.0127, + "step": 26450 + }, + { + "epoch": 15.01, + "learning_rate": 3.4270886075949374e-06, + "loss": 0.0135, + "step": 26475 + }, + { + "epoch": 15.01, + "learning_rate": 3.4207594936708864e-06, + "loss": 0.0171, + "step": 26500 + }, + { + "epoch": 15.01, + "learning_rate": 3.4144303797468358e-06, + "loss": 0.0114, + "step": 26525 + }, + { + "epoch": 15.01, + "learning_rate": 3.4081012658227852e-06, + "loss": 0.0099, + "step": 26550 + }, + { + "epoch": 15.01, + "learning_rate": 3.4017721518987346e-06, + "loss": 0.0126, + "step": 26575 + }, + { + "epoch": 15.01, + "learning_rate": 3.395443037974684e-06, + "loss": 0.0116, + "step": 26600 + }, + { + "epoch": 15.01, + "learning_rate": 3.389113924050633e-06, + "loss": 0.0087, + "step": 26625 + }, + { + "epoch": 15.01, + "learning_rate": 3.3827848101265825e-06, + "loss": 0.0073, + "step": 26650 + }, + { + "epoch": 15.01, + "learning_rate": 3.376455696202532e-06, + "loss": 0.0102, + "step": 26675 + }, + { + "epoch": 15.01, + "learning_rate": 3.3701265822784814e-06, + "loss": 0.0106, + "step": 26700 + }, + { + "epoch": 15.01, + "learning_rate": 3.3637974683544304e-06, + "loss": 0.0094, + "step": 26725 + }, + { + "epoch": 15.01, + "learning_rate": 3.35746835443038e-06, + "loss": 0.0113, + "step": 26750 + }, + { + "epoch": 15.01, + "learning_rate": 3.3511392405063292e-06, + "loss": 0.0169, + "step": 26775 + }, + { + "epoch": 15.01, + "learning_rate": 3.3448101265822786e-06, + "loss": 0.0084, + "step": 26800 + }, + { + "epoch": 15.01, + "learning_rate": 3.338481012658228e-06, + "loss": 0.0153, + "step": 26825 + }, + { + "epoch": 15.01, + "learning_rate": 3.332151898734177e-06, + "loss": 0.012, + "step": 26850 + }, + { + "epoch": 15.02, + "learning_rate": 3.325822784810127e-06, + "loss": 0.0106, + "step": 26875 + }, + { + "epoch": 15.02, + "learning_rate": 3.3194936708860764e-06, + "loss": 0.0099, + "step": 26900 + }, + { + "epoch": 15.02, + "learning_rate": 3.3131645569620258e-06, + "loss": 0.008, + "step": 26925 + }, + { + "epoch": 15.02, + "learning_rate": 3.306835443037975e-06, + "loss": 0.0076, + "step": 26950 + }, + { + "epoch": 15.02, + "learning_rate": 3.3005063291139246e-06, + "loss": 0.0088, + "step": 26975 + }, + { + "epoch": 15.02, + "learning_rate": 3.2941772151898736e-06, + "loss": 0.009, + "step": 27000 + }, + { + "epoch": 15.02, + "eval_loss": 0.2693029046058655, + "eval_runtime": 1479.1374, + "eval_samples_per_second": 7.039, + "eval_steps_per_second": 0.44, + "eval_wer": 24.28819980812696, + "step": 27000 + }, + { + "epoch": 15.02, + "learning_rate": 3.287848101265823e-06, + "loss": 0.0115, + "step": 27025 + }, + { + "epoch": 15.02, + "learning_rate": 3.2815189873417725e-06, + "loss": 0.0099, + "step": 27050 + }, + { + "epoch": 15.02, + "learning_rate": 3.275189873417722e-06, + "loss": 0.0073, + "step": 27075 + }, + { + "epoch": 15.02, + "learning_rate": 3.268860759493671e-06, + "loss": 0.0061, + "step": 27100 + }, + { + "epoch": 15.02, + "learning_rate": 3.2625316455696204e-06, + "loss": 0.007, + "step": 27125 + }, + { + "epoch": 15.02, + "learning_rate": 3.2562025316455698e-06, + "loss": 0.0109, + "step": 27150 + }, + { + "epoch": 15.02, + "learning_rate": 3.2498734177215192e-06, + "loss": 0.0123, + "step": 27175 + }, + { + "epoch": 15.02, + "learning_rate": 3.2435443037974686e-06, + "loss": 0.0121, + "step": 27200 + }, + { + "epoch": 15.02, + "learning_rate": 3.2372151898734176e-06, + "loss": 0.0129, + "step": 27225 + }, + { + "epoch": 15.02, + "learning_rate": 3.230886075949367e-06, + "loss": 0.0128, + "step": 27250 + }, + { + "epoch": 15.03, + "learning_rate": 3.224556962025317e-06, + "loss": 0.0094, + "step": 27275 + }, + { + "epoch": 15.03, + "learning_rate": 3.2182278481012664e-06, + "loss": 0.0081, + "step": 27300 + }, + { + "epoch": 15.03, + "learning_rate": 3.2118987341772158e-06, + "loss": 0.0087, + "step": 27325 + }, + { + "epoch": 15.03, + "learning_rate": 3.2055696202531648e-06, + "loss": 0.0131, + "step": 27350 + }, + { + "epoch": 15.03, + "learning_rate": 3.199240506329114e-06, + "loss": 0.0073, + "step": 27375 + }, + { + "epoch": 15.03, + "learning_rate": 3.1929113924050636e-06, + "loss": 0.0086, + "step": 27400 + }, + { + "epoch": 15.03, + "learning_rate": 3.186582278481013e-06, + "loss": 0.0108, + "step": 27425 + }, + { + "epoch": 15.03, + "learning_rate": 3.1802531645569625e-06, + "loss": 0.01, + "step": 27450 + }, + { + "epoch": 15.03, + "learning_rate": 3.1739240506329115e-06, + "loss": 0.0123, + "step": 27475 + }, + { + "epoch": 15.03, + "learning_rate": 3.167594936708861e-06, + "loss": 0.0081, + "step": 27500 + }, + { + "epoch": 15.03, + "learning_rate": 3.1612658227848104e-06, + "loss": 0.0123, + "step": 27525 + }, + { + "epoch": 15.03, + "learning_rate": 3.1549367088607598e-06, + "loss": 0.0096, + "step": 27550 + }, + { + "epoch": 15.03, + "learning_rate": 3.1486075949367088e-06, + "loss": 0.0054, + "step": 27575 + }, + { + "epoch": 15.03, + "learning_rate": 3.1422784810126582e-06, + "loss": 0.006, + "step": 27600 + }, + { + "epoch": 15.03, + "learning_rate": 3.1359493670886076e-06, + "loss": 0.0089, + "step": 27625 + }, + { + "epoch": 15.03, + "learning_rate": 3.129620253164557e-06, + "loss": 0.0109, + "step": 27650 + }, + { + "epoch": 15.04, + "learning_rate": 3.1232911392405065e-06, + "loss": 0.0089, + "step": 27675 + }, + { + "epoch": 15.04, + "learning_rate": 3.1169620253164563e-06, + "loss": 0.0094, + "step": 27700 + }, + { + "epoch": 15.04, + "learning_rate": 3.1106329113924054e-06, + "loss": 0.0102, + "step": 27725 + }, + { + "epoch": 15.04, + "learning_rate": 3.1043037974683548e-06, + "loss": 0.0097, + "step": 27750 + }, + { + "epoch": 15.04, + "learning_rate": 3.097974683544304e-06, + "loss": 0.0078, + "step": 27775 + }, + { + "epoch": 15.04, + "learning_rate": 3.0916455696202536e-06, + "loss": 0.0073, + "step": 27800 + }, + { + "epoch": 15.04, + "learning_rate": 3.085316455696203e-06, + "loss": 0.0105, + "step": 27825 + }, + { + "epoch": 15.04, + "learning_rate": 3.078987341772152e-06, + "loss": 0.0083, + "step": 27850 + }, + { + "epoch": 15.04, + "learning_rate": 3.0726582278481015e-06, + "loss": 0.0138, + "step": 27875 + }, + { + "epoch": 15.04, + "learning_rate": 3.066329113924051e-06, + "loss": 0.0049, + "step": 27900 + }, + { + "epoch": 15.04, + "learning_rate": 3.0600000000000003e-06, + "loss": 0.0107, + "step": 27925 + }, + { + "epoch": 15.04, + "learning_rate": 3.0536708860759494e-06, + "loss": 0.0105, + "step": 27950 + }, + { + "epoch": 15.04, + "learning_rate": 3.0473417721518988e-06, + "loss": 0.0134, + "step": 27975 + }, + { + "epoch": 15.04, + "learning_rate": 3.041012658227848e-06, + "loss": 0.0137, + "step": 28000 + }, + { + "epoch": 15.04, + "eval_loss": 0.2780563235282898, + "eval_runtime": 1465.9542, + "eval_samples_per_second": 7.102, + "eval_steps_per_second": 0.444, + "eval_wer": 25.010162766874256, + "step": 28000 + }, + { + "epoch": 16.0, + "learning_rate": 3.0346835443037976e-06, + "loss": 0.009, + "step": 28025 + }, + { + "epoch": 16.0, + "learning_rate": 3.028354430379747e-06, + "loss": 0.0081, + "step": 28050 + }, + { + "epoch": 16.0, + "learning_rate": 3.022025316455696e-06, + "loss": 0.0109, + "step": 28075 + }, + { + "epoch": 16.0, + "learning_rate": 3.015696202531646e-06, + "loss": 0.0132, + "step": 28100 + }, + { + "epoch": 16.0, + "learning_rate": 3.0093670886075953e-06, + "loss": 0.0142, + "step": 28125 + }, + { + "epoch": 16.0, + "learning_rate": 3.0030379746835448e-06, + "loss": 0.0152, + "step": 28150 + }, + { + "epoch": 16.0, + "learning_rate": 2.996708860759494e-06, + "loss": 0.0074, + "step": 28175 + }, + { + "epoch": 16.0, + "learning_rate": 2.990379746835443e-06, + "loss": 0.0129, + "step": 28200 + }, + { + "epoch": 16.01, + "learning_rate": 2.9840506329113926e-06, + "loss": 0.0118, + "step": 28225 + }, + { + "epoch": 16.01, + "learning_rate": 2.977721518987342e-06, + "loss": 0.0153, + "step": 28250 + }, + { + "epoch": 16.01, + "learning_rate": 2.9713924050632915e-06, + "loss": 0.0121, + "step": 28275 + }, + { + "epoch": 16.01, + "learning_rate": 2.965063291139241e-06, + "loss": 0.0097, + "step": 28300 + }, + { + "epoch": 16.01, + "learning_rate": 2.95873417721519e-06, + "loss": 0.0083, + "step": 28325 + }, + { + "epoch": 16.01, + "learning_rate": 2.9524050632911393e-06, + "loss": 0.01, + "step": 28350 + }, + { + "epoch": 16.01, + "learning_rate": 2.9460759493670888e-06, + "loss": 0.0084, + "step": 28375 + }, + { + "epoch": 16.01, + "learning_rate": 2.939746835443038e-06, + "loss": 0.0124, + "step": 28400 + }, + { + "epoch": 16.01, + "learning_rate": 2.933417721518987e-06, + "loss": 0.0071, + "step": 28425 + }, + { + "epoch": 16.01, + "learning_rate": 2.9270886075949366e-06, + "loss": 0.0113, + "step": 28450 + }, + { + "epoch": 16.01, + "learning_rate": 2.920759493670886e-06, + "loss": 0.0085, + "step": 28475 + }, + { + "epoch": 16.01, + "learning_rate": 2.914430379746836e-06, + "loss": 0.0133, + "step": 28500 + }, + { + "epoch": 16.01, + "learning_rate": 2.9081012658227853e-06, + "loss": 0.014, + "step": 28525 + }, + { + "epoch": 16.01, + "learning_rate": 2.9017721518987348e-06, + "loss": 0.0105, + "step": 28550 + }, + { + "epoch": 16.01, + "learning_rate": 2.8954430379746838e-06, + "loss": 0.01, + "step": 28575 + }, + { + "epoch": 16.01, + "learning_rate": 2.889113924050633e-06, + "loss": 0.0135, + "step": 28600 + }, + { + "epoch": 16.02, + "learning_rate": 2.8827848101265826e-06, + "loss": 0.0118, + "step": 28625 + }, + { + "epoch": 16.02, + "learning_rate": 2.876455696202532e-06, + "loss": 0.0111, + "step": 28650 + }, + { + "epoch": 16.02, + "learning_rate": 2.8701265822784815e-06, + "loss": 0.008, + "step": 28675 + }, + { + "epoch": 16.02, + "learning_rate": 2.8637974683544305e-06, + "loss": 0.0103, + "step": 28700 + }, + { + "epoch": 16.02, + "learning_rate": 2.85746835443038e-06, + "loss": 0.0082, + "step": 28725 + }, + { + "epoch": 16.02, + "learning_rate": 2.8511392405063293e-06, + "loss": 0.0057, + "step": 28750 + }, + { + "epoch": 16.02, + "learning_rate": 2.8448101265822788e-06, + "loss": 0.0062, + "step": 28775 + }, + { + "epoch": 16.02, + "learning_rate": 2.8384810126582278e-06, + "loss": 0.0092, + "step": 28800 + }, + { + "epoch": 16.02, + "learning_rate": 2.832151898734177e-06, + "loss": 0.0084, + "step": 28825 + }, + { + "epoch": 16.02, + "learning_rate": 2.8258227848101266e-06, + "loss": 0.0099, + "step": 28850 + }, + { + "epoch": 16.02, + "learning_rate": 2.819493670886076e-06, + "loss": 0.0117, + "step": 28875 + }, + { + "epoch": 16.02, + "learning_rate": 2.8131645569620255e-06, + "loss": 0.0117, + "step": 28900 + }, + { + "epoch": 16.02, + "learning_rate": 2.8068354430379753e-06, + "loss": 0.0127, + "step": 28925 + }, + { + "epoch": 16.02, + "learning_rate": 2.8005063291139243e-06, + "loss": 0.0123, + "step": 28950 + }, + { + "epoch": 16.02, + "learning_rate": 2.7941772151898738e-06, + "loss": 0.0128, + "step": 28975 + }, + { + "epoch": 16.02, + "learning_rate": 2.787848101265823e-06, + "loss": 0.0073, + "step": 29000 + }, + { + "epoch": 16.02, + "eval_loss": 0.2685891091823578, + "eval_runtime": 1456.562, + "eval_samples_per_second": 7.148, + "eval_steps_per_second": 0.447, + "eval_wer": 23.278427291500677, + "step": 29000 + }, + { + "epoch": 16.03, + "learning_rate": 2.7815189873417726e-06, + "loss": 0.0068, + "step": 29025 + }, + { + "epoch": 16.03, + "learning_rate": 2.7751898734177216e-06, + "loss": 0.0087, + "step": 29050 + }, + { + "epoch": 16.03, + "learning_rate": 2.768860759493671e-06, + "loss": 0.0084, + "step": 29075 + }, + { + "epoch": 16.03, + "learning_rate": 2.7625316455696205e-06, + "loss": 0.0039, + "step": 29100 + }, + { + "epoch": 16.03, + "learning_rate": 2.75620253164557e-06, + "loss": 0.0066, + "step": 29125 + }, + { + "epoch": 16.03, + "learning_rate": 2.7498734177215193e-06, + "loss": 0.0108, + "step": 29150 + }, + { + "epoch": 16.03, + "learning_rate": 2.7435443037974683e-06, + "loss": 0.0052, + "step": 29175 + }, + { + "epoch": 16.03, + "learning_rate": 2.7372151898734178e-06, + "loss": 0.0061, + "step": 29200 + }, + { + "epoch": 16.03, + "learning_rate": 2.730886075949367e-06, + "loss": 0.0083, + "step": 29225 + }, + { + "epoch": 16.03, + "learning_rate": 2.7245569620253166e-06, + "loss": 0.0062, + "step": 29250 + }, + { + "epoch": 16.03, + "learning_rate": 2.7182278481012656e-06, + "loss": 0.0078, + "step": 29275 + }, + { + "epoch": 16.03, + "learning_rate": 2.711898734177215e-06, + "loss": 0.0127, + "step": 29300 + }, + { + "epoch": 16.03, + "learning_rate": 2.705569620253165e-06, + "loss": 0.0092, + "step": 29325 + }, + { + "epoch": 16.03, + "learning_rate": 2.6992405063291143e-06, + "loss": 0.0087, + "step": 29350 + }, + { + "epoch": 16.03, + "learning_rate": 2.6929113924050638e-06, + "loss": 0.0062, + "step": 29375 + }, + { + "epoch": 16.03, + "learning_rate": 2.686582278481013e-06, + "loss": 0.0065, + "step": 29400 + }, + { + "epoch": 16.04, + "learning_rate": 2.680253164556962e-06, + "loss": 0.0057, + "step": 29425 + }, + { + "epoch": 16.04, + "learning_rate": 2.6739240506329116e-06, + "loss": 0.0071, + "step": 29450 + }, + { + "epoch": 16.04, + "learning_rate": 2.667594936708861e-06, + "loss": 0.01, + "step": 29475 + }, + { + "epoch": 16.04, + "learning_rate": 2.6612658227848105e-06, + "loss": 0.009, + "step": 29500 + }, + { + "epoch": 16.04, + "learning_rate": 2.65493670886076e-06, + "loss": 0.0091, + "step": 29525 + }, + { + "epoch": 16.04, + "learning_rate": 2.648607594936709e-06, + "loss": 0.0104, + "step": 29550 + }, + { + "epoch": 16.04, + "learning_rate": 2.6422784810126583e-06, + "loss": 0.0071, + "step": 29575 + }, + { + "epoch": 16.04, + "learning_rate": 2.6359493670886078e-06, + "loss": 0.0081, + "step": 29600 + }, + { + "epoch": 16.04, + "learning_rate": 2.629620253164557e-06, + "loss": 0.008, + "step": 29625 + }, + { + "epoch": 16.04, + "learning_rate": 2.623291139240506e-06, + "loss": 0.0067, + "step": 29650 + }, + { + "epoch": 16.04, + "learning_rate": 2.6169620253164556e-06, + "loss": 0.0066, + "step": 29675 + }, + { + "epoch": 16.04, + "learning_rate": 2.610632911392405e-06, + "loss": 0.0121, + "step": 29700 + }, + { + "epoch": 16.04, + "learning_rate": 2.6043037974683545e-06, + "loss": 0.0075, + "step": 29725 + }, + { + "epoch": 16.04, + "learning_rate": 2.5979746835443043e-06, + "loss": 0.0111, + "step": 29750 + }, + { + "epoch": 17.0, + "learning_rate": 2.5916455696202538e-06, + "loss": 0.0095, + "step": 29775 + }, + { + "epoch": 17.0, + "learning_rate": 2.5853164556962028e-06, + "loss": 0.0077, + "step": 29800 + }, + { + "epoch": 17.0, + "learning_rate": 2.578987341772152e-06, + "loss": 0.0047, + "step": 29825 + }, + { + "epoch": 17.0, + "learning_rate": 2.5726582278481016e-06, + "loss": 0.0092, + "step": 29850 + }, + { + "epoch": 17.0, + "learning_rate": 2.566329113924051e-06, + "loss": 0.0191, + "step": 29875 + }, + { + "epoch": 17.0, + "learning_rate": 2.56e-06, + "loss": 0.0102, + "step": 29900 + }, + { + "epoch": 17.0, + "learning_rate": 2.5536708860759495e-06, + "loss": 0.0146, + "step": 29925 + }, + { + "epoch": 17.0, + "learning_rate": 2.547341772151899e-06, + "loss": 0.0096, + "step": 29950 + }, + { + "epoch": 17.01, + "learning_rate": 2.5410126582278483e-06, + "loss": 0.0159, + "step": 29975 + }, + { + "epoch": 17.01, + "learning_rate": 2.5346835443037978e-06, + "loss": 0.0094, + "step": 30000 + }, + { + "epoch": 17.01, + "eval_loss": 0.2636851966381073, + "eval_runtime": 1508.7196, + "eval_samples_per_second": 6.901, + "eval_steps_per_second": 0.431, + "eval_wer": 23.029642758418838, + "step": 30000 + }, + { + "epoch": 17.01, + "learning_rate": 2.5283544303797468e-06, + "loss": 0.0078, + "step": 30025 + }, + { + "epoch": 17.01, + "learning_rate": 2.522025316455696e-06, + "loss": 0.01, + "step": 30050 + }, + { + "epoch": 17.01, + "learning_rate": 2.5156962025316456e-06, + "loss": 0.0058, + "step": 30075 + }, + { + "epoch": 17.01, + "learning_rate": 2.509367088607595e-06, + "loss": 0.0092, + "step": 30100 + }, + { + "epoch": 17.01, + "learning_rate": 2.503037974683544e-06, + "loss": 0.0084, + "step": 30125 + }, + { + "epoch": 17.01, + "learning_rate": 2.496708860759494e-06, + "loss": 0.0069, + "step": 30150 + }, + { + "epoch": 17.01, + "learning_rate": 2.4903797468354433e-06, + "loss": 0.0059, + "step": 30175 + }, + { + "epoch": 17.01, + "learning_rate": 2.4840506329113923e-06, + "loss": 0.0097, + "step": 30200 + }, + { + "epoch": 17.01, + "learning_rate": 2.4777215189873418e-06, + "loss": 0.0126, + "step": 30225 + }, + { + "epoch": 17.01, + "learning_rate": 2.4713924050632916e-06, + "loss": 0.0096, + "step": 30250 + }, + { + "epoch": 17.01, + "learning_rate": 2.4650632911392406e-06, + "loss": 0.0094, + "step": 30275 + }, + { + "epoch": 17.01, + "learning_rate": 2.45873417721519e-06, + "loss": 0.0111, + "step": 30300 + }, + { + "epoch": 17.01, + "learning_rate": 2.4524050632911395e-06, + "loss": 0.0062, + "step": 30325 + }, + { + "epoch": 17.01, + "learning_rate": 2.446075949367089e-06, + "loss": 0.0078, + "step": 30350 + }, + { + "epoch": 17.02, + "learning_rate": 2.4397468354430383e-06, + "loss": 0.0043, + "step": 30375 + }, + { + "epoch": 17.02, + "learning_rate": 2.4336708860759494e-06, + "loss": 0.0102, + "step": 30400 + }, + { + "epoch": 17.02, + "learning_rate": 2.4273417721518988e-06, + "loss": 0.0101, + "step": 30425 + }, + { + "epoch": 17.02, + "learning_rate": 2.4210126582278482e-06, + "loss": 0.0066, + "step": 30450 + }, + { + "epoch": 17.02, + "learning_rate": 2.4146835443037976e-06, + "loss": 0.0078, + "step": 30475 + }, + { + "epoch": 17.02, + "learning_rate": 2.408354430379747e-06, + "loss": 0.0074, + "step": 30500 + }, + { + "epoch": 17.02, + "learning_rate": 2.4020253164556965e-06, + "loss": 0.0072, + "step": 30525 + }, + { + "epoch": 17.02, + "learning_rate": 2.395696202531646e-06, + "loss": 0.0109, + "step": 30550 + }, + { + "epoch": 17.02, + "learning_rate": 2.389367088607595e-06, + "loss": 0.0113, + "step": 30575 + }, + { + "epoch": 17.02, + "learning_rate": 2.3830379746835444e-06, + "loss": 0.0109, + "step": 30600 + }, + { + "epoch": 17.02, + "learning_rate": 2.3767088607594938e-06, + "loss": 0.0089, + "step": 30625 + }, + { + "epoch": 17.02, + "learning_rate": 2.3703797468354432e-06, + "loss": 0.0062, + "step": 30650 + }, + { + "epoch": 17.02, + "learning_rate": 2.3640506329113926e-06, + "loss": 0.0101, + "step": 30675 + }, + { + "epoch": 17.02, + "learning_rate": 2.357721518987342e-06, + "loss": 0.0096, + "step": 30700 + }, + { + "epoch": 17.02, + "learning_rate": 2.3513924050632915e-06, + "loss": 0.0093, + "step": 30725 + }, + { + "epoch": 17.02, + "learning_rate": 2.3450632911392405e-06, + "loss": 0.0113, + "step": 30750 + }, + { + "epoch": 17.03, + "learning_rate": 2.33873417721519e-06, + "loss": 0.0094, + "step": 30775 + }, + { + "epoch": 17.03, + "learning_rate": 2.3324050632911394e-06, + "loss": 0.0086, + "step": 30800 + }, + { + "epoch": 17.03, + "learning_rate": 2.3260759493670888e-06, + "loss": 0.0096, + "step": 30825 + }, + { + "epoch": 17.03, + "learning_rate": 2.319746835443038e-06, + "loss": 0.0086, + "step": 30850 + }, + { + "epoch": 17.03, + "learning_rate": 2.3134177215189876e-06, + "loss": 0.0064, + "step": 30875 + }, + { + "epoch": 17.03, + "learning_rate": 2.307088607594937e-06, + "loss": 0.0107, + "step": 30900 + }, + { + "epoch": 17.03, + "learning_rate": 2.3007594936708865e-06, + "loss": 0.0073, + "step": 30925 + }, + { + "epoch": 17.03, + "learning_rate": 2.2944303797468355e-06, + "loss": 0.005, + "step": 30950 + }, + { + "epoch": 17.03, + "learning_rate": 2.288101265822785e-06, + "loss": 0.0055, + "step": 30975 + }, + { + "epoch": 17.03, + "learning_rate": 2.2817721518987344e-06, + "loss": 0.006, + "step": 31000 + }, + { + "epoch": 17.03, + "eval_loss": 0.2709895372390747, + "eval_runtime": 1461.5544, + "eval_samples_per_second": 7.123, + "eval_steps_per_second": 0.445, + "eval_wer": 23.29468771849949, + "step": 31000 + }, + { + "epoch": 17.03, + "learning_rate": 2.2754430379746838e-06, + "loss": 0.0094, + "step": 31025 + }, + { + "epoch": 17.03, + "learning_rate": 2.2691139240506328e-06, + "loss": 0.0088, + "step": 31050 + }, + { + "epoch": 17.03, + "learning_rate": 2.2627848101265826e-06, + "loss": 0.0057, + "step": 31075 + }, + { + "epoch": 17.03, + "learning_rate": 2.256455696202532e-06, + "loss": 0.0066, + "step": 31100 + }, + { + "epoch": 17.03, + "learning_rate": 2.250126582278481e-06, + "loss": 0.008, + "step": 31125 + }, + { + "epoch": 17.03, + "learning_rate": 2.2437974683544305e-06, + "loss": 0.0048, + "step": 31150 + }, + { + "epoch": 17.04, + "learning_rate": 2.23746835443038e-06, + "loss": 0.0074, + "step": 31175 + }, + { + "epoch": 17.04, + "learning_rate": 2.2311392405063294e-06, + "loss": 0.0061, + "step": 31200 + }, + { + "epoch": 17.04, + "learning_rate": 2.2248101265822788e-06, + "loss": 0.0088, + "step": 31225 + }, + { + "epoch": 17.04, + "learning_rate": 2.2184810126582278e-06, + "loss": 0.0087, + "step": 31250 + }, + { + "epoch": 17.04, + "learning_rate": 2.2121518987341776e-06, + "loss": 0.0089, + "step": 31275 + }, + { + "epoch": 17.04, + "learning_rate": 2.2058227848101266e-06, + "loss": 0.0061, + "step": 31300 + }, + { + "epoch": 17.04, + "learning_rate": 2.199493670886076e-06, + "loss": 0.0097, + "step": 31325 + }, + { + "epoch": 17.04, + "learning_rate": 2.1931645569620255e-06, + "loss": 0.0063, + "step": 31350 + }, + { + "epoch": 17.04, + "learning_rate": 2.186835443037975e-06, + "loss": 0.0084, + "step": 31375 + }, + { + "epoch": 17.04, + "learning_rate": 2.1805063291139243e-06, + "loss": 0.0089, + "step": 31400 + }, + { + "epoch": 17.04, + "learning_rate": 2.1741772151898734e-06, + "loss": 0.0077, + "step": 31425 + }, + { + "epoch": 17.04, + "learning_rate": 2.1678481012658228e-06, + "loss": 0.0153, + "step": 31450 + }, + { + "epoch": 17.04, + "learning_rate": 2.1615189873417726e-06, + "loss": 0.0088, + "step": 31475 + }, + { + "epoch": 17.04, + "learning_rate": 2.1551898734177216e-06, + "loss": 0.0095, + "step": 31500 + }, + { + "epoch": 18.0, + "learning_rate": 2.148860759493671e-06, + "loss": 0.0084, + "step": 31525 + }, + { + "epoch": 18.0, + "learning_rate": 2.1425316455696205e-06, + "loss": 0.0054, + "step": 31550 + }, + { + "epoch": 18.0, + "learning_rate": 2.13620253164557e-06, + "loss": 0.013, + "step": 31575 + }, + { + "epoch": 18.0, + "learning_rate": 2.129873417721519e-06, + "loss": 0.0074, + "step": 31600 + }, + { + "epoch": 18.0, + "learning_rate": 2.1235443037974684e-06, + "loss": 0.0083, + "step": 31625 + }, + { + "epoch": 18.0, + "learning_rate": 2.1172151898734178e-06, + "loss": 0.0118, + "step": 31650 + }, + { + "epoch": 18.0, + "learning_rate": 2.110886075949367e-06, + "loss": 0.0095, + "step": 31675 + }, + { + "epoch": 18.0, + "learning_rate": 2.1045569620253166e-06, + "loss": 0.0075, + "step": 31700 + }, + { + "epoch": 18.01, + "learning_rate": 2.098227848101266e-06, + "loss": 0.0149, + "step": 31725 + }, + { + "epoch": 18.01, + "learning_rate": 2.0918987341772155e-06, + "loss": 0.0095, + "step": 31750 + }, + { + "epoch": 18.01, + "learning_rate": 2.085569620253165e-06, + "loss": 0.0098, + "step": 31775 + }, + { + "epoch": 18.01, + "learning_rate": 2.079240506329114e-06, + "loss": 0.0058, + "step": 31800 + }, + { + "epoch": 18.01, + "learning_rate": 2.0729113924050633e-06, + "loss": 0.0072, + "step": 31825 + }, + { + "epoch": 18.01, + "learning_rate": 2.0665822784810128e-06, + "loss": 0.0103, + "step": 31850 + }, + { + "epoch": 18.01, + "learning_rate": 2.060253164556962e-06, + "loss": 0.0085, + "step": 31875 + }, + { + "epoch": 18.01, + "learning_rate": 2.0539240506329116e-06, + "loss": 0.0064, + "step": 31900 + }, + { + "epoch": 18.01, + "learning_rate": 2.047594936708861e-06, + "loss": 0.0077, + "step": 31925 + }, + { + "epoch": 18.01, + "learning_rate": 2.0412658227848105e-06, + "loss": 0.0086, + "step": 31950 + }, + { + "epoch": 18.01, + "learning_rate": 2.0349367088607595e-06, + "loss": 0.0136, + "step": 31975 + }, + { + "epoch": 18.01, + "learning_rate": 2.028607594936709e-06, + "loss": 0.0085, + "step": 32000 + }, + { + "epoch": 18.01, + "eval_loss": 0.26488059759140015, + "eval_runtime": 1466.953, + "eval_samples_per_second": 7.097, + "eval_steps_per_second": 0.444, + "eval_wer": 23.089806338314446, + "step": 32000 + }, + { + "epoch": 18.01, + "learning_rate": 2.0222784810126583e-06, + "loss": 0.011, + "step": 32025 + }, + { + "epoch": 18.01, + "learning_rate": 2.0159493670886078e-06, + "loss": 0.0086, + "step": 32050 + }, + { + "epoch": 18.01, + "learning_rate": 2.009620253164557e-06, + "loss": 0.008, + "step": 32075 + }, + { + "epoch": 18.01, + "learning_rate": 2.0032911392405066e-06, + "loss": 0.0083, + "step": 32100 + }, + { + "epoch": 18.02, + "learning_rate": 1.996962025316456e-06, + "loss": 0.0073, + "step": 32125 + }, + { + "epoch": 18.02, + "learning_rate": 1.9906329113924055e-06, + "loss": 0.0036, + "step": 32150 + }, + { + "epoch": 18.02, + "learning_rate": 1.9843037974683545e-06, + "loss": 0.0096, + "step": 32175 + }, + { + "epoch": 18.02, + "learning_rate": 1.977974683544304e-06, + "loss": 0.0071, + "step": 32200 + }, + { + "epoch": 18.02, + "learning_rate": 1.9716455696202533e-06, + "loss": 0.009, + "step": 32225 + }, + { + "epoch": 18.02, + "learning_rate": 1.9653164556962028e-06, + "loss": 0.0093, + "step": 32250 + }, + { + "epoch": 18.02, + "learning_rate": 1.9589873417721518e-06, + "loss": 0.0071, + "step": 32275 + }, + { + "epoch": 18.02, + "learning_rate": 1.9526582278481016e-06, + "loss": 0.0037, + "step": 32300 + }, + { + "epoch": 18.02, + "learning_rate": 1.946329113924051e-06, + "loss": 0.007, + "step": 32325 + }, + { + "epoch": 18.02, + "learning_rate": 1.94e-06, + "loss": 0.0104, + "step": 32350 + }, + { + "epoch": 18.02, + "learning_rate": 1.9336708860759495e-06, + "loss": 0.0082, + "step": 32375 + }, + { + "epoch": 18.02, + "learning_rate": 1.927341772151899e-06, + "loss": 0.0095, + "step": 32400 + }, + { + "epoch": 18.02, + "learning_rate": 1.9210126582278483e-06, + "loss": 0.0101, + "step": 32425 + }, + { + "epoch": 18.02, + "learning_rate": 1.9146835443037973e-06, + "loss": 0.0122, + "step": 32450 + }, + { + "epoch": 18.02, + "learning_rate": 1.9083544303797468e-06, + "loss": 0.0082, + "step": 32475 + }, + { + "epoch": 18.02, + "learning_rate": 1.9020253164556964e-06, + "loss": 0.0093, + "step": 32500 + }, + { + "epoch": 18.03, + "learning_rate": 1.8956962025316458e-06, + "loss": 0.0073, + "step": 32525 + }, + { + "epoch": 18.03, + "learning_rate": 1.889367088607595e-06, + "loss": 0.0061, + "step": 32550 + }, + { + "epoch": 18.03, + "learning_rate": 1.8830379746835445e-06, + "loss": 0.0072, + "step": 32575 + }, + { + "epoch": 18.03, + "learning_rate": 1.876708860759494e-06, + "loss": 0.0064, + "step": 32600 + }, + { + "epoch": 18.03, + "learning_rate": 1.8703797468354431e-06, + "loss": 0.0084, + "step": 32625 + }, + { + "epoch": 18.03, + "learning_rate": 1.8640506329113926e-06, + "loss": 0.0067, + "step": 32650 + }, + { + "epoch": 18.03, + "learning_rate": 1.8577215189873418e-06, + "loss": 0.0052, + "step": 32675 + }, + { + "epoch": 18.03, + "learning_rate": 1.8513924050632912e-06, + "loss": 0.0085, + "step": 32700 + }, + { + "epoch": 18.03, + "learning_rate": 1.8450632911392408e-06, + "loss": 0.0077, + "step": 32725 + }, + { + "epoch": 18.03, + "learning_rate": 1.83873417721519e-06, + "loss": 0.0068, + "step": 32750 + }, + { + "epoch": 18.03, + "learning_rate": 1.8324050632911395e-06, + "loss": 0.0079, + "step": 32775 + }, + { + "epoch": 18.03, + "learning_rate": 1.8260759493670887e-06, + "loss": 0.008, + "step": 32800 + }, + { + "epoch": 18.03, + "learning_rate": 1.8197468354430381e-06, + "loss": 0.0071, + "step": 32825 + }, + { + "epoch": 18.03, + "learning_rate": 1.8134177215189873e-06, + "loss": 0.0091, + "step": 32850 + }, + { + "epoch": 18.03, + "learning_rate": 1.8070886075949368e-06, + "loss": 0.0059, + "step": 32875 + }, + { + "epoch": 18.03, + "learning_rate": 1.8007594936708862e-06, + "loss": 0.0079, + "step": 32900 + }, + { + "epoch": 18.04, + "learning_rate": 1.7944303797468356e-06, + "loss": 0.0055, + "step": 32925 + }, + { + "epoch": 18.04, + "learning_rate": 1.788101265822785e-06, + "loss": 0.0095, + "step": 32950 + }, + { + "epoch": 18.04, + "learning_rate": 1.7817721518987343e-06, + "loss": 0.0086, + "step": 32975 + }, + { + "epoch": 18.04, + "learning_rate": 1.7754430379746837e-06, + "loss": 0.0057, + "step": 33000 + }, + { + "epoch": 18.04, + "eval_loss": 0.2678743898868561, + "eval_runtime": 1484.126, + "eval_samples_per_second": 7.015, + "eval_steps_per_second": 0.439, + "eval_wer": 23.405258622091416, + "step": 33000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.523211459969024e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-33000/training_args.bin b/checkpoint-33000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-33000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-34000/config.json b/checkpoint-34000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-34000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-34000/generation_config.json b/checkpoint-34000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-34000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-34000/optimizer.pt b/checkpoint-34000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b5f7f78cbec863ec4f5f36a15c549b5114455a4 --- /dev/null +++ b/checkpoint-34000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:765d190dc30375a0547c195d26871667ba39ecfb53659ffe48a05972daa4b1c0 +size 1934161093 diff --git a/checkpoint-34000/preprocessor_config.json b/checkpoint-34000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-34000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-34000/pytorch_model.bin b/checkpoint-34000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6634551a1e9d7312376ebd9296177fccf01e45af --- /dev/null +++ b/checkpoint-34000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05856fee5e4cdedc09e37f7eaacb94eee6b2f05ba88ae07c3fe823d533d40170 +size 967102601 diff --git a/checkpoint-34000/rng_state.pth b/checkpoint-34000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ca44a82f55ad7a7cabe761bcee45c522706a27be --- /dev/null +++ b/checkpoint-34000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5033b5b44c52b1d5e645702a70f173eedc7e3f813b4955dd1cff6df1b0656db8 +size 14639 diff --git a/checkpoint-34000/scaler.pt b/checkpoint-34000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..800305f79f06556cf749f22eaa567bd2cfa81f9a --- /dev/null +++ b/checkpoint-34000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee2ebb33a5da8074199c215cff53e57b34efe9fdca5c9be15fe93edc5d13a417 +size 557 diff --git a/checkpoint-34000/scheduler.pt b/checkpoint-34000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eaab9b2f289a65ce1012d689e6693926f6547016 --- /dev/null +++ b/checkpoint-34000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:352030744fe2994f9dbebfac352ad774d3aae0a24d41c96e4a375692bfbdba78 +size 627 diff --git a/checkpoint-34000/trainer_state.json b/checkpoint-34000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3552ba1e6d9b3ad20ea54aeea3b40a52d438776a --- /dev/null +++ b/checkpoint-34000/trainer_state.json @@ -0,0 +1,8482 @@ +{ + "best_metric": 23.029642758418838, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-30000", + "epoch": 19.018275, + "global_step": 34000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + }, + { + "epoch": 13.03, + "learning_rate": 4.046835443037975e-06, + "loss": 0.0128, + "step": 24025 + }, + { + "epoch": 13.03, + "learning_rate": 4.040506329113924e-06, + "loss": 0.012, + "step": 24050 + }, + { + "epoch": 13.03, + "learning_rate": 4.034177215189874e-06, + "loss": 0.0098, + "step": 24075 + }, + { + "epoch": 13.03, + "learning_rate": 4.027848101265823e-06, + "loss": 0.0091, + "step": 24100 + }, + { + "epoch": 13.03, + "learning_rate": 4.021518987341773e-06, + "loss": 0.0077, + "step": 24125 + }, + { + "epoch": 13.03, + "learning_rate": 4.015189873417722e-06, + "loss": 0.0135, + "step": 24150 + }, + { + "epoch": 13.04, + "learning_rate": 4.0088607594936715e-06, + "loss": 0.0124, + "step": 24175 + }, + { + "epoch": 13.04, + "learning_rate": 4.002531645569621e-06, + "loss": 0.0152, + "step": 24200 + }, + { + "epoch": 13.04, + "learning_rate": 3.99620253164557e-06, + "loss": 0.0127, + "step": 24225 + }, + { + "epoch": 13.04, + "learning_rate": 3.989873417721519e-06, + "loss": 0.0066, + "step": 24250 + }, + { + "epoch": 13.04, + "learning_rate": 3.983544303797468e-06, + "loss": 0.0163, + "step": 24275 + }, + { + "epoch": 13.04, + "learning_rate": 3.977215189873418e-06, + "loss": 0.0095, + "step": 24300 + }, + { + "epoch": 13.04, + "learning_rate": 3.970886075949367e-06, + "loss": 0.0103, + "step": 24325 + }, + { + "epoch": 13.04, + "learning_rate": 3.964556962025317e-06, + "loss": 0.0112, + "step": 24350 + }, + { + "epoch": 13.04, + "learning_rate": 3.958227848101266e-06, + "loss": 0.01, + "step": 24375 + }, + { + "epoch": 13.04, + "learning_rate": 3.9518987341772155e-06, + "loss": 0.0131, + "step": 24400 + }, + { + "epoch": 13.04, + "learning_rate": 3.945569620253165e-06, + "loss": 0.0143, + "step": 24425 + }, + { + "epoch": 13.04, + "learning_rate": 3.939240506329114e-06, + "loss": 0.0151, + "step": 24450 + }, + { + "epoch": 13.04, + "learning_rate": 3.932911392405064e-06, + "loss": 0.0129, + "step": 24475 + }, + { + "epoch": 13.04, + "learning_rate": 3.926582278481013e-06, + "loss": 0.0125, + "step": 24500 + }, + { + "epoch": 14.0, + "learning_rate": 3.920506329113925e-06, + "loss": 0.0165, + "step": 24525 + }, + { + "epoch": 14.0, + "learning_rate": 3.914177215189874e-06, + "loss": 0.0114, + "step": 24550 + }, + { + "epoch": 14.0, + "learning_rate": 3.9078481012658235e-06, + "loss": 0.0111, + "step": 24575 + }, + { + "epoch": 14.0, + "learning_rate": 3.901518987341772e-06, + "loss": 0.0134, + "step": 24600 + }, + { + "epoch": 14.0, + "learning_rate": 3.8951898734177215e-06, + "loss": 0.0138, + "step": 24625 + }, + { + "epoch": 14.0, + "learning_rate": 3.888860759493671e-06, + "loss": 0.0151, + "step": 24650 + }, + { + "epoch": 14.0, + "learning_rate": 3.88253164556962e-06, + "loss": 0.0101, + "step": 24675 + }, + { + "epoch": 14.0, + "learning_rate": 3.87620253164557e-06, + "loss": 0.0108, + "step": 24700 + }, + { + "epoch": 14.01, + "learning_rate": 3.869873417721519e-06, + "loss": 0.0151, + "step": 24725 + }, + { + "epoch": 14.01, + "learning_rate": 3.863544303797469e-06, + "loss": 0.0132, + "step": 24750 + }, + { + "epoch": 14.01, + "learning_rate": 3.857215189873418e-06, + "loss": 0.012, + "step": 24775 + }, + { + "epoch": 14.01, + "learning_rate": 3.8508860759493675e-06, + "loss": 0.017, + "step": 24800 + }, + { + "epoch": 14.01, + "learning_rate": 3.844556962025317e-06, + "loss": 0.0141, + "step": 24825 + }, + { + "epoch": 14.01, + "learning_rate": 3.838227848101266e-06, + "loss": 0.0166, + "step": 24850 + }, + { + "epoch": 14.01, + "learning_rate": 3.831898734177216e-06, + "loss": 0.0112, + "step": 24875 + }, + { + "epoch": 14.01, + "learning_rate": 3.825569620253165e-06, + "loss": 0.0097, + "step": 24900 + }, + { + "epoch": 14.01, + "learning_rate": 3.819240506329115e-06, + "loss": 0.0138, + "step": 24925 + }, + { + "epoch": 14.01, + "learning_rate": 3.8129113924050636e-06, + "loss": 0.0069, + "step": 24950 + }, + { + "epoch": 14.01, + "learning_rate": 3.806582278481013e-06, + "loss": 0.0111, + "step": 24975 + }, + { + "epoch": 14.01, + "learning_rate": 3.8002531645569625e-06, + "loss": 0.0149, + "step": 25000 + }, + { + "epoch": 14.01, + "eval_loss": 0.2640444040298462, + "eval_runtime": 1478.4977, + "eval_samples_per_second": 7.042, + "eval_steps_per_second": 0.44, + "eval_wer": 24.063805915543345, + "step": 25000 + }, + { + "epoch": 14.01, + "learning_rate": 3.7939240506329115e-06, + "loss": 0.0176, + "step": 25025 + }, + { + "epoch": 14.01, + "learning_rate": 3.787594936708861e-06, + "loss": 0.0104, + "step": 25050 + }, + { + "epoch": 14.01, + "learning_rate": 3.7812658227848103e-06, + "loss": 0.0084, + "step": 25075 + }, + { + "epoch": 14.01, + "learning_rate": 3.7749367088607598e-06, + "loss": 0.0118, + "step": 25100 + }, + { + "epoch": 14.02, + "learning_rate": 3.768607594936709e-06, + "loss": 0.0099, + "step": 25125 + }, + { + "epoch": 14.02, + "learning_rate": 3.762278481012658e-06, + "loss": 0.0089, + "step": 25150 + }, + { + "epoch": 14.02, + "learning_rate": 3.7559493670886076e-06, + "loss": 0.0119, + "step": 25175 + }, + { + "epoch": 14.02, + "learning_rate": 3.749620253164557e-06, + "loss": 0.0118, + "step": 25200 + }, + { + "epoch": 14.02, + "learning_rate": 3.743291139240507e-06, + "loss": 0.0108, + "step": 25225 + }, + { + "epoch": 14.02, + "learning_rate": 3.7369620253164563e-06, + "loss": 0.0101, + "step": 25250 + }, + { + "epoch": 14.02, + "learning_rate": 3.7306329113924058e-06, + "loss": 0.0095, + "step": 25275 + }, + { + "epoch": 14.02, + "learning_rate": 3.7243037974683548e-06, + "loss": 0.0106, + "step": 25300 + }, + { + "epoch": 14.02, + "learning_rate": 3.717974683544304e-06, + "loss": 0.0135, + "step": 25325 + }, + { + "epoch": 14.02, + "learning_rate": 3.7116455696202536e-06, + "loss": 0.0115, + "step": 25350 + }, + { + "epoch": 14.02, + "learning_rate": 3.705316455696203e-06, + "loss": 0.0114, + "step": 25375 + }, + { + "epoch": 14.02, + "learning_rate": 3.698987341772152e-06, + "loss": 0.0119, + "step": 25400 + }, + { + "epoch": 14.02, + "learning_rate": 3.6926582278481015e-06, + "loss": 0.0123, + "step": 25425 + }, + { + "epoch": 14.02, + "learning_rate": 3.686329113924051e-06, + "loss": 0.0138, + "step": 25450 + }, + { + "epoch": 14.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 0.0133, + "step": 25475 + }, + { + "epoch": 14.02, + "learning_rate": 3.6736708860759498e-06, + "loss": 0.0106, + "step": 25500 + }, + { + "epoch": 14.03, + "learning_rate": 3.6673417721518988e-06, + "loss": 0.0099, + "step": 25525 + }, + { + "epoch": 14.03, + "learning_rate": 3.661012658227848e-06, + "loss": 0.0104, + "step": 25550 + }, + { + "epoch": 14.03, + "learning_rate": 3.6546835443037976e-06, + "loss": 0.0143, + "step": 25575 + }, + { + "epoch": 14.03, + "learning_rate": 3.648354430379747e-06, + "loss": 0.0096, + "step": 25600 + }, + { + "epoch": 14.03, + "learning_rate": 3.642025316455696e-06, + "loss": 0.0094, + "step": 25625 + }, + { + "epoch": 14.03, + "learning_rate": 3.6356962025316463e-06, + "loss": 0.0162, + "step": 25650 + }, + { + "epoch": 14.03, + "learning_rate": 3.6293670886075953e-06, + "loss": 0.008, + "step": 25675 + }, + { + "epoch": 14.03, + "learning_rate": 3.6230379746835448e-06, + "loss": 0.0094, + "step": 25700 + }, + { + "epoch": 14.03, + "learning_rate": 3.616708860759494e-06, + "loss": 0.0122, + "step": 25725 + }, + { + "epoch": 14.03, + "learning_rate": 3.6103797468354436e-06, + "loss": 0.0128, + "step": 25750 + }, + { + "epoch": 14.03, + "learning_rate": 3.6040506329113926e-06, + "loss": 0.0117, + "step": 25775 + }, + { + "epoch": 14.03, + "learning_rate": 3.597721518987342e-06, + "loss": 0.0067, + "step": 25800 + }, + { + "epoch": 14.03, + "learning_rate": 3.5913924050632915e-06, + "loss": 0.0078, + "step": 25825 + }, + { + "epoch": 14.03, + "learning_rate": 3.585063291139241e-06, + "loss": 0.0071, + "step": 25850 + }, + { + "epoch": 14.03, + "learning_rate": 3.57873417721519e-06, + "loss": 0.0093, + "step": 25875 + }, + { + "epoch": 14.03, + "learning_rate": 3.5724050632911393e-06, + "loss": 0.0064, + "step": 25900 + }, + { + "epoch": 14.04, + "learning_rate": 3.5660759493670888e-06, + "loss": 0.0067, + "step": 25925 + }, + { + "epoch": 14.04, + "learning_rate": 3.559746835443038e-06, + "loss": 0.0076, + "step": 25950 + }, + { + "epoch": 14.04, + "learning_rate": 3.5534177215189876e-06, + "loss": 0.0099, + "step": 25975 + }, + { + "epoch": 14.04, + "learning_rate": 3.5470886075949366e-06, + "loss": 0.0111, + "step": 26000 + }, + { + "epoch": 14.04, + "eval_loss": 0.2734878659248352, + "eval_runtime": 1456.7487, + "eval_samples_per_second": 7.147, + "eval_steps_per_second": 0.447, + "eval_wer": 24.359745686921737, + "step": 26000 + }, + { + "epoch": 14.04, + "learning_rate": 3.540759493670886e-06, + "loss": 0.0134, + "step": 26025 + }, + { + "epoch": 14.04, + "learning_rate": 3.534430379746836e-06, + "loss": 0.008, + "step": 26050 + }, + { + "epoch": 14.04, + "learning_rate": 3.5281012658227853e-06, + "loss": 0.0138, + "step": 26075 + }, + { + "epoch": 14.04, + "learning_rate": 3.5217721518987348e-06, + "loss": 0.0075, + "step": 26100 + }, + { + "epoch": 14.04, + "learning_rate": 3.515443037974684e-06, + "loss": 0.0097, + "step": 26125 + }, + { + "epoch": 14.04, + "learning_rate": 3.509113924050633e-06, + "loss": 0.0139, + "step": 26150 + }, + { + "epoch": 14.04, + "learning_rate": 3.5027848101265826e-06, + "loss": 0.0099, + "step": 26175 + }, + { + "epoch": 14.04, + "learning_rate": 3.496455696202532e-06, + "loss": 0.0148, + "step": 26200 + }, + { + "epoch": 14.04, + "learning_rate": 3.4901265822784815e-06, + "loss": 0.0119, + "step": 26225 + }, + { + "epoch": 14.04, + "learning_rate": 3.4837974683544305e-06, + "loss": 0.0086, + "step": 26250 + }, + { + "epoch": 15.0, + "learning_rate": 3.47746835443038e-06, + "loss": 0.0091, + "step": 26275 + }, + { + "epoch": 15.0, + "learning_rate": 3.4711392405063293e-06, + "loss": 0.0105, + "step": 26300 + }, + { + "epoch": 15.0, + "learning_rate": 3.465063291139241e-06, + "loss": 0.0138, + "step": 26325 + }, + { + "epoch": 15.0, + "learning_rate": 3.45873417721519e-06, + "loss": 0.0185, + "step": 26350 + }, + { + "epoch": 15.0, + "learning_rate": 3.4524050632911392e-06, + "loss": 0.015, + "step": 26375 + }, + { + "epoch": 15.0, + "learning_rate": 3.4460759493670886e-06, + "loss": 0.0177, + "step": 26400 + }, + { + "epoch": 15.0, + "learning_rate": 3.439746835443038e-06, + "loss": 0.014, + "step": 26425 + }, + { + "epoch": 15.0, + "learning_rate": 3.433417721518988e-06, + "loss": 0.0127, + "step": 26450 + }, + { + "epoch": 15.01, + "learning_rate": 3.4270886075949374e-06, + "loss": 0.0135, + "step": 26475 + }, + { + "epoch": 15.01, + "learning_rate": 3.4207594936708864e-06, + "loss": 0.0171, + "step": 26500 + }, + { + "epoch": 15.01, + "learning_rate": 3.4144303797468358e-06, + "loss": 0.0114, + "step": 26525 + }, + { + "epoch": 15.01, + "learning_rate": 3.4081012658227852e-06, + "loss": 0.0099, + "step": 26550 + }, + { + "epoch": 15.01, + "learning_rate": 3.4017721518987346e-06, + "loss": 0.0126, + "step": 26575 + }, + { + "epoch": 15.01, + "learning_rate": 3.395443037974684e-06, + "loss": 0.0116, + "step": 26600 + }, + { + "epoch": 15.01, + "learning_rate": 3.389113924050633e-06, + "loss": 0.0087, + "step": 26625 + }, + { + "epoch": 15.01, + "learning_rate": 3.3827848101265825e-06, + "loss": 0.0073, + "step": 26650 + }, + { + "epoch": 15.01, + "learning_rate": 3.376455696202532e-06, + "loss": 0.0102, + "step": 26675 + }, + { + "epoch": 15.01, + "learning_rate": 3.3701265822784814e-06, + "loss": 0.0106, + "step": 26700 + }, + { + "epoch": 15.01, + "learning_rate": 3.3637974683544304e-06, + "loss": 0.0094, + "step": 26725 + }, + { + "epoch": 15.01, + "learning_rate": 3.35746835443038e-06, + "loss": 0.0113, + "step": 26750 + }, + { + "epoch": 15.01, + "learning_rate": 3.3511392405063292e-06, + "loss": 0.0169, + "step": 26775 + }, + { + "epoch": 15.01, + "learning_rate": 3.3448101265822786e-06, + "loss": 0.0084, + "step": 26800 + }, + { + "epoch": 15.01, + "learning_rate": 3.338481012658228e-06, + "loss": 0.0153, + "step": 26825 + }, + { + "epoch": 15.01, + "learning_rate": 3.332151898734177e-06, + "loss": 0.012, + "step": 26850 + }, + { + "epoch": 15.02, + "learning_rate": 3.325822784810127e-06, + "loss": 0.0106, + "step": 26875 + }, + { + "epoch": 15.02, + "learning_rate": 3.3194936708860764e-06, + "loss": 0.0099, + "step": 26900 + }, + { + "epoch": 15.02, + "learning_rate": 3.3131645569620258e-06, + "loss": 0.008, + "step": 26925 + }, + { + "epoch": 15.02, + "learning_rate": 3.306835443037975e-06, + "loss": 0.0076, + "step": 26950 + }, + { + "epoch": 15.02, + "learning_rate": 3.3005063291139246e-06, + "loss": 0.0088, + "step": 26975 + }, + { + "epoch": 15.02, + "learning_rate": 3.2941772151898736e-06, + "loss": 0.009, + "step": 27000 + }, + { + "epoch": 15.02, + "eval_loss": 0.2693029046058655, + "eval_runtime": 1479.1374, + "eval_samples_per_second": 7.039, + "eval_steps_per_second": 0.44, + "eval_wer": 24.28819980812696, + "step": 27000 + }, + { + "epoch": 15.02, + "learning_rate": 3.287848101265823e-06, + "loss": 0.0115, + "step": 27025 + }, + { + "epoch": 15.02, + "learning_rate": 3.2815189873417725e-06, + "loss": 0.0099, + "step": 27050 + }, + { + "epoch": 15.02, + "learning_rate": 3.275189873417722e-06, + "loss": 0.0073, + "step": 27075 + }, + { + "epoch": 15.02, + "learning_rate": 3.268860759493671e-06, + "loss": 0.0061, + "step": 27100 + }, + { + "epoch": 15.02, + "learning_rate": 3.2625316455696204e-06, + "loss": 0.007, + "step": 27125 + }, + { + "epoch": 15.02, + "learning_rate": 3.2562025316455698e-06, + "loss": 0.0109, + "step": 27150 + }, + { + "epoch": 15.02, + "learning_rate": 3.2498734177215192e-06, + "loss": 0.0123, + "step": 27175 + }, + { + "epoch": 15.02, + "learning_rate": 3.2435443037974686e-06, + "loss": 0.0121, + "step": 27200 + }, + { + "epoch": 15.02, + "learning_rate": 3.2372151898734176e-06, + "loss": 0.0129, + "step": 27225 + }, + { + "epoch": 15.02, + "learning_rate": 3.230886075949367e-06, + "loss": 0.0128, + "step": 27250 + }, + { + "epoch": 15.03, + "learning_rate": 3.224556962025317e-06, + "loss": 0.0094, + "step": 27275 + }, + { + "epoch": 15.03, + "learning_rate": 3.2182278481012664e-06, + "loss": 0.0081, + "step": 27300 + }, + { + "epoch": 15.03, + "learning_rate": 3.2118987341772158e-06, + "loss": 0.0087, + "step": 27325 + }, + { + "epoch": 15.03, + "learning_rate": 3.2055696202531648e-06, + "loss": 0.0131, + "step": 27350 + }, + { + "epoch": 15.03, + "learning_rate": 3.199240506329114e-06, + "loss": 0.0073, + "step": 27375 + }, + { + "epoch": 15.03, + "learning_rate": 3.1929113924050636e-06, + "loss": 0.0086, + "step": 27400 + }, + { + "epoch": 15.03, + "learning_rate": 3.186582278481013e-06, + "loss": 0.0108, + "step": 27425 + }, + { + "epoch": 15.03, + "learning_rate": 3.1802531645569625e-06, + "loss": 0.01, + "step": 27450 + }, + { + "epoch": 15.03, + "learning_rate": 3.1739240506329115e-06, + "loss": 0.0123, + "step": 27475 + }, + { + "epoch": 15.03, + "learning_rate": 3.167594936708861e-06, + "loss": 0.0081, + "step": 27500 + }, + { + "epoch": 15.03, + "learning_rate": 3.1612658227848104e-06, + "loss": 0.0123, + "step": 27525 + }, + { + "epoch": 15.03, + "learning_rate": 3.1549367088607598e-06, + "loss": 0.0096, + "step": 27550 + }, + { + "epoch": 15.03, + "learning_rate": 3.1486075949367088e-06, + "loss": 0.0054, + "step": 27575 + }, + { + "epoch": 15.03, + "learning_rate": 3.1422784810126582e-06, + "loss": 0.006, + "step": 27600 + }, + { + "epoch": 15.03, + "learning_rate": 3.1359493670886076e-06, + "loss": 0.0089, + "step": 27625 + }, + { + "epoch": 15.03, + "learning_rate": 3.129620253164557e-06, + "loss": 0.0109, + "step": 27650 + }, + { + "epoch": 15.04, + "learning_rate": 3.1232911392405065e-06, + "loss": 0.0089, + "step": 27675 + }, + { + "epoch": 15.04, + "learning_rate": 3.1169620253164563e-06, + "loss": 0.0094, + "step": 27700 + }, + { + "epoch": 15.04, + "learning_rate": 3.1106329113924054e-06, + "loss": 0.0102, + "step": 27725 + }, + { + "epoch": 15.04, + "learning_rate": 3.1043037974683548e-06, + "loss": 0.0097, + "step": 27750 + }, + { + "epoch": 15.04, + "learning_rate": 3.097974683544304e-06, + "loss": 0.0078, + "step": 27775 + }, + { + "epoch": 15.04, + "learning_rate": 3.0916455696202536e-06, + "loss": 0.0073, + "step": 27800 + }, + { + "epoch": 15.04, + "learning_rate": 3.085316455696203e-06, + "loss": 0.0105, + "step": 27825 + }, + { + "epoch": 15.04, + "learning_rate": 3.078987341772152e-06, + "loss": 0.0083, + "step": 27850 + }, + { + "epoch": 15.04, + "learning_rate": 3.0726582278481015e-06, + "loss": 0.0138, + "step": 27875 + }, + { + "epoch": 15.04, + "learning_rate": 3.066329113924051e-06, + "loss": 0.0049, + "step": 27900 + }, + { + "epoch": 15.04, + "learning_rate": 3.0600000000000003e-06, + "loss": 0.0107, + "step": 27925 + }, + { + "epoch": 15.04, + "learning_rate": 3.0536708860759494e-06, + "loss": 0.0105, + "step": 27950 + }, + { + "epoch": 15.04, + "learning_rate": 3.0473417721518988e-06, + "loss": 0.0134, + "step": 27975 + }, + { + "epoch": 15.04, + "learning_rate": 3.041012658227848e-06, + "loss": 0.0137, + "step": 28000 + }, + { + "epoch": 15.04, + "eval_loss": 0.2780563235282898, + "eval_runtime": 1465.9542, + "eval_samples_per_second": 7.102, + "eval_steps_per_second": 0.444, + "eval_wer": 25.010162766874256, + "step": 28000 + }, + { + "epoch": 16.0, + "learning_rate": 3.0346835443037976e-06, + "loss": 0.009, + "step": 28025 + }, + { + "epoch": 16.0, + "learning_rate": 3.028354430379747e-06, + "loss": 0.0081, + "step": 28050 + }, + { + "epoch": 16.0, + "learning_rate": 3.022025316455696e-06, + "loss": 0.0109, + "step": 28075 + }, + { + "epoch": 16.0, + "learning_rate": 3.015696202531646e-06, + "loss": 0.0132, + "step": 28100 + }, + { + "epoch": 16.0, + "learning_rate": 3.0093670886075953e-06, + "loss": 0.0142, + "step": 28125 + }, + { + "epoch": 16.0, + "learning_rate": 3.0030379746835448e-06, + "loss": 0.0152, + "step": 28150 + }, + { + "epoch": 16.0, + "learning_rate": 2.996708860759494e-06, + "loss": 0.0074, + "step": 28175 + }, + { + "epoch": 16.0, + "learning_rate": 2.990379746835443e-06, + "loss": 0.0129, + "step": 28200 + }, + { + "epoch": 16.01, + "learning_rate": 2.9840506329113926e-06, + "loss": 0.0118, + "step": 28225 + }, + { + "epoch": 16.01, + "learning_rate": 2.977721518987342e-06, + "loss": 0.0153, + "step": 28250 + }, + { + "epoch": 16.01, + "learning_rate": 2.9713924050632915e-06, + "loss": 0.0121, + "step": 28275 + }, + { + "epoch": 16.01, + "learning_rate": 2.965063291139241e-06, + "loss": 0.0097, + "step": 28300 + }, + { + "epoch": 16.01, + "learning_rate": 2.95873417721519e-06, + "loss": 0.0083, + "step": 28325 + }, + { + "epoch": 16.01, + "learning_rate": 2.9524050632911393e-06, + "loss": 0.01, + "step": 28350 + }, + { + "epoch": 16.01, + "learning_rate": 2.9460759493670888e-06, + "loss": 0.0084, + "step": 28375 + }, + { + "epoch": 16.01, + "learning_rate": 2.939746835443038e-06, + "loss": 0.0124, + "step": 28400 + }, + { + "epoch": 16.01, + "learning_rate": 2.933417721518987e-06, + "loss": 0.0071, + "step": 28425 + }, + { + "epoch": 16.01, + "learning_rate": 2.9270886075949366e-06, + "loss": 0.0113, + "step": 28450 + }, + { + "epoch": 16.01, + "learning_rate": 2.920759493670886e-06, + "loss": 0.0085, + "step": 28475 + }, + { + "epoch": 16.01, + "learning_rate": 2.914430379746836e-06, + "loss": 0.0133, + "step": 28500 + }, + { + "epoch": 16.01, + "learning_rate": 2.9081012658227853e-06, + "loss": 0.014, + "step": 28525 + }, + { + "epoch": 16.01, + "learning_rate": 2.9017721518987348e-06, + "loss": 0.0105, + "step": 28550 + }, + { + "epoch": 16.01, + "learning_rate": 2.8954430379746838e-06, + "loss": 0.01, + "step": 28575 + }, + { + "epoch": 16.01, + "learning_rate": 2.889113924050633e-06, + "loss": 0.0135, + "step": 28600 + }, + { + "epoch": 16.02, + "learning_rate": 2.8827848101265826e-06, + "loss": 0.0118, + "step": 28625 + }, + { + "epoch": 16.02, + "learning_rate": 2.876455696202532e-06, + "loss": 0.0111, + "step": 28650 + }, + { + "epoch": 16.02, + "learning_rate": 2.8701265822784815e-06, + "loss": 0.008, + "step": 28675 + }, + { + "epoch": 16.02, + "learning_rate": 2.8637974683544305e-06, + "loss": 0.0103, + "step": 28700 + }, + { + "epoch": 16.02, + "learning_rate": 2.85746835443038e-06, + "loss": 0.0082, + "step": 28725 + }, + { + "epoch": 16.02, + "learning_rate": 2.8511392405063293e-06, + "loss": 0.0057, + "step": 28750 + }, + { + "epoch": 16.02, + "learning_rate": 2.8448101265822788e-06, + "loss": 0.0062, + "step": 28775 + }, + { + "epoch": 16.02, + "learning_rate": 2.8384810126582278e-06, + "loss": 0.0092, + "step": 28800 + }, + { + "epoch": 16.02, + "learning_rate": 2.832151898734177e-06, + "loss": 0.0084, + "step": 28825 + }, + { + "epoch": 16.02, + "learning_rate": 2.8258227848101266e-06, + "loss": 0.0099, + "step": 28850 + }, + { + "epoch": 16.02, + "learning_rate": 2.819493670886076e-06, + "loss": 0.0117, + "step": 28875 + }, + { + "epoch": 16.02, + "learning_rate": 2.8131645569620255e-06, + "loss": 0.0117, + "step": 28900 + }, + { + "epoch": 16.02, + "learning_rate": 2.8068354430379753e-06, + "loss": 0.0127, + "step": 28925 + }, + { + "epoch": 16.02, + "learning_rate": 2.8005063291139243e-06, + "loss": 0.0123, + "step": 28950 + }, + { + "epoch": 16.02, + "learning_rate": 2.7941772151898738e-06, + "loss": 0.0128, + "step": 28975 + }, + { + "epoch": 16.02, + "learning_rate": 2.787848101265823e-06, + "loss": 0.0073, + "step": 29000 + }, + { + "epoch": 16.02, + "eval_loss": 0.2685891091823578, + "eval_runtime": 1456.562, + "eval_samples_per_second": 7.148, + "eval_steps_per_second": 0.447, + "eval_wer": 23.278427291500677, + "step": 29000 + }, + { + "epoch": 16.03, + "learning_rate": 2.7815189873417726e-06, + "loss": 0.0068, + "step": 29025 + }, + { + "epoch": 16.03, + "learning_rate": 2.7751898734177216e-06, + "loss": 0.0087, + "step": 29050 + }, + { + "epoch": 16.03, + "learning_rate": 2.768860759493671e-06, + "loss": 0.0084, + "step": 29075 + }, + { + "epoch": 16.03, + "learning_rate": 2.7625316455696205e-06, + "loss": 0.0039, + "step": 29100 + }, + { + "epoch": 16.03, + "learning_rate": 2.75620253164557e-06, + "loss": 0.0066, + "step": 29125 + }, + { + "epoch": 16.03, + "learning_rate": 2.7498734177215193e-06, + "loss": 0.0108, + "step": 29150 + }, + { + "epoch": 16.03, + "learning_rate": 2.7435443037974683e-06, + "loss": 0.0052, + "step": 29175 + }, + { + "epoch": 16.03, + "learning_rate": 2.7372151898734178e-06, + "loss": 0.0061, + "step": 29200 + }, + { + "epoch": 16.03, + "learning_rate": 2.730886075949367e-06, + "loss": 0.0083, + "step": 29225 + }, + { + "epoch": 16.03, + "learning_rate": 2.7245569620253166e-06, + "loss": 0.0062, + "step": 29250 + }, + { + "epoch": 16.03, + "learning_rate": 2.7182278481012656e-06, + "loss": 0.0078, + "step": 29275 + }, + { + "epoch": 16.03, + "learning_rate": 2.711898734177215e-06, + "loss": 0.0127, + "step": 29300 + }, + { + "epoch": 16.03, + "learning_rate": 2.705569620253165e-06, + "loss": 0.0092, + "step": 29325 + }, + { + "epoch": 16.03, + "learning_rate": 2.6992405063291143e-06, + "loss": 0.0087, + "step": 29350 + }, + { + "epoch": 16.03, + "learning_rate": 2.6929113924050638e-06, + "loss": 0.0062, + "step": 29375 + }, + { + "epoch": 16.03, + "learning_rate": 2.686582278481013e-06, + "loss": 0.0065, + "step": 29400 + }, + { + "epoch": 16.04, + "learning_rate": 2.680253164556962e-06, + "loss": 0.0057, + "step": 29425 + }, + { + "epoch": 16.04, + "learning_rate": 2.6739240506329116e-06, + "loss": 0.0071, + "step": 29450 + }, + { + "epoch": 16.04, + "learning_rate": 2.667594936708861e-06, + "loss": 0.01, + "step": 29475 + }, + { + "epoch": 16.04, + "learning_rate": 2.6612658227848105e-06, + "loss": 0.009, + "step": 29500 + }, + { + "epoch": 16.04, + "learning_rate": 2.65493670886076e-06, + "loss": 0.0091, + "step": 29525 + }, + { + "epoch": 16.04, + "learning_rate": 2.648607594936709e-06, + "loss": 0.0104, + "step": 29550 + }, + { + "epoch": 16.04, + "learning_rate": 2.6422784810126583e-06, + "loss": 0.0071, + "step": 29575 + }, + { + "epoch": 16.04, + "learning_rate": 2.6359493670886078e-06, + "loss": 0.0081, + "step": 29600 + }, + { + "epoch": 16.04, + "learning_rate": 2.629620253164557e-06, + "loss": 0.008, + "step": 29625 + }, + { + "epoch": 16.04, + "learning_rate": 2.623291139240506e-06, + "loss": 0.0067, + "step": 29650 + }, + { + "epoch": 16.04, + "learning_rate": 2.6169620253164556e-06, + "loss": 0.0066, + "step": 29675 + }, + { + "epoch": 16.04, + "learning_rate": 2.610632911392405e-06, + "loss": 0.0121, + "step": 29700 + }, + { + "epoch": 16.04, + "learning_rate": 2.6043037974683545e-06, + "loss": 0.0075, + "step": 29725 + }, + { + "epoch": 16.04, + "learning_rate": 2.5979746835443043e-06, + "loss": 0.0111, + "step": 29750 + }, + { + "epoch": 17.0, + "learning_rate": 2.5916455696202538e-06, + "loss": 0.0095, + "step": 29775 + }, + { + "epoch": 17.0, + "learning_rate": 2.5853164556962028e-06, + "loss": 0.0077, + "step": 29800 + }, + { + "epoch": 17.0, + "learning_rate": 2.578987341772152e-06, + "loss": 0.0047, + "step": 29825 + }, + { + "epoch": 17.0, + "learning_rate": 2.5726582278481016e-06, + "loss": 0.0092, + "step": 29850 + }, + { + "epoch": 17.0, + "learning_rate": 2.566329113924051e-06, + "loss": 0.0191, + "step": 29875 + }, + { + "epoch": 17.0, + "learning_rate": 2.56e-06, + "loss": 0.0102, + "step": 29900 + }, + { + "epoch": 17.0, + "learning_rate": 2.5536708860759495e-06, + "loss": 0.0146, + "step": 29925 + }, + { + "epoch": 17.0, + "learning_rate": 2.547341772151899e-06, + "loss": 0.0096, + "step": 29950 + }, + { + "epoch": 17.01, + "learning_rate": 2.5410126582278483e-06, + "loss": 0.0159, + "step": 29975 + }, + { + "epoch": 17.01, + "learning_rate": 2.5346835443037978e-06, + "loss": 0.0094, + "step": 30000 + }, + { + "epoch": 17.01, + "eval_loss": 0.2636851966381073, + "eval_runtime": 1508.7196, + "eval_samples_per_second": 6.901, + "eval_steps_per_second": 0.431, + "eval_wer": 23.029642758418838, + "step": 30000 + }, + { + "epoch": 17.01, + "learning_rate": 2.5283544303797468e-06, + "loss": 0.0078, + "step": 30025 + }, + { + "epoch": 17.01, + "learning_rate": 2.522025316455696e-06, + "loss": 0.01, + "step": 30050 + }, + { + "epoch": 17.01, + "learning_rate": 2.5156962025316456e-06, + "loss": 0.0058, + "step": 30075 + }, + { + "epoch": 17.01, + "learning_rate": 2.509367088607595e-06, + "loss": 0.0092, + "step": 30100 + }, + { + "epoch": 17.01, + "learning_rate": 2.503037974683544e-06, + "loss": 0.0084, + "step": 30125 + }, + { + "epoch": 17.01, + "learning_rate": 2.496708860759494e-06, + "loss": 0.0069, + "step": 30150 + }, + { + "epoch": 17.01, + "learning_rate": 2.4903797468354433e-06, + "loss": 0.0059, + "step": 30175 + }, + { + "epoch": 17.01, + "learning_rate": 2.4840506329113923e-06, + "loss": 0.0097, + "step": 30200 + }, + { + "epoch": 17.01, + "learning_rate": 2.4777215189873418e-06, + "loss": 0.0126, + "step": 30225 + }, + { + "epoch": 17.01, + "learning_rate": 2.4713924050632916e-06, + "loss": 0.0096, + "step": 30250 + }, + { + "epoch": 17.01, + "learning_rate": 2.4650632911392406e-06, + "loss": 0.0094, + "step": 30275 + }, + { + "epoch": 17.01, + "learning_rate": 2.45873417721519e-06, + "loss": 0.0111, + "step": 30300 + }, + { + "epoch": 17.01, + "learning_rate": 2.4524050632911395e-06, + "loss": 0.0062, + "step": 30325 + }, + { + "epoch": 17.01, + "learning_rate": 2.446075949367089e-06, + "loss": 0.0078, + "step": 30350 + }, + { + "epoch": 17.02, + "learning_rate": 2.4397468354430383e-06, + "loss": 0.0043, + "step": 30375 + }, + { + "epoch": 17.02, + "learning_rate": 2.4336708860759494e-06, + "loss": 0.0102, + "step": 30400 + }, + { + "epoch": 17.02, + "learning_rate": 2.4273417721518988e-06, + "loss": 0.0101, + "step": 30425 + }, + { + "epoch": 17.02, + "learning_rate": 2.4210126582278482e-06, + "loss": 0.0066, + "step": 30450 + }, + { + "epoch": 17.02, + "learning_rate": 2.4146835443037976e-06, + "loss": 0.0078, + "step": 30475 + }, + { + "epoch": 17.02, + "learning_rate": 2.408354430379747e-06, + "loss": 0.0074, + "step": 30500 + }, + { + "epoch": 17.02, + "learning_rate": 2.4020253164556965e-06, + "loss": 0.0072, + "step": 30525 + }, + { + "epoch": 17.02, + "learning_rate": 2.395696202531646e-06, + "loss": 0.0109, + "step": 30550 + }, + { + "epoch": 17.02, + "learning_rate": 2.389367088607595e-06, + "loss": 0.0113, + "step": 30575 + }, + { + "epoch": 17.02, + "learning_rate": 2.3830379746835444e-06, + "loss": 0.0109, + "step": 30600 + }, + { + "epoch": 17.02, + "learning_rate": 2.3767088607594938e-06, + "loss": 0.0089, + "step": 30625 + }, + { + "epoch": 17.02, + "learning_rate": 2.3703797468354432e-06, + "loss": 0.0062, + "step": 30650 + }, + { + "epoch": 17.02, + "learning_rate": 2.3640506329113926e-06, + "loss": 0.0101, + "step": 30675 + }, + { + "epoch": 17.02, + "learning_rate": 2.357721518987342e-06, + "loss": 0.0096, + "step": 30700 + }, + { + "epoch": 17.02, + "learning_rate": 2.3513924050632915e-06, + "loss": 0.0093, + "step": 30725 + }, + { + "epoch": 17.02, + "learning_rate": 2.3450632911392405e-06, + "loss": 0.0113, + "step": 30750 + }, + { + "epoch": 17.03, + "learning_rate": 2.33873417721519e-06, + "loss": 0.0094, + "step": 30775 + }, + { + "epoch": 17.03, + "learning_rate": 2.3324050632911394e-06, + "loss": 0.0086, + "step": 30800 + }, + { + "epoch": 17.03, + "learning_rate": 2.3260759493670888e-06, + "loss": 0.0096, + "step": 30825 + }, + { + "epoch": 17.03, + "learning_rate": 2.319746835443038e-06, + "loss": 0.0086, + "step": 30850 + }, + { + "epoch": 17.03, + "learning_rate": 2.3134177215189876e-06, + "loss": 0.0064, + "step": 30875 + }, + { + "epoch": 17.03, + "learning_rate": 2.307088607594937e-06, + "loss": 0.0107, + "step": 30900 + }, + { + "epoch": 17.03, + "learning_rate": 2.3007594936708865e-06, + "loss": 0.0073, + "step": 30925 + }, + { + "epoch": 17.03, + "learning_rate": 2.2944303797468355e-06, + "loss": 0.005, + "step": 30950 + }, + { + "epoch": 17.03, + "learning_rate": 2.288101265822785e-06, + "loss": 0.0055, + "step": 30975 + }, + { + "epoch": 17.03, + "learning_rate": 2.2817721518987344e-06, + "loss": 0.006, + "step": 31000 + }, + { + "epoch": 17.03, + "eval_loss": 0.2709895372390747, + "eval_runtime": 1461.5544, + "eval_samples_per_second": 7.123, + "eval_steps_per_second": 0.445, + "eval_wer": 23.29468771849949, + "step": 31000 + }, + { + "epoch": 17.03, + "learning_rate": 2.2754430379746838e-06, + "loss": 0.0094, + "step": 31025 + }, + { + "epoch": 17.03, + "learning_rate": 2.2691139240506328e-06, + "loss": 0.0088, + "step": 31050 + }, + { + "epoch": 17.03, + "learning_rate": 2.2627848101265826e-06, + "loss": 0.0057, + "step": 31075 + }, + { + "epoch": 17.03, + "learning_rate": 2.256455696202532e-06, + "loss": 0.0066, + "step": 31100 + }, + { + "epoch": 17.03, + "learning_rate": 2.250126582278481e-06, + "loss": 0.008, + "step": 31125 + }, + { + "epoch": 17.03, + "learning_rate": 2.2437974683544305e-06, + "loss": 0.0048, + "step": 31150 + }, + { + "epoch": 17.04, + "learning_rate": 2.23746835443038e-06, + "loss": 0.0074, + "step": 31175 + }, + { + "epoch": 17.04, + "learning_rate": 2.2311392405063294e-06, + "loss": 0.0061, + "step": 31200 + }, + { + "epoch": 17.04, + "learning_rate": 2.2248101265822788e-06, + "loss": 0.0088, + "step": 31225 + }, + { + "epoch": 17.04, + "learning_rate": 2.2184810126582278e-06, + "loss": 0.0087, + "step": 31250 + }, + { + "epoch": 17.04, + "learning_rate": 2.2121518987341776e-06, + "loss": 0.0089, + "step": 31275 + }, + { + "epoch": 17.04, + "learning_rate": 2.2058227848101266e-06, + "loss": 0.0061, + "step": 31300 + }, + { + "epoch": 17.04, + "learning_rate": 2.199493670886076e-06, + "loss": 0.0097, + "step": 31325 + }, + { + "epoch": 17.04, + "learning_rate": 2.1931645569620255e-06, + "loss": 0.0063, + "step": 31350 + }, + { + "epoch": 17.04, + "learning_rate": 2.186835443037975e-06, + "loss": 0.0084, + "step": 31375 + }, + { + "epoch": 17.04, + "learning_rate": 2.1805063291139243e-06, + "loss": 0.0089, + "step": 31400 + }, + { + "epoch": 17.04, + "learning_rate": 2.1741772151898734e-06, + "loss": 0.0077, + "step": 31425 + }, + { + "epoch": 17.04, + "learning_rate": 2.1678481012658228e-06, + "loss": 0.0153, + "step": 31450 + }, + { + "epoch": 17.04, + "learning_rate": 2.1615189873417726e-06, + "loss": 0.0088, + "step": 31475 + }, + { + "epoch": 17.04, + "learning_rate": 2.1551898734177216e-06, + "loss": 0.0095, + "step": 31500 + }, + { + "epoch": 18.0, + "learning_rate": 2.148860759493671e-06, + "loss": 0.0084, + "step": 31525 + }, + { + "epoch": 18.0, + "learning_rate": 2.1425316455696205e-06, + "loss": 0.0054, + "step": 31550 + }, + { + "epoch": 18.0, + "learning_rate": 2.13620253164557e-06, + "loss": 0.013, + "step": 31575 + }, + { + "epoch": 18.0, + "learning_rate": 2.129873417721519e-06, + "loss": 0.0074, + "step": 31600 + }, + { + "epoch": 18.0, + "learning_rate": 2.1235443037974684e-06, + "loss": 0.0083, + "step": 31625 + }, + { + "epoch": 18.0, + "learning_rate": 2.1172151898734178e-06, + "loss": 0.0118, + "step": 31650 + }, + { + "epoch": 18.0, + "learning_rate": 2.110886075949367e-06, + "loss": 0.0095, + "step": 31675 + }, + { + "epoch": 18.0, + "learning_rate": 2.1045569620253166e-06, + "loss": 0.0075, + "step": 31700 + }, + { + "epoch": 18.01, + "learning_rate": 2.098227848101266e-06, + "loss": 0.0149, + "step": 31725 + }, + { + "epoch": 18.01, + "learning_rate": 2.0918987341772155e-06, + "loss": 0.0095, + "step": 31750 + }, + { + "epoch": 18.01, + "learning_rate": 2.085569620253165e-06, + "loss": 0.0098, + "step": 31775 + }, + { + "epoch": 18.01, + "learning_rate": 2.079240506329114e-06, + "loss": 0.0058, + "step": 31800 + }, + { + "epoch": 18.01, + "learning_rate": 2.0729113924050633e-06, + "loss": 0.0072, + "step": 31825 + }, + { + "epoch": 18.01, + "learning_rate": 2.0665822784810128e-06, + "loss": 0.0103, + "step": 31850 + }, + { + "epoch": 18.01, + "learning_rate": 2.060253164556962e-06, + "loss": 0.0085, + "step": 31875 + }, + { + "epoch": 18.01, + "learning_rate": 2.0539240506329116e-06, + "loss": 0.0064, + "step": 31900 + }, + { + "epoch": 18.01, + "learning_rate": 2.047594936708861e-06, + "loss": 0.0077, + "step": 31925 + }, + { + "epoch": 18.01, + "learning_rate": 2.0412658227848105e-06, + "loss": 0.0086, + "step": 31950 + }, + { + "epoch": 18.01, + "learning_rate": 2.0349367088607595e-06, + "loss": 0.0136, + "step": 31975 + }, + { + "epoch": 18.01, + "learning_rate": 2.028607594936709e-06, + "loss": 0.0085, + "step": 32000 + }, + { + "epoch": 18.01, + "eval_loss": 0.26488059759140015, + "eval_runtime": 1466.953, + "eval_samples_per_second": 7.097, + "eval_steps_per_second": 0.444, + "eval_wer": 23.089806338314446, + "step": 32000 + }, + { + "epoch": 18.01, + "learning_rate": 2.0222784810126583e-06, + "loss": 0.011, + "step": 32025 + }, + { + "epoch": 18.01, + "learning_rate": 2.0159493670886078e-06, + "loss": 0.0086, + "step": 32050 + }, + { + "epoch": 18.01, + "learning_rate": 2.009620253164557e-06, + "loss": 0.008, + "step": 32075 + }, + { + "epoch": 18.01, + "learning_rate": 2.0032911392405066e-06, + "loss": 0.0083, + "step": 32100 + }, + { + "epoch": 18.02, + "learning_rate": 1.996962025316456e-06, + "loss": 0.0073, + "step": 32125 + }, + { + "epoch": 18.02, + "learning_rate": 1.9906329113924055e-06, + "loss": 0.0036, + "step": 32150 + }, + { + "epoch": 18.02, + "learning_rate": 1.9843037974683545e-06, + "loss": 0.0096, + "step": 32175 + }, + { + "epoch": 18.02, + "learning_rate": 1.977974683544304e-06, + "loss": 0.0071, + "step": 32200 + }, + { + "epoch": 18.02, + "learning_rate": 1.9716455696202533e-06, + "loss": 0.009, + "step": 32225 + }, + { + "epoch": 18.02, + "learning_rate": 1.9653164556962028e-06, + "loss": 0.0093, + "step": 32250 + }, + { + "epoch": 18.02, + "learning_rate": 1.9589873417721518e-06, + "loss": 0.0071, + "step": 32275 + }, + { + "epoch": 18.02, + "learning_rate": 1.9526582278481016e-06, + "loss": 0.0037, + "step": 32300 + }, + { + "epoch": 18.02, + "learning_rate": 1.946329113924051e-06, + "loss": 0.007, + "step": 32325 + }, + { + "epoch": 18.02, + "learning_rate": 1.94e-06, + "loss": 0.0104, + "step": 32350 + }, + { + "epoch": 18.02, + "learning_rate": 1.9336708860759495e-06, + "loss": 0.0082, + "step": 32375 + }, + { + "epoch": 18.02, + "learning_rate": 1.927341772151899e-06, + "loss": 0.0095, + "step": 32400 + }, + { + "epoch": 18.02, + "learning_rate": 1.9210126582278483e-06, + "loss": 0.0101, + "step": 32425 + }, + { + "epoch": 18.02, + "learning_rate": 1.9146835443037973e-06, + "loss": 0.0122, + "step": 32450 + }, + { + "epoch": 18.02, + "learning_rate": 1.9083544303797468e-06, + "loss": 0.0082, + "step": 32475 + }, + { + "epoch": 18.02, + "learning_rate": 1.9020253164556964e-06, + "loss": 0.0093, + "step": 32500 + }, + { + "epoch": 18.03, + "learning_rate": 1.8956962025316458e-06, + "loss": 0.0073, + "step": 32525 + }, + { + "epoch": 18.03, + "learning_rate": 1.889367088607595e-06, + "loss": 0.0061, + "step": 32550 + }, + { + "epoch": 18.03, + "learning_rate": 1.8830379746835445e-06, + "loss": 0.0072, + "step": 32575 + }, + { + "epoch": 18.03, + "learning_rate": 1.876708860759494e-06, + "loss": 0.0064, + "step": 32600 + }, + { + "epoch": 18.03, + "learning_rate": 1.8703797468354431e-06, + "loss": 0.0084, + "step": 32625 + }, + { + "epoch": 18.03, + "learning_rate": 1.8640506329113926e-06, + "loss": 0.0067, + "step": 32650 + }, + { + "epoch": 18.03, + "learning_rate": 1.8577215189873418e-06, + "loss": 0.0052, + "step": 32675 + }, + { + "epoch": 18.03, + "learning_rate": 1.8513924050632912e-06, + "loss": 0.0085, + "step": 32700 + }, + { + "epoch": 18.03, + "learning_rate": 1.8450632911392408e-06, + "loss": 0.0077, + "step": 32725 + }, + { + "epoch": 18.03, + "learning_rate": 1.83873417721519e-06, + "loss": 0.0068, + "step": 32750 + }, + { + "epoch": 18.03, + "learning_rate": 1.8324050632911395e-06, + "loss": 0.0079, + "step": 32775 + }, + { + "epoch": 18.03, + "learning_rate": 1.8260759493670887e-06, + "loss": 0.008, + "step": 32800 + }, + { + "epoch": 18.03, + "learning_rate": 1.8197468354430381e-06, + "loss": 0.0071, + "step": 32825 + }, + { + "epoch": 18.03, + "learning_rate": 1.8134177215189873e-06, + "loss": 0.0091, + "step": 32850 + }, + { + "epoch": 18.03, + "learning_rate": 1.8070886075949368e-06, + "loss": 0.0059, + "step": 32875 + }, + { + "epoch": 18.03, + "learning_rate": 1.8007594936708862e-06, + "loss": 0.0079, + "step": 32900 + }, + { + "epoch": 18.04, + "learning_rate": 1.7944303797468356e-06, + "loss": 0.0055, + "step": 32925 + }, + { + "epoch": 18.04, + "learning_rate": 1.788101265822785e-06, + "loss": 0.0095, + "step": 32950 + }, + { + "epoch": 18.04, + "learning_rate": 1.7817721518987343e-06, + "loss": 0.0086, + "step": 32975 + }, + { + "epoch": 18.04, + "learning_rate": 1.7754430379746837e-06, + "loss": 0.0057, + "step": 33000 + }, + { + "epoch": 18.04, + "eval_loss": 0.2678743898868561, + "eval_runtime": 1484.126, + "eval_samples_per_second": 7.015, + "eval_steps_per_second": 0.439, + "eval_wer": 23.405258622091416, + "step": 33000 + }, + { + "epoch": 18.04, + "learning_rate": 1.7691139240506331e-06, + "loss": 0.0085, + "step": 33025 + }, + { + "epoch": 18.04, + "learning_rate": 1.7627848101265823e-06, + "loss": 0.0073, + "step": 33050 + }, + { + "epoch": 18.04, + "learning_rate": 1.7564556962025318e-06, + "loss": 0.0045, + "step": 33075 + }, + { + "epoch": 18.04, + "learning_rate": 1.750126582278481e-06, + "loss": 0.0065, + "step": 33100 + }, + { + "epoch": 18.04, + "learning_rate": 1.7437974683544306e-06, + "loss": 0.0076, + "step": 33125 + }, + { + "epoch": 18.04, + "learning_rate": 1.73746835443038e-06, + "loss": 0.007, + "step": 33150 + }, + { + "epoch": 18.04, + "learning_rate": 1.7311392405063293e-06, + "loss": 0.0109, + "step": 33175 + }, + { + "epoch": 18.04, + "learning_rate": 1.7248101265822787e-06, + "loss": 0.0095, + "step": 33200 + }, + { + "epoch": 18.04, + "learning_rate": 1.718481012658228e-06, + "loss": 0.0109, + "step": 33225 + }, + { + "epoch": 18.04, + "learning_rate": 1.7121518987341773e-06, + "loss": 0.0117, + "step": 33250 + }, + { + "epoch": 19.0, + "learning_rate": 1.7058227848101266e-06, + "loss": 0.0038, + "step": 33275 + }, + { + "epoch": 19.0, + "learning_rate": 1.699493670886076e-06, + "loss": 0.013, + "step": 33300 + }, + { + "epoch": 19.0, + "learning_rate": 1.6931645569620256e-06, + "loss": 0.0104, + "step": 33325 + }, + { + "epoch": 19.0, + "learning_rate": 1.6868354430379748e-06, + "loss": 0.0064, + "step": 33350 + }, + { + "epoch": 19.0, + "learning_rate": 1.6805063291139243e-06, + "loss": 0.0137, + "step": 33375 + }, + { + "epoch": 19.0, + "learning_rate": 1.6741772151898735e-06, + "loss": 0.012, + "step": 33400 + }, + { + "epoch": 19.0, + "learning_rate": 1.667848101265823e-06, + "loss": 0.0106, + "step": 33425 + }, + { + "epoch": 19.0, + "learning_rate": 1.6615189873417723e-06, + "loss": 0.0072, + "step": 33450 + }, + { + "epoch": 19.01, + "learning_rate": 1.6551898734177216e-06, + "loss": 0.0154, + "step": 33475 + }, + { + "epoch": 19.01, + "learning_rate": 1.648860759493671e-06, + "loss": 0.0129, + "step": 33500 + }, + { + "epoch": 19.01, + "learning_rate": 1.6425316455696206e-06, + "loss": 0.008, + "step": 33525 + }, + { + "epoch": 19.01, + "learning_rate": 1.6362025316455698e-06, + "loss": 0.0071, + "step": 33550 + }, + { + "epoch": 19.01, + "learning_rate": 1.6298734177215193e-06, + "loss": 0.0088, + "step": 33575 + }, + { + "epoch": 19.01, + "learning_rate": 1.6235443037974685e-06, + "loss": 0.0102, + "step": 33600 + }, + { + "epoch": 19.01, + "learning_rate": 1.617215189873418e-06, + "loss": 0.0064, + "step": 33625 + }, + { + "epoch": 19.01, + "learning_rate": 1.6108860759493671e-06, + "loss": 0.0098, + "step": 33650 + }, + { + "epoch": 19.01, + "learning_rate": 1.6045569620253166e-06, + "loss": 0.009, + "step": 33675 + }, + { + "epoch": 19.01, + "learning_rate": 1.5982278481012658e-06, + "loss": 0.0074, + "step": 33700 + }, + { + "epoch": 19.01, + "learning_rate": 1.5918987341772152e-06, + "loss": 0.0055, + "step": 33725 + }, + { + "epoch": 19.01, + "learning_rate": 1.5855696202531648e-06, + "loss": 0.0059, + "step": 33750 + }, + { + "epoch": 19.01, + "learning_rate": 1.579240506329114e-06, + "loss": 0.0102, + "step": 33775 + }, + { + "epoch": 19.01, + "learning_rate": 1.5729113924050635e-06, + "loss": 0.0111, + "step": 33800 + }, + { + "epoch": 19.01, + "learning_rate": 1.5665822784810127e-06, + "loss": 0.0033, + "step": 33825 + }, + { + "epoch": 19.01, + "learning_rate": 1.5602531645569621e-06, + "loss": 0.0057, + "step": 33850 + }, + { + "epoch": 19.02, + "learning_rate": 1.5539240506329115e-06, + "loss": 0.0074, + "step": 33875 + }, + { + "epoch": 19.02, + "learning_rate": 1.5475949367088608e-06, + "loss": 0.0056, + "step": 33900 + }, + { + "epoch": 19.02, + "learning_rate": 1.5412658227848102e-06, + "loss": 0.0053, + "step": 33925 + }, + { + "epoch": 19.02, + "learning_rate": 1.5349367088607598e-06, + "loss": 0.0049, + "step": 33950 + }, + { + "epoch": 19.02, + "learning_rate": 1.528607594936709e-06, + "loss": 0.0065, + "step": 33975 + }, + { + "epoch": 19.02, + "learning_rate": 1.5222784810126585e-06, + "loss": 0.007, + "step": 34000 + }, + { + "epoch": 19.02, + "eval_loss": 0.2660910487174988, + "eval_runtime": 1471.5922, + "eval_samples_per_second": 7.075, + "eval_steps_per_second": 0.442, + "eval_wer": 23.14834387551017, + "step": 34000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.569356265480192e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-34000/training_args.bin b/checkpoint-34000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-34000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-35000/config.json b/checkpoint-35000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-35000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-35000/generation_config.json b/checkpoint-35000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-35000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-35000/optimizer.pt b/checkpoint-35000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6796d73cd1c136be425e6c3c277d13bcbab23fa6 --- /dev/null +++ b/checkpoint-35000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:047c889fd6e7e81dd6c07dad09104139243aad357621d18f0067d0472d4c9781 +size 1934161093 diff --git a/checkpoint-35000/preprocessor_config.json b/checkpoint-35000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-35000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-35000/pytorch_model.bin b/checkpoint-35000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..caad48ca905458aacfbfcccbe1d04a2f2fcb7640 --- /dev/null +++ b/checkpoint-35000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01ea4bf5f5badeebe5a54f58a72cd14a81fee78b5b92ff8ad180551d585ba2e5 +size 967102601 diff --git a/checkpoint-35000/rng_state.pth b/checkpoint-35000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cfb3bbc6dc884c3d06eff21d8d3e2ff55ed1d11e --- /dev/null +++ b/checkpoint-35000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e779730f2cf33eb5895ceffaca6252e0e05aaa1bf51c5044efc1dd3565d5226c +size 14575 diff --git a/checkpoint-35000/scaler.pt b/checkpoint-35000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..861286a20a35e6e16afaf05703e546ccf2b56cb7 --- /dev/null +++ b/checkpoint-35000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17825be488a38c5ff07f7822f674f25c265e1f3891b463400269e484b18abc6c +size 557 diff --git a/checkpoint-35000/scheduler.pt b/checkpoint-35000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f14cf139d11403421d44339e52eaf7f6e117baf1 --- /dev/null +++ b/checkpoint-35000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7375843aa543ffb026c6739a820dfc0bede046791e5da971c9eb1e2ba73142eb +size 627 diff --git a/checkpoint-35000/trainer_state.json b/checkpoint-35000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..75239ba07f91245b7c3a80f8424333889541e2c8 --- /dev/null +++ b/checkpoint-35000/trainer_state.json @@ -0,0 +1,8731 @@ +{ + "best_metric": 23.029642758418838, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-30000", + "epoch": 19.043275, + "global_step": 35000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + }, + { + "epoch": 13.03, + "learning_rate": 4.046835443037975e-06, + "loss": 0.0128, + "step": 24025 + }, + { + "epoch": 13.03, + "learning_rate": 4.040506329113924e-06, + "loss": 0.012, + "step": 24050 + }, + { + "epoch": 13.03, + "learning_rate": 4.034177215189874e-06, + "loss": 0.0098, + "step": 24075 + }, + { + "epoch": 13.03, + "learning_rate": 4.027848101265823e-06, + "loss": 0.0091, + "step": 24100 + }, + { + "epoch": 13.03, + "learning_rate": 4.021518987341773e-06, + "loss": 0.0077, + "step": 24125 + }, + { + "epoch": 13.03, + "learning_rate": 4.015189873417722e-06, + "loss": 0.0135, + "step": 24150 + }, + { + "epoch": 13.04, + "learning_rate": 4.0088607594936715e-06, + "loss": 0.0124, + "step": 24175 + }, + { + "epoch": 13.04, + "learning_rate": 4.002531645569621e-06, + "loss": 0.0152, + "step": 24200 + }, + { + "epoch": 13.04, + "learning_rate": 3.99620253164557e-06, + "loss": 0.0127, + "step": 24225 + }, + { + "epoch": 13.04, + "learning_rate": 3.989873417721519e-06, + "loss": 0.0066, + "step": 24250 + }, + { + "epoch": 13.04, + "learning_rate": 3.983544303797468e-06, + "loss": 0.0163, + "step": 24275 + }, + { + "epoch": 13.04, + "learning_rate": 3.977215189873418e-06, + "loss": 0.0095, + "step": 24300 + }, + { + "epoch": 13.04, + "learning_rate": 3.970886075949367e-06, + "loss": 0.0103, + "step": 24325 + }, + { + "epoch": 13.04, + "learning_rate": 3.964556962025317e-06, + "loss": 0.0112, + "step": 24350 + }, + { + "epoch": 13.04, + "learning_rate": 3.958227848101266e-06, + "loss": 0.01, + "step": 24375 + }, + { + "epoch": 13.04, + "learning_rate": 3.9518987341772155e-06, + "loss": 0.0131, + "step": 24400 + }, + { + "epoch": 13.04, + "learning_rate": 3.945569620253165e-06, + "loss": 0.0143, + "step": 24425 + }, + { + "epoch": 13.04, + "learning_rate": 3.939240506329114e-06, + "loss": 0.0151, + "step": 24450 + }, + { + "epoch": 13.04, + "learning_rate": 3.932911392405064e-06, + "loss": 0.0129, + "step": 24475 + }, + { + "epoch": 13.04, + "learning_rate": 3.926582278481013e-06, + "loss": 0.0125, + "step": 24500 + }, + { + "epoch": 14.0, + "learning_rate": 3.920506329113925e-06, + "loss": 0.0165, + "step": 24525 + }, + { + "epoch": 14.0, + "learning_rate": 3.914177215189874e-06, + "loss": 0.0114, + "step": 24550 + }, + { + "epoch": 14.0, + "learning_rate": 3.9078481012658235e-06, + "loss": 0.0111, + "step": 24575 + }, + { + "epoch": 14.0, + "learning_rate": 3.901518987341772e-06, + "loss": 0.0134, + "step": 24600 + }, + { + "epoch": 14.0, + "learning_rate": 3.8951898734177215e-06, + "loss": 0.0138, + "step": 24625 + }, + { + "epoch": 14.0, + "learning_rate": 3.888860759493671e-06, + "loss": 0.0151, + "step": 24650 + }, + { + "epoch": 14.0, + "learning_rate": 3.88253164556962e-06, + "loss": 0.0101, + "step": 24675 + }, + { + "epoch": 14.0, + "learning_rate": 3.87620253164557e-06, + "loss": 0.0108, + "step": 24700 + }, + { + "epoch": 14.01, + "learning_rate": 3.869873417721519e-06, + "loss": 0.0151, + "step": 24725 + }, + { + "epoch": 14.01, + "learning_rate": 3.863544303797469e-06, + "loss": 0.0132, + "step": 24750 + }, + { + "epoch": 14.01, + "learning_rate": 3.857215189873418e-06, + "loss": 0.012, + "step": 24775 + }, + { + "epoch": 14.01, + "learning_rate": 3.8508860759493675e-06, + "loss": 0.017, + "step": 24800 + }, + { + "epoch": 14.01, + "learning_rate": 3.844556962025317e-06, + "loss": 0.0141, + "step": 24825 + }, + { + "epoch": 14.01, + "learning_rate": 3.838227848101266e-06, + "loss": 0.0166, + "step": 24850 + }, + { + "epoch": 14.01, + "learning_rate": 3.831898734177216e-06, + "loss": 0.0112, + "step": 24875 + }, + { + "epoch": 14.01, + "learning_rate": 3.825569620253165e-06, + "loss": 0.0097, + "step": 24900 + }, + { + "epoch": 14.01, + "learning_rate": 3.819240506329115e-06, + "loss": 0.0138, + "step": 24925 + }, + { + "epoch": 14.01, + "learning_rate": 3.8129113924050636e-06, + "loss": 0.0069, + "step": 24950 + }, + { + "epoch": 14.01, + "learning_rate": 3.806582278481013e-06, + "loss": 0.0111, + "step": 24975 + }, + { + "epoch": 14.01, + "learning_rate": 3.8002531645569625e-06, + "loss": 0.0149, + "step": 25000 + }, + { + "epoch": 14.01, + "eval_loss": 0.2640444040298462, + "eval_runtime": 1478.4977, + "eval_samples_per_second": 7.042, + "eval_steps_per_second": 0.44, + "eval_wer": 24.063805915543345, + "step": 25000 + }, + { + "epoch": 14.01, + "learning_rate": 3.7939240506329115e-06, + "loss": 0.0176, + "step": 25025 + }, + { + "epoch": 14.01, + "learning_rate": 3.787594936708861e-06, + "loss": 0.0104, + "step": 25050 + }, + { + "epoch": 14.01, + "learning_rate": 3.7812658227848103e-06, + "loss": 0.0084, + "step": 25075 + }, + { + "epoch": 14.01, + "learning_rate": 3.7749367088607598e-06, + "loss": 0.0118, + "step": 25100 + }, + { + "epoch": 14.02, + "learning_rate": 3.768607594936709e-06, + "loss": 0.0099, + "step": 25125 + }, + { + "epoch": 14.02, + "learning_rate": 3.762278481012658e-06, + "loss": 0.0089, + "step": 25150 + }, + { + "epoch": 14.02, + "learning_rate": 3.7559493670886076e-06, + "loss": 0.0119, + "step": 25175 + }, + { + "epoch": 14.02, + "learning_rate": 3.749620253164557e-06, + "loss": 0.0118, + "step": 25200 + }, + { + "epoch": 14.02, + "learning_rate": 3.743291139240507e-06, + "loss": 0.0108, + "step": 25225 + }, + { + "epoch": 14.02, + "learning_rate": 3.7369620253164563e-06, + "loss": 0.0101, + "step": 25250 + }, + { + "epoch": 14.02, + "learning_rate": 3.7306329113924058e-06, + "loss": 0.0095, + "step": 25275 + }, + { + "epoch": 14.02, + "learning_rate": 3.7243037974683548e-06, + "loss": 0.0106, + "step": 25300 + }, + { + "epoch": 14.02, + "learning_rate": 3.717974683544304e-06, + "loss": 0.0135, + "step": 25325 + }, + { + "epoch": 14.02, + "learning_rate": 3.7116455696202536e-06, + "loss": 0.0115, + "step": 25350 + }, + { + "epoch": 14.02, + "learning_rate": 3.705316455696203e-06, + "loss": 0.0114, + "step": 25375 + }, + { + "epoch": 14.02, + "learning_rate": 3.698987341772152e-06, + "loss": 0.0119, + "step": 25400 + }, + { + "epoch": 14.02, + "learning_rate": 3.6926582278481015e-06, + "loss": 0.0123, + "step": 25425 + }, + { + "epoch": 14.02, + "learning_rate": 3.686329113924051e-06, + "loss": 0.0138, + "step": 25450 + }, + { + "epoch": 14.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 0.0133, + "step": 25475 + }, + { + "epoch": 14.02, + "learning_rate": 3.6736708860759498e-06, + "loss": 0.0106, + "step": 25500 + }, + { + "epoch": 14.03, + "learning_rate": 3.6673417721518988e-06, + "loss": 0.0099, + "step": 25525 + }, + { + "epoch": 14.03, + "learning_rate": 3.661012658227848e-06, + "loss": 0.0104, + "step": 25550 + }, + { + "epoch": 14.03, + "learning_rate": 3.6546835443037976e-06, + "loss": 0.0143, + "step": 25575 + }, + { + "epoch": 14.03, + "learning_rate": 3.648354430379747e-06, + "loss": 0.0096, + "step": 25600 + }, + { + "epoch": 14.03, + "learning_rate": 3.642025316455696e-06, + "loss": 0.0094, + "step": 25625 + }, + { + "epoch": 14.03, + "learning_rate": 3.6356962025316463e-06, + "loss": 0.0162, + "step": 25650 + }, + { + "epoch": 14.03, + "learning_rate": 3.6293670886075953e-06, + "loss": 0.008, + "step": 25675 + }, + { + "epoch": 14.03, + "learning_rate": 3.6230379746835448e-06, + "loss": 0.0094, + "step": 25700 + }, + { + "epoch": 14.03, + "learning_rate": 3.616708860759494e-06, + "loss": 0.0122, + "step": 25725 + }, + { + "epoch": 14.03, + "learning_rate": 3.6103797468354436e-06, + "loss": 0.0128, + "step": 25750 + }, + { + "epoch": 14.03, + "learning_rate": 3.6040506329113926e-06, + "loss": 0.0117, + "step": 25775 + }, + { + "epoch": 14.03, + "learning_rate": 3.597721518987342e-06, + "loss": 0.0067, + "step": 25800 + }, + { + "epoch": 14.03, + "learning_rate": 3.5913924050632915e-06, + "loss": 0.0078, + "step": 25825 + }, + { + "epoch": 14.03, + "learning_rate": 3.585063291139241e-06, + "loss": 0.0071, + "step": 25850 + }, + { + "epoch": 14.03, + "learning_rate": 3.57873417721519e-06, + "loss": 0.0093, + "step": 25875 + }, + { + "epoch": 14.03, + "learning_rate": 3.5724050632911393e-06, + "loss": 0.0064, + "step": 25900 + }, + { + "epoch": 14.04, + "learning_rate": 3.5660759493670888e-06, + "loss": 0.0067, + "step": 25925 + }, + { + "epoch": 14.04, + "learning_rate": 3.559746835443038e-06, + "loss": 0.0076, + "step": 25950 + }, + { + "epoch": 14.04, + "learning_rate": 3.5534177215189876e-06, + "loss": 0.0099, + "step": 25975 + }, + { + "epoch": 14.04, + "learning_rate": 3.5470886075949366e-06, + "loss": 0.0111, + "step": 26000 + }, + { + "epoch": 14.04, + "eval_loss": 0.2734878659248352, + "eval_runtime": 1456.7487, + "eval_samples_per_second": 7.147, + "eval_steps_per_second": 0.447, + "eval_wer": 24.359745686921737, + "step": 26000 + }, + { + "epoch": 14.04, + "learning_rate": 3.540759493670886e-06, + "loss": 0.0134, + "step": 26025 + }, + { + "epoch": 14.04, + "learning_rate": 3.534430379746836e-06, + "loss": 0.008, + "step": 26050 + }, + { + "epoch": 14.04, + "learning_rate": 3.5281012658227853e-06, + "loss": 0.0138, + "step": 26075 + }, + { + "epoch": 14.04, + "learning_rate": 3.5217721518987348e-06, + "loss": 0.0075, + "step": 26100 + }, + { + "epoch": 14.04, + "learning_rate": 3.515443037974684e-06, + "loss": 0.0097, + "step": 26125 + }, + { + "epoch": 14.04, + "learning_rate": 3.509113924050633e-06, + "loss": 0.0139, + "step": 26150 + }, + { + "epoch": 14.04, + "learning_rate": 3.5027848101265826e-06, + "loss": 0.0099, + "step": 26175 + }, + { + "epoch": 14.04, + "learning_rate": 3.496455696202532e-06, + "loss": 0.0148, + "step": 26200 + }, + { + "epoch": 14.04, + "learning_rate": 3.4901265822784815e-06, + "loss": 0.0119, + "step": 26225 + }, + { + "epoch": 14.04, + "learning_rate": 3.4837974683544305e-06, + "loss": 0.0086, + "step": 26250 + }, + { + "epoch": 15.0, + "learning_rate": 3.47746835443038e-06, + "loss": 0.0091, + "step": 26275 + }, + { + "epoch": 15.0, + "learning_rate": 3.4711392405063293e-06, + "loss": 0.0105, + "step": 26300 + }, + { + "epoch": 15.0, + "learning_rate": 3.465063291139241e-06, + "loss": 0.0138, + "step": 26325 + }, + { + "epoch": 15.0, + "learning_rate": 3.45873417721519e-06, + "loss": 0.0185, + "step": 26350 + }, + { + "epoch": 15.0, + "learning_rate": 3.4524050632911392e-06, + "loss": 0.015, + "step": 26375 + }, + { + "epoch": 15.0, + "learning_rate": 3.4460759493670886e-06, + "loss": 0.0177, + "step": 26400 + }, + { + "epoch": 15.0, + "learning_rate": 3.439746835443038e-06, + "loss": 0.014, + "step": 26425 + }, + { + "epoch": 15.0, + "learning_rate": 3.433417721518988e-06, + "loss": 0.0127, + "step": 26450 + }, + { + "epoch": 15.01, + "learning_rate": 3.4270886075949374e-06, + "loss": 0.0135, + "step": 26475 + }, + { + "epoch": 15.01, + "learning_rate": 3.4207594936708864e-06, + "loss": 0.0171, + "step": 26500 + }, + { + "epoch": 15.01, + "learning_rate": 3.4144303797468358e-06, + "loss": 0.0114, + "step": 26525 + }, + { + "epoch": 15.01, + "learning_rate": 3.4081012658227852e-06, + "loss": 0.0099, + "step": 26550 + }, + { + "epoch": 15.01, + "learning_rate": 3.4017721518987346e-06, + "loss": 0.0126, + "step": 26575 + }, + { + "epoch": 15.01, + "learning_rate": 3.395443037974684e-06, + "loss": 0.0116, + "step": 26600 + }, + { + "epoch": 15.01, + "learning_rate": 3.389113924050633e-06, + "loss": 0.0087, + "step": 26625 + }, + { + "epoch": 15.01, + "learning_rate": 3.3827848101265825e-06, + "loss": 0.0073, + "step": 26650 + }, + { + "epoch": 15.01, + "learning_rate": 3.376455696202532e-06, + "loss": 0.0102, + "step": 26675 + }, + { + "epoch": 15.01, + "learning_rate": 3.3701265822784814e-06, + "loss": 0.0106, + "step": 26700 + }, + { + "epoch": 15.01, + "learning_rate": 3.3637974683544304e-06, + "loss": 0.0094, + "step": 26725 + }, + { + "epoch": 15.01, + "learning_rate": 3.35746835443038e-06, + "loss": 0.0113, + "step": 26750 + }, + { + "epoch": 15.01, + "learning_rate": 3.3511392405063292e-06, + "loss": 0.0169, + "step": 26775 + }, + { + "epoch": 15.01, + "learning_rate": 3.3448101265822786e-06, + "loss": 0.0084, + "step": 26800 + }, + { + "epoch": 15.01, + "learning_rate": 3.338481012658228e-06, + "loss": 0.0153, + "step": 26825 + }, + { + "epoch": 15.01, + "learning_rate": 3.332151898734177e-06, + "loss": 0.012, + "step": 26850 + }, + { + "epoch": 15.02, + "learning_rate": 3.325822784810127e-06, + "loss": 0.0106, + "step": 26875 + }, + { + "epoch": 15.02, + "learning_rate": 3.3194936708860764e-06, + "loss": 0.0099, + "step": 26900 + }, + { + "epoch": 15.02, + "learning_rate": 3.3131645569620258e-06, + "loss": 0.008, + "step": 26925 + }, + { + "epoch": 15.02, + "learning_rate": 3.306835443037975e-06, + "loss": 0.0076, + "step": 26950 + }, + { + "epoch": 15.02, + "learning_rate": 3.3005063291139246e-06, + "loss": 0.0088, + "step": 26975 + }, + { + "epoch": 15.02, + "learning_rate": 3.2941772151898736e-06, + "loss": 0.009, + "step": 27000 + }, + { + "epoch": 15.02, + "eval_loss": 0.2693029046058655, + "eval_runtime": 1479.1374, + "eval_samples_per_second": 7.039, + "eval_steps_per_second": 0.44, + "eval_wer": 24.28819980812696, + "step": 27000 + }, + { + "epoch": 15.02, + "learning_rate": 3.287848101265823e-06, + "loss": 0.0115, + "step": 27025 + }, + { + "epoch": 15.02, + "learning_rate": 3.2815189873417725e-06, + "loss": 0.0099, + "step": 27050 + }, + { + "epoch": 15.02, + "learning_rate": 3.275189873417722e-06, + "loss": 0.0073, + "step": 27075 + }, + { + "epoch": 15.02, + "learning_rate": 3.268860759493671e-06, + "loss": 0.0061, + "step": 27100 + }, + { + "epoch": 15.02, + "learning_rate": 3.2625316455696204e-06, + "loss": 0.007, + "step": 27125 + }, + { + "epoch": 15.02, + "learning_rate": 3.2562025316455698e-06, + "loss": 0.0109, + "step": 27150 + }, + { + "epoch": 15.02, + "learning_rate": 3.2498734177215192e-06, + "loss": 0.0123, + "step": 27175 + }, + { + "epoch": 15.02, + "learning_rate": 3.2435443037974686e-06, + "loss": 0.0121, + "step": 27200 + }, + { + "epoch": 15.02, + "learning_rate": 3.2372151898734176e-06, + "loss": 0.0129, + "step": 27225 + }, + { + "epoch": 15.02, + "learning_rate": 3.230886075949367e-06, + "loss": 0.0128, + "step": 27250 + }, + { + "epoch": 15.03, + "learning_rate": 3.224556962025317e-06, + "loss": 0.0094, + "step": 27275 + }, + { + "epoch": 15.03, + "learning_rate": 3.2182278481012664e-06, + "loss": 0.0081, + "step": 27300 + }, + { + "epoch": 15.03, + "learning_rate": 3.2118987341772158e-06, + "loss": 0.0087, + "step": 27325 + }, + { + "epoch": 15.03, + "learning_rate": 3.2055696202531648e-06, + "loss": 0.0131, + "step": 27350 + }, + { + "epoch": 15.03, + "learning_rate": 3.199240506329114e-06, + "loss": 0.0073, + "step": 27375 + }, + { + "epoch": 15.03, + "learning_rate": 3.1929113924050636e-06, + "loss": 0.0086, + "step": 27400 + }, + { + "epoch": 15.03, + "learning_rate": 3.186582278481013e-06, + "loss": 0.0108, + "step": 27425 + }, + { + "epoch": 15.03, + "learning_rate": 3.1802531645569625e-06, + "loss": 0.01, + "step": 27450 + }, + { + "epoch": 15.03, + "learning_rate": 3.1739240506329115e-06, + "loss": 0.0123, + "step": 27475 + }, + { + "epoch": 15.03, + "learning_rate": 3.167594936708861e-06, + "loss": 0.0081, + "step": 27500 + }, + { + "epoch": 15.03, + "learning_rate": 3.1612658227848104e-06, + "loss": 0.0123, + "step": 27525 + }, + { + "epoch": 15.03, + "learning_rate": 3.1549367088607598e-06, + "loss": 0.0096, + "step": 27550 + }, + { + "epoch": 15.03, + "learning_rate": 3.1486075949367088e-06, + "loss": 0.0054, + "step": 27575 + }, + { + "epoch": 15.03, + "learning_rate": 3.1422784810126582e-06, + "loss": 0.006, + "step": 27600 + }, + { + "epoch": 15.03, + "learning_rate": 3.1359493670886076e-06, + "loss": 0.0089, + "step": 27625 + }, + { + "epoch": 15.03, + "learning_rate": 3.129620253164557e-06, + "loss": 0.0109, + "step": 27650 + }, + { + "epoch": 15.04, + "learning_rate": 3.1232911392405065e-06, + "loss": 0.0089, + "step": 27675 + }, + { + "epoch": 15.04, + "learning_rate": 3.1169620253164563e-06, + "loss": 0.0094, + "step": 27700 + }, + { + "epoch": 15.04, + "learning_rate": 3.1106329113924054e-06, + "loss": 0.0102, + "step": 27725 + }, + { + "epoch": 15.04, + "learning_rate": 3.1043037974683548e-06, + "loss": 0.0097, + "step": 27750 + }, + { + "epoch": 15.04, + "learning_rate": 3.097974683544304e-06, + "loss": 0.0078, + "step": 27775 + }, + { + "epoch": 15.04, + "learning_rate": 3.0916455696202536e-06, + "loss": 0.0073, + "step": 27800 + }, + { + "epoch": 15.04, + "learning_rate": 3.085316455696203e-06, + "loss": 0.0105, + "step": 27825 + }, + { + "epoch": 15.04, + "learning_rate": 3.078987341772152e-06, + "loss": 0.0083, + "step": 27850 + }, + { + "epoch": 15.04, + "learning_rate": 3.0726582278481015e-06, + "loss": 0.0138, + "step": 27875 + }, + { + "epoch": 15.04, + "learning_rate": 3.066329113924051e-06, + "loss": 0.0049, + "step": 27900 + }, + { + "epoch": 15.04, + "learning_rate": 3.0600000000000003e-06, + "loss": 0.0107, + "step": 27925 + }, + { + "epoch": 15.04, + "learning_rate": 3.0536708860759494e-06, + "loss": 0.0105, + "step": 27950 + }, + { + "epoch": 15.04, + "learning_rate": 3.0473417721518988e-06, + "loss": 0.0134, + "step": 27975 + }, + { + "epoch": 15.04, + "learning_rate": 3.041012658227848e-06, + "loss": 0.0137, + "step": 28000 + }, + { + "epoch": 15.04, + "eval_loss": 0.2780563235282898, + "eval_runtime": 1465.9542, + "eval_samples_per_second": 7.102, + "eval_steps_per_second": 0.444, + "eval_wer": 25.010162766874256, + "step": 28000 + }, + { + "epoch": 16.0, + "learning_rate": 3.0346835443037976e-06, + "loss": 0.009, + "step": 28025 + }, + { + "epoch": 16.0, + "learning_rate": 3.028354430379747e-06, + "loss": 0.0081, + "step": 28050 + }, + { + "epoch": 16.0, + "learning_rate": 3.022025316455696e-06, + "loss": 0.0109, + "step": 28075 + }, + { + "epoch": 16.0, + "learning_rate": 3.015696202531646e-06, + "loss": 0.0132, + "step": 28100 + }, + { + "epoch": 16.0, + "learning_rate": 3.0093670886075953e-06, + "loss": 0.0142, + "step": 28125 + }, + { + "epoch": 16.0, + "learning_rate": 3.0030379746835448e-06, + "loss": 0.0152, + "step": 28150 + }, + { + "epoch": 16.0, + "learning_rate": 2.996708860759494e-06, + "loss": 0.0074, + "step": 28175 + }, + { + "epoch": 16.0, + "learning_rate": 2.990379746835443e-06, + "loss": 0.0129, + "step": 28200 + }, + { + "epoch": 16.01, + "learning_rate": 2.9840506329113926e-06, + "loss": 0.0118, + "step": 28225 + }, + { + "epoch": 16.01, + "learning_rate": 2.977721518987342e-06, + "loss": 0.0153, + "step": 28250 + }, + { + "epoch": 16.01, + "learning_rate": 2.9713924050632915e-06, + "loss": 0.0121, + "step": 28275 + }, + { + "epoch": 16.01, + "learning_rate": 2.965063291139241e-06, + "loss": 0.0097, + "step": 28300 + }, + { + "epoch": 16.01, + "learning_rate": 2.95873417721519e-06, + "loss": 0.0083, + "step": 28325 + }, + { + "epoch": 16.01, + "learning_rate": 2.9524050632911393e-06, + "loss": 0.01, + "step": 28350 + }, + { + "epoch": 16.01, + "learning_rate": 2.9460759493670888e-06, + "loss": 0.0084, + "step": 28375 + }, + { + "epoch": 16.01, + "learning_rate": 2.939746835443038e-06, + "loss": 0.0124, + "step": 28400 + }, + { + "epoch": 16.01, + "learning_rate": 2.933417721518987e-06, + "loss": 0.0071, + "step": 28425 + }, + { + "epoch": 16.01, + "learning_rate": 2.9270886075949366e-06, + "loss": 0.0113, + "step": 28450 + }, + { + "epoch": 16.01, + "learning_rate": 2.920759493670886e-06, + "loss": 0.0085, + "step": 28475 + }, + { + "epoch": 16.01, + "learning_rate": 2.914430379746836e-06, + "loss": 0.0133, + "step": 28500 + }, + { + "epoch": 16.01, + "learning_rate": 2.9081012658227853e-06, + "loss": 0.014, + "step": 28525 + }, + { + "epoch": 16.01, + "learning_rate": 2.9017721518987348e-06, + "loss": 0.0105, + "step": 28550 + }, + { + "epoch": 16.01, + "learning_rate": 2.8954430379746838e-06, + "loss": 0.01, + "step": 28575 + }, + { + "epoch": 16.01, + "learning_rate": 2.889113924050633e-06, + "loss": 0.0135, + "step": 28600 + }, + { + "epoch": 16.02, + "learning_rate": 2.8827848101265826e-06, + "loss": 0.0118, + "step": 28625 + }, + { + "epoch": 16.02, + "learning_rate": 2.876455696202532e-06, + "loss": 0.0111, + "step": 28650 + }, + { + "epoch": 16.02, + "learning_rate": 2.8701265822784815e-06, + "loss": 0.008, + "step": 28675 + }, + { + "epoch": 16.02, + "learning_rate": 2.8637974683544305e-06, + "loss": 0.0103, + "step": 28700 + }, + { + "epoch": 16.02, + "learning_rate": 2.85746835443038e-06, + "loss": 0.0082, + "step": 28725 + }, + { + "epoch": 16.02, + "learning_rate": 2.8511392405063293e-06, + "loss": 0.0057, + "step": 28750 + }, + { + "epoch": 16.02, + "learning_rate": 2.8448101265822788e-06, + "loss": 0.0062, + "step": 28775 + }, + { + "epoch": 16.02, + "learning_rate": 2.8384810126582278e-06, + "loss": 0.0092, + "step": 28800 + }, + { + "epoch": 16.02, + "learning_rate": 2.832151898734177e-06, + "loss": 0.0084, + "step": 28825 + }, + { + "epoch": 16.02, + "learning_rate": 2.8258227848101266e-06, + "loss": 0.0099, + "step": 28850 + }, + { + "epoch": 16.02, + "learning_rate": 2.819493670886076e-06, + "loss": 0.0117, + "step": 28875 + }, + { + "epoch": 16.02, + "learning_rate": 2.8131645569620255e-06, + "loss": 0.0117, + "step": 28900 + }, + { + "epoch": 16.02, + "learning_rate": 2.8068354430379753e-06, + "loss": 0.0127, + "step": 28925 + }, + { + "epoch": 16.02, + "learning_rate": 2.8005063291139243e-06, + "loss": 0.0123, + "step": 28950 + }, + { + "epoch": 16.02, + "learning_rate": 2.7941772151898738e-06, + "loss": 0.0128, + "step": 28975 + }, + { + "epoch": 16.02, + "learning_rate": 2.787848101265823e-06, + "loss": 0.0073, + "step": 29000 + }, + { + "epoch": 16.02, + "eval_loss": 0.2685891091823578, + "eval_runtime": 1456.562, + "eval_samples_per_second": 7.148, + "eval_steps_per_second": 0.447, + "eval_wer": 23.278427291500677, + "step": 29000 + }, + { + "epoch": 16.03, + "learning_rate": 2.7815189873417726e-06, + "loss": 0.0068, + "step": 29025 + }, + { + "epoch": 16.03, + "learning_rate": 2.7751898734177216e-06, + "loss": 0.0087, + "step": 29050 + }, + { + "epoch": 16.03, + "learning_rate": 2.768860759493671e-06, + "loss": 0.0084, + "step": 29075 + }, + { + "epoch": 16.03, + "learning_rate": 2.7625316455696205e-06, + "loss": 0.0039, + "step": 29100 + }, + { + "epoch": 16.03, + "learning_rate": 2.75620253164557e-06, + "loss": 0.0066, + "step": 29125 + }, + { + "epoch": 16.03, + "learning_rate": 2.7498734177215193e-06, + "loss": 0.0108, + "step": 29150 + }, + { + "epoch": 16.03, + "learning_rate": 2.7435443037974683e-06, + "loss": 0.0052, + "step": 29175 + }, + { + "epoch": 16.03, + "learning_rate": 2.7372151898734178e-06, + "loss": 0.0061, + "step": 29200 + }, + { + "epoch": 16.03, + "learning_rate": 2.730886075949367e-06, + "loss": 0.0083, + "step": 29225 + }, + { + "epoch": 16.03, + "learning_rate": 2.7245569620253166e-06, + "loss": 0.0062, + "step": 29250 + }, + { + "epoch": 16.03, + "learning_rate": 2.7182278481012656e-06, + "loss": 0.0078, + "step": 29275 + }, + { + "epoch": 16.03, + "learning_rate": 2.711898734177215e-06, + "loss": 0.0127, + "step": 29300 + }, + { + "epoch": 16.03, + "learning_rate": 2.705569620253165e-06, + "loss": 0.0092, + "step": 29325 + }, + { + "epoch": 16.03, + "learning_rate": 2.6992405063291143e-06, + "loss": 0.0087, + "step": 29350 + }, + { + "epoch": 16.03, + "learning_rate": 2.6929113924050638e-06, + "loss": 0.0062, + "step": 29375 + }, + { + "epoch": 16.03, + "learning_rate": 2.686582278481013e-06, + "loss": 0.0065, + "step": 29400 + }, + { + "epoch": 16.04, + "learning_rate": 2.680253164556962e-06, + "loss": 0.0057, + "step": 29425 + }, + { + "epoch": 16.04, + "learning_rate": 2.6739240506329116e-06, + "loss": 0.0071, + "step": 29450 + }, + { + "epoch": 16.04, + "learning_rate": 2.667594936708861e-06, + "loss": 0.01, + "step": 29475 + }, + { + "epoch": 16.04, + "learning_rate": 2.6612658227848105e-06, + "loss": 0.009, + "step": 29500 + }, + { + "epoch": 16.04, + "learning_rate": 2.65493670886076e-06, + "loss": 0.0091, + "step": 29525 + }, + { + "epoch": 16.04, + "learning_rate": 2.648607594936709e-06, + "loss": 0.0104, + "step": 29550 + }, + { + "epoch": 16.04, + "learning_rate": 2.6422784810126583e-06, + "loss": 0.0071, + "step": 29575 + }, + { + "epoch": 16.04, + "learning_rate": 2.6359493670886078e-06, + "loss": 0.0081, + "step": 29600 + }, + { + "epoch": 16.04, + "learning_rate": 2.629620253164557e-06, + "loss": 0.008, + "step": 29625 + }, + { + "epoch": 16.04, + "learning_rate": 2.623291139240506e-06, + "loss": 0.0067, + "step": 29650 + }, + { + "epoch": 16.04, + "learning_rate": 2.6169620253164556e-06, + "loss": 0.0066, + "step": 29675 + }, + { + "epoch": 16.04, + "learning_rate": 2.610632911392405e-06, + "loss": 0.0121, + "step": 29700 + }, + { + "epoch": 16.04, + "learning_rate": 2.6043037974683545e-06, + "loss": 0.0075, + "step": 29725 + }, + { + "epoch": 16.04, + "learning_rate": 2.5979746835443043e-06, + "loss": 0.0111, + "step": 29750 + }, + { + "epoch": 17.0, + "learning_rate": 2.5916455696202538e-06, + "loss": 0.0095, + "step": 29775 + }, + { + "epoch": 17.0, + "learning_rate": 2.5853164556962028e-06, + "loss": 0.0077, + "step": 29800 + }, + { + "epoch": 17.0, + "learning_rate": 2.578987341772152e-06, + "loss": 0.0047, + "step": 29825 + }, + { + "epoch": 17.0, + "learning_rate": 2.5726582278481016e-06, + "loss": 0.0092, + "step": 29850 + }, + { + "epoch": 17.0, + "learning_rate": 2.566329113924051e-06, + "loss": 0.0191, + "step": 29875 + }, + { + "epoch": 17.0, + "learning_rate": 2.56e-06, + "loss": 0.0102, + "step": 29900 + }, + { + "epoch": 17.0, + "learning_rate": 2.5536708860759495e-06, + "loss": 0.0146, + "step": 29925 + }, + { + "epoch": 17.0, + "learning_rate": 2.547341772151899e-06, + "loss": 0.0096, + "step": 29950 + }, + { + "epoch": 17.01, + "learning_rate": 2.5410126582278483e-06, + "loss": 0.0159, + "step": 29975 + }, + { + "epoch": 17.01, + "learning_rate": 2.5346835443037978e-06, + "loss": 0.0094, + "step": 30000 + }, + { + "epoch": 17.01, + "eval_loss": 0.2636851966381073, + "eval_runtime": 1508.7196, + "eval_samples_per_second": 6.901, + "eval_steps_per_second": 0.431, + "eval_wer": 23.029642758418838, + "step": 30000 + }, + { + "epoch": 17.01, + "learning_rate": 2.5283544303797468e-06, + "loss": 0.0078, + "step": 30025 + }, + { + "epoch": 17.01, + "learning_rate": 2.522025316455696e-06, + "loss": 0.01, + "step": 30050 + }, + { + "epoch": 17.01, + "learning_rate": 2.5156962025316456e-06, + "loss": 0.0058, + "step": 30075 + }, + { + "epoch": 17.01, + "learning_rate": 2.509367088607595e-06, + "loss": 0.0092, + "step": 30100 + }, + { + "epoch": 17.01, + "learning_rate": 2.503037974683544e-06, + "loss": 0.0084, + "step": 30125 + }, + { + "epoch": 17.01, + "learning_rate": 2.496708860759494e-06, + "loss": 0.0069, + "step": 30150 + }, + { + "epoch": 17.01, + "learning_rate": 2.4903797468354433e-06, + "loss": 0.0059, + "step": 30175 + }, + { + "epoch": 17.01, + "learning_rate": 2.4840506329113923e-06, + "loss": 0.0097, + "step": 30200 + }, + { + "epoch": 17.01, + "learning_rate": 2.4777215189873418e-06, + "loss": 0.0126, + "step": 30225 + }, + { + "epoch": 17.01, + "learning_rate": 2.4713924050632916e-06, + "loss": 0.0096, + "step": 30250 + }, + { + "epoch": 17.01, + "learning_rate": 2.4650632911392406e-06, + "loss": 0.0094, + "step": 30275 + }, + { + "epoch": 17.01, + "learning_rate": 2.45873417721519e-06, + "loss": 0.0111, + "step": 30300 + }, + { + "epoch": 17.01, + "learning_rate": 2.4524050632911395e-06, + "loss": 0.0062, + "step": 30325 + }, + { + "epoch": 17.01, + "learning_rate": 2.446075949367089e-06, + "loss": 0.0078, + "step": 30350 + }, + { + "epoch": 17.02, + "learning_rate": 2.4397468354430383e-06, + "loss": 0.0043, + "step": 30375 + }, + { + "epoch": 17.02, + "learning_rate": 2.4336708860759494e-06, + "loss": 0.0102, + "step": 30400 + }, + { + "epoch": 17.02, + "learning_rate": 2.4273417721518988e-06, + "loss": 0.0101, + "step": 30425 + }, + { + "epoch": 17.02, + "learning_rate": 2.4210126582278482e-06, + "loss": 0.0066, + "step": 30450 + }, + { + "epoch": 17.02, + "learning_rate": 2.4146835443037976e-06, + "loss": 0.0078, + "step": 30475 + }, + { + "epoch": 17.02, + "learning_rate": 2.408354430379747e-06, + "loss": 0.0074, + "step": 30500 + }, + { + "epoch": 17.02, + "learning_rate": 2.4020253164556965e-06, + "loss": 0.0072, + "step": 30525 + }, + { + "epoch": 17.02, + "learning_rate": 2.395696202531646e-06, + "loss": 0.0109, + "step": 30550 + }, + { + "epoch": 17.02, + "learning_rate": 2.389367088607595e-06, + "loss": 0.0113, + "step": 30575 + }, + { + "epoch": 17.02, + "learning_rate": 2.3830379746835444e-06, + "loss": 0.0109, + "step": 30600 + }, + { + "epoch": 17.02, + "learning_rate": 2.3767088607594938e-06, + "loss": 0.0089, + "step": 30625 + }, + { + "epoch": 17.02, + "learning_rate": 2.3703797468354432e-06, + "loss": 0.0062, + "step": 30650 + }, + { + "epoch": 17.02, + "learning_rate": 2.3640506329113926e-06, + "loss": 0.0101, + "step": 30675 + }, + { + "epoch": 17.02, + "learning_rate": 2.357721518987342e-06, + "loss": 0.0096, + "step": 30700 + }, + { + "epoch": 17.02, + "learning_rate": 2.3513924050632915e-06, + "loss": 0.0093, + "step": 30725 + }, + { + "epoch": 17.02, + "learning_rate": 2.3450632911392405e-06, + "loss": 0.0113, + "step": 30750 + }, + { + "epoch": 17.03, + "learning_rate": 2.33873417721519e-06, + "loss": 0.0094, + "step": 30775 + }, + { + "epoch": 17.03, + "learning_rate": 2.3324050632911394e-06, + "loss": 0.0086, + "step": 30800 + }, + { + "epoch": 17.03, + "learning_rate": 2.3260759493670888e-06, + "loss": 0.0096, + "step": 30825 + }, + { + "epoch": 17.03, + "learning_rate": 2.319746835443038e-06, + "loss": 0.0086, + "step": 30850 + }, + { + "epoch": 17.03, + "learning_rate": 2.3134177215189876e-06, + "loss": 0.0064, + "step": 30875 + }, + { + "epoch": 17.03, + "learning_rate": 2.307088607594937e-06, + "loss": 0.0107, + "step": 30900 + }, + { + "epoch": 17.03, + "learning_rate": 2.3007594936708865e-06, + "loss": 0.0073, + "step": 30925 + }, + { + "epoch": 17.03, + "learning_rate": 2.2944303797468355e-06, + "loss": 0.005, + "step": 30950 + }, + { + "epoch": 17.03, + "learning_rate": 2.288101265822785e-06, + "loss": 0.0055, + "step": 30975 + }, + { + "epoch": 17.03, + "learning_rate": 2.2817721518987344e-06, + "loss": 0.006, + "step": 31000 + }, + { + "epoch": 17.03, + "eval_loss": 0.2709895372390747, + "eval_runtime": 1461.5544, + "eval_samples_per_second": 7.123, + "eval_steps_per_second": 0.445, + "eval_wer": 23.29468771849949, + "step": 31000 + }, + { + "epoch": 17.03, + "learning_rate": 2.2754430379746838e-06, + "loss": 0.0094, + "step": 31025 + }, + { + "epoch": 17.03, + "learning_rate": 2.2691139240506328e-06, + "loss": 0.0088, + "step": 31050 + }, + { + "epoch": 17.03, + "learning_rate": 2.2627848101265826e-06, + "loss": 0.0057, + "step": 31075 + }, + { + "epoch": 17.03, + "learning_rate": 2.256455696202532e-06, + "loss": 0.0066, + "step": 31100 + }, + { + "epoch": 17.03, + "learning_rate": 2.250126582278481e-06, + "loss": 0.008, + "step": 31125 + }, + { + "epoch": 17.03, + "learning_rate": 2.2437974683544305e-06, + "loss": 0.0048, + "step": 31150 + }, + { + "epoch": 17.04, + "learning_rate": 2.23746835443038e-06, + "loss": 0.0074, + "step": 31175 + }, + { + "epoch": 17.04, + "learning_rate": 2.2311392405063294e-06, + "loss": 0.0061, + "step": 31200 + }, + { + "epoch": 17.04, + "learning_rate": 2.2248101265822788e-06, + "loss": 0.0088, + "step": 31225 + }, + { + "epoch": 17.04, + "learning_rate": 2.2184810126582278e-06, + "loss": 0.0087, + "step": 31250 + }, + { + "epoch": 17.04, + "learning_rate": 2.2121518987341776e-06, + "loss": 0.0089, + "step": 31275 + }, + { + "epoch": 17.04, + "learning_rate": 2.2058227848101266e-06, + "loss": 0.0061, + "step": 31300 + }, + { + "epoch": 17.04, + "learning_rate": 2.199493670886076e-06, + "loss": 0.0097, + "step": 31325 + }, + { + "epoch": 17.04, + "learning_rate": 2.1931645569620255e-06, + "loss": 0.0063, + "step": 31350 + }, + { + "epoch": 17.04, + "learning_rate": 2.186835443037975e-06, + "loss": 0.0084, + "step": 31375 + }, + { + "epoch": 17.04, + "learning_rate": 2.1805063291139243e-06, + "loss": 0.0089, + "step": 31400 + }, + { + "epoch": 17.04, + "learning_rate": 2.1741772151898734e-06, + "loss": 0.0077, + "step": 31425 + }, + { + "epoch": 17.04, + "learning_rate": 2.1678481012658228e-06, + "loss": 0.0153, + "step": 31450 + }, + { + "epoch": 17.04, + "learning_rate": 2.1615189873417726e-06, + "loss": 0.0088, + "step": 31475 + }, + { + "epoch": 17.04, + "learning_rate": 2.1551898734177216e-06, + "loss": 0.0095, + "step": 31500 + }, + { + "epoch": 18.0, + "learning_rate": 2.148860759493671e-06, + "loss": 0.0084, + "step": 31525 + }, + { + "epoch": 18.0, + "learning_rate": 2.1425316455696205e-06, + "loss": 0.0054, + "step": 31550 + }, + { + "epoch": 18.0, + "learning_rate": 2.13620253164557e-06, + "loss": 0.013, + "step": 31575 + }, + { + "epoch": 18.0, + "learning_rate": 2.129873417721519e-06, + "loss": 0.0074, + "step": 31600 + }, + { + "epoch": 18.0, + "learning_rate": 2.1235443037974684e-06, + "loss": 0.0083, + "step": 31625 + }, + { + "epoch": 18.0, + "learning_rate": 2.1172151898734178e-06, + "loss": 0.0118, + "step": 31650 + }, + { + "epoch": 18.0, + "learning_rate": 2.110886075949367e-06, + "loss": 0.0095, + "step": 31675 + }, + { + "epoch": 18.0, + "learning_rate": 2.1045569620253166e-06, + "loss": 0.0075, + "step": 31700 + }, + { + "epoch": 18.01, + "learning_rate": 2.098227848101266e-06, + "loss": 0.0149, + "step": 31725 + }, + { + "epoch": 18.01, + "learning_rate": 2.0918987341772155e-06, + "loss": 0.0095, + "step": 31750 + }, + { + "epoch": 18.01, + "learning_rate": 2.085569620253165e-06, + "loss": 0.0098, + "step": 31775 + }, + { + "epoch": 18.01, + "learning_rate": 2.079240506329114e-06, + "loss": 0.0058, + "step": 31800 + }, + { + "epoch": 18.01, + "learning_rate": 2.0729113924050633e-06, + "loss": 0.0072, + "step": 31825 + }, + { + "epoch": 18.01, + "learning_rate": 2.0665822784810128e-06, + "loss": 0.0103, + "step": 31850 + }, + { + "epoch": 18.01, + "learning_rate": 2.060253164556962e-06, + "loss": 0.0085, + "step": 31875 + }, + { + "epoch": 18.01, + "learning_rate": 2.0539240506329116e-06, + "loss": 0.0064, + "step": 31900 + }, + { + "epoch": 18.01, + "learning_rate": 2.047594936708861e-06, + "loss": 0.0077, + "step": 31925 + }, + { + "epoch": 18.01, + "learning_rate": 2.0412658227848105e-06, + "loss": 0.0086, + "step": 31950 + }, + { + "epoch": 18.01, + "learning_rate": 2.0349367088607595e-06, + "loss": 0.0136, + "step": 31975 + }, + { + "epoch": 18.01, + "learning_rate": 2.028607594936709e-06, + "loss": 0.0085, + "step": 32000 + }, + { + "epoch": 18.01, + "eval_loss": 0.26488059759140015, + "eval_runtime": 1466.953, + "eval_samples_per_second": 7.097, + "eval_steps_per_second": 0.444, + "eval_wer": 23.089806338314446, + "step": 32000 + }, + { + "epoch": 18.01, + "learning_rate": 2.0222784810126583e-06, + "loss": 0.011, + "step": 32025 + }, + { + "epoch": 18.01, + "learning_rate": 2.0159493670886078e-06, + "loss": 0.0086, + "step": 32050 + }, + { + "epoch": 18.01, + "learning_rate": 2.009620253164557e-06, + "loss": 0.008, + "step": 32075 + }, + { + "epoch": 18.01, + "learning_rate": 2.0032911392405066e-06, + "loss": 0.0083, + "step": 32100 + }, + { + "epoch": 18.02, + "learning_rate": 1.996962025316456e-06, + "loss": 0.0073, + "step": 32125 + }, + { + "epoch": 18.02, + "learning_rate": 1.9906329113924055e-06, + "loss": 0.0036, + "step": 32150 + }, + { + "epoch": 18.02, + "learning_rate": 1.9843037974683545e-06, + "loss": 0.0096, + "step": 32175 + }, + { + "epoch": 18.02, + "learning_rate": 1.977974683544304e-06, + "loss": 0.0071, + "step": 32200 + }, + { + "epoch": 18.02, + "learning_rate": 1.9716455696202533e-06, + "loss": 0.009, + "step": 32225 + }, + { + "epoch": 18.02, + "learning_rate": 1.9653164556962028e-06, + "loss": 0.0093, + "step": 32250 + }, + { + "epoch": 18.02, + "learning_rate": 1.9589873417721518e-06, + "loss": 0.0071, + "step": 32275 + }, + { + "epoch": 18.02, + "learning_rate": 1.9526582278481016e-06, + "loss": 0.0037, + "step": 32300 + }, + { + "epoch": 18.02, + "learning_rate": 1.946329113924051e-06, + "loss": 0.007, + "step": 32325 + }, + { + "epoch": 18.02, + "learning_rate": 1.94e-06, + "loss": 0.0104, + "step": 32350 + }, + { + "epoch": 18.02, + "learning_rate": 1.9336708860759495e-06, + "loss": 0.0082, + "step": 32375 + }, + { + "epoch": 18.02, + "learning_rate": 1.927341772151899e-06, + "loss": 0.0095, + "step": 32400 + }, + { + "epoch": 18.02, + "learning_rate": 1.9210126582278483e-06, + "loss": 0.0101, + "step": 32425 + }, + { + "epoch": 18.02, + "learning_rate": 1.9146835443037973e-06, + "loss": 0.0122, + "step": 32450 + }, + { + "epoch": 18.02, + "learning_rate": 1.9083544303797468e-06, + "loss": 0.0082, + "step": 32475 + }, + { + "epoch": 18.02, + "learning_rate": 1.9020253164556964e-06, + "loss": 0.0093, + "step": 32500 + }, + { + "epoch": 18.03, + "learning_rate": 1.8956962025316458e-06, + "loss": 0.0073, + "step": 32525 + }, + { + "epoch": 18.03, + "learning_rate": 1.889367088607595e-06, + "loss": 0.0061, + "step": 32550 + }, + { + "epoch": 18.03, + "learning_rate": 1.8830379746835445e-06, + "loss": 0.0072, + "step": 32575 + }, + { + "epoch": 18.03, + "learning_rate": 1.876708860759494e-06, + "loss": 0.0064, + "step": 32600 + }, + { + "epoch": 18.03, + "learning_rate": 1.8703797468354431e-06, + "loss": 0.0084, + "step": 32625 + }, + { + "epoch": 18.03, + "learning_rate": 1.8640506329113926e-06, + "loss": 0.0067, + "step": 32650 + }, + { + "epoch": 18.03, + "learning_rate": 1.8577215189873418e-06, + "loss": 0.0052, + "step": 32675 + }, + { + "epoch": 18.03, + "learning_rate": 1.8513924050632912e-06, + "loss": 0.0085, + "step": 32700 + }, + { + "epoch": 18.03, + "learning_rate": 1.8450632911392408e-06, + "loss": 0.0077, + "step": 32725 + }, + { + "epoch": 18.03, + "learning_rate": 1.83873417721519e-06, + "loss": 0.0068, + "step": 32750 + }, + { + "epoch": 18.03, + "learning_rate": 1.8324050632911395e-06, + "loss": 0.0079, + "step": 32775 + }, + { + "epoch": 18.03, + "learning_rate": 1.8260759493670887e-06, + "loss": 0.008, + "step": 32800 + }, + { + "epoch": 18.03, + "learning_rate": 1.8197468354430381e-06, + "loss": 0.0071, + "step": 32825 + }, + { + "epoch": 18.03, + "learning_rate": 1.8134177215189873e-06, + "loss": 0.0091, + "step": 32850 + }, + { + "epoch": 18.03, + "learning_rate": 1.8070886075949368e-06, + "loss": 0.0059, + "step": 32875 + }, + { + "epoch": 18.03, + "learning_rate": 1.8007594936708862e-06, + "loss": 0.0079, + "step": 32900 + }, + { + "epoch": 18.04, + "learning_rate": 1.7944303797468356e-06, + "loss": 0.0055, + "step": 32925 + }, + { + "epoch": 18.04, + "learning_rate": 1.788101265822785e-06, + "loss": 0.0095, + "step": 32950 + }, + { + "epoch": 18.04, + "learning_rate": 1.7817721518987343e-06, + "loss": 0.0086, + "step": 32975 + }, + { + "epoch": 18.04, + "learning_rate": 1.7754430379746837e-06, + "loss": 0.0057, + "step": 33000 + }, + { + "epoch": 18.04, + "eval_loss": 0.2678743898868561, + "eval_runtime": 1484.126, + "eval_samples_per_second": 7.015, + "eval_steps_per_second": 0.439, + "eval_wer": 23.405258622091416, + "step": 33000 + }, + { + "epoch": 18.04, + "learning_rate": 1.7691139240506331e-06, + "loss": 0.0085, + "step": 33025 + }, + { + "epoch": 18.04, + "learning_rate": 1.7627848101265823e-06, + "loss": 0.0073, + "step": 33050 + }, + { + "epoch": 18.04, + "learning_rate": 1.7564556962025318e-06, + "loss": 0.0045, + "step": 33075 + }, + { + "epoch": 18.04, + "learning_rate": 1.750126582278481e-06, + "loss": 0.0065, + "step": 33100 + }, + { + "epoch": 18.04, + "learning_rate": 1.7437974683544306e-06, + "loss": 0.0076, + "step": 33125 + }, + { + "epoch": 18.04, + "learning_rate": 1.73746835443038e-06, + "loss": 0.007, + "step": 33150 + }, + { + "epoch": 18.04, + "learning_rate": 1.7311392405063293e-06, + "loss": 0.0109, + "step": 33175 + }, + { + "epoch": 18.04, + "learning_rate": 1.7248101265822787e-06, + "loss": 0.0095, + "step": 33200 + }, + { + "epoch": 18.04, + "learning_rate": 1.718481012658228e-06, + "loss": 0.0109, + "step": 33225 + }, + { + "epoch": 18.04, + "learning_rate": 1.7121518987341773e-06, + "loss": 0.0117, + "step": 33250 + }, + { + "epoch": 19.0, + "learning_rate": 1.7058227848101266e-06, + "loss": 0.0038, + "step": 33275 + }, + { + "epoch": 19.0, + "learning_rate": 1.699493670886076e-06, + "loss": 0.013, + "step": 33300 + }, + { + "epoch": 19.0, + "learning_rate": 1.6931645569620256e-06, + "loss": 0.0104, + "step": 33325 + }, + { + "epoch": 19.0, + "learning_rate": 1.6868354430379748e-06, + "loss": 0.0064, + "step": 33350 + }, + { + "epoch": 19.0, + "learning_rate": 1.6805063291139243e-06, + "loss": 0.0137, + "step": 33375 + }, + { + "epoch": 19.0, + "learning_rate": 1.6741772151898735e-06, + "loss": 0.012, + "step": 33400 + }, + { + "epoch": 19.0, + "learning_rate": 1.667848101265823e-06, + "loss": 0.0106, + "step": 33425 + }, + { + "epoch": 19.0, + "learning_rate": 1.6615189873417723e-06, + "loss": 0.0072, + "step": 33450 + }, + { + "epoch": 19.01, + "learning_rate": 1.6551898734177216e-06, + "loss": 0.0154, + "step": 33475 + }, + { + "epoch": 19.01, + "learning_rate": 1.648860759493671e-06, + "loss": 0.0129, + "step": 33500 + }, + { + "epoch": 19.01, + "learning_rate": 1.6425316455696206e-06, + "loss": 0.008, + "step": 33525 + }, + { + "epoch": 19.01, + "learning_rate": 1.6362025316455698e-06, + "loss": 0.0071, + "step": 33550 + }, + { + "epoch": 19.01, + "learning_rate": 1.6298734177215193e-06, + "loss": 0.0088, + "step": 33575 + }, + { + "epoch": 19.01, + "learning_rate": 1.6235443037974685e-06, + "loss": 0.0102, + "step": 33600 + }, + { + "epoch": 19.01, + "learning_rate": 1.617215189873418e-06, + "loss": 0.0064, + "step": 33625 + }, + { + "epoch": 19.01, + "learning_rate": 1.6108860759493671e-06, + "loss": 0.0098, + "step": 33650 + }, + { + "epoch": 19.01, + "learning_rate": 1.6045569620253166e-06, + "loss": 0.009, + "step": 33675 + }, + { + "epoch": 19.01, + "learning_rate": 1.5982278481012658e-06, + "loss": 0.0074, + "step": 33700 + }, + { + "epoch": 19.01, + "learning_rate": 1.5918987341772152e-06, + "loss": 0.0055, + "step": 33725 + }, + { + "epoch": 19.01, + "learning_rate": 1.5855696202531648e-06, + "loss": 0.0059, + "step": 33750 + }, + { + "epoch": 19.01, + "learning_rate": 1.579240506329114e-06, + "loss": 0.0102, + "step": 33775 + }, + { + "epoch": 19.01, + "learning_rate": 1.5729113924050635e-06, + "loss": 0.0111, + "step": 33800 + }, + { + "epoch": 19.01, + "learning_rate": 1.5665822784810127e-06, + "loss": 0.0033, + "step": 33825 + }, + { + "epoch": 19.01, + "learning_rate": 1.5602531645569621e-06, + "loss": 0.0057, + "step": 33850 + }, + { + "epoch": 19.02, + "learning_rate": 1.5539240506329115e-06, + "loss": 0.0074, + "step": 33875 + }, + { + "epoch": 19.02, + "learning_rate": 1.5475949367088608e-06, + "loss": 0.0056, + "step": 33900 + }, + { + "epoch": 19.02, + "learning_rate": 1.5412658227848102e-06, + "loss": 0.0053, + "step": 33925 + }, + { + "epoch": 19.02, + "learning_rate": 1.5349367088607598e-06, + "loss": 0.0049, + "step": 33950 + }, + { + "epoch": 19.02, + "learning_rate": 1.528607594936709e-06, + "loss": 0.0065, + "step": 33975 + }, + { + "epoch": 19.02, + "learning_rate": 1.5222784810126585e-06, + "loss": 0.007, + "step": 34000 + }, + { + "epoch": 19.02, + "eval_loss": 0.2660910487174988, + "eval_runtime": 1471.5922, + "eval_samples_per_second": 7.075, + "eval_steps_per_second": 0.442, + "eval_wer": 23.14834387551017, + "step": 34000 + }, + { + "epoch": 19.02, + "learning_rate": 1.5159493670886077e-06, + "loss": 0.0027, + "step": 34025 + }, + { + "epoch": 19.02, + "learning_rate": 1.5096202531645571e-06, + "loss": 0.0081, + "step": 34050 + }, + { + "epoch": 19.02, + "learning_rate": 1.5032911392405063e-06, + "loss": 0.0044, + "step": 34075 + }, + { + "epoch": 19.02, + "learning_rate": 1.4969620253164558e-06, + "loss": 0.0093, + "step": 34100 + }, + { + "epoch": 19.02, + "learning_rate": 1.490632911392405e-06, + "loss": 0.0067, + "step": 34125 + }, + { + "epoch": 19.02, + "learning_rate": 1.4843037974683546e-06, + "loss": 0.0047, + "step": 34150 + }, + { + "epoch": 19.02, + "learning_rate": 1.477974683544304e-06, + "loss": 0.0119, + "step": 34175 + }, + { + "epoch": 19.02, + "learning_rate": 1.4716455696202533e-06, + "loss": 0.0091, + "step": 34200 + }, + { + "epoch": 19.02, + "learning_rate": 1.4653164556962027e-06, + "loss": 0.0094, + "step": 34225 + }, + { + "epoch": 19.02, + "learning_rate": 1.458987341772152e-06, + "loss": 0.0069, + "step": 34250 + }, + { + "epoch": 19.03, + "learning_rate": 1.4526582278481013e-06, + "loss": 0.0066, + "step": 34275 + }, + { + "epoch": 19.03, + "learning_rate": 1.4463291139240508e-06, + "loss": 0.0072, + "step": 34300 + }, + { + "epoch": 19.03, + "learning_rate": 1.44e-06, + "loss": 0.0067, + "step": 34325 + }, + { + "epoch": 19.03, + "learning_rate": 1.4336708860759496e-06, + "loss": 0.0076, + "step": 34350 + }, + { + "epoch": 19.03, + "learning_rate": 1.427341772151899e-06, + "loss": 0.0083, + "step": 34375 + }, + { + "epoch": 19.03, + "learning_rate": 1.4212658227848103e-06, + "loss": 0.0072, + "step": 34400 + }, + { + "epoch": 19.03, + "learning_rate": 1.4149367088607597e-06, + "loss": 0.0086, + "step": 34425 + }, + { + "epoch": 19.03, + "learning_rate": 1.408607594936709e-06, + "loss": 0.0057, + "step": 34450 + }, + { + "epoch": 19.03, + "learning_rate": 1.4022784810126584e-06, + "loss": 0.0111, + "step": 34475 + }, + { + "epoch": 19.03, + "learning_rate": 1.3959493670886076e-06, + "loss": 0.01, + "step": 34500 + }, + { + "epoch": 19.03, + "learning_rate": 1.389620253164557e-06, + "loss": 0.008, + "step": 34525 + }, + { + "epoch": 19.03, + "learning_rate": 1.3832911392405066e-06, + "loss": 0.0082, + "step": 34550 + }, + { + "epoch": 19.03, + "learning_rate": 1.3769620253164559e-06, + "loss": 0.0062, + "step": 34575 + }, + { + "epoch": 19.03, + "learning_rate": 1.3706329113924053e-06, + "loss": 0.0052, + "step": 34600 + }, + { + "epoch": 19.03, + "learning_rate": 1.3643037974683545e-06, + "loss": 0.007, + "step": 34625 + }, + { + "epoch": 19.03, + "learning_rate": 1.357974683544304e-06, + "loss": 0.0045, + "step": 34650 + }, + { + "epoch": 19.04, + "learning_rate": 1.3516455696202531e-06, + "loss": 0.0089, + "step": 34675 + }, + { + "epoch": 19.04, + "learning_rate": 1.3453164556962026e-06, + "loss": 0.0081, + "step": 34700 + }, + { + "epoch": 19.04, + "learning_rate": 1.338987341772152e-06, + "loss": 0.0101, + "step": 34725 + }, + { + "epoch": 19.04, + "learning_rate": 1.3326582278481014e-06, + "loss": 0.0072, + "step": 34750 + }, + { + "epoch": 19.04, + "learning_rate": 1.3263291139240509e-06, + "loss": 0.005, + "step": 34775 + }, + { + "epoch": 19.04, + "learning_rate": 1.32e-06, + "loss": 0.0083, + "step": 34800 + }, + { + "epoch": 19.04, + "learning_rate": 1.3136708860759495e-06, + "loss": 0.0087, + "step": 34825 + }, + { + "epoch": 19.04, + "learning_rate": 1.307341772151899e-06, + "loss": 0.0067, + "step": 34850 + }, + { + "epoch": 19.04, + "learning_rate": 1.3010126582278481e-06, + "loss": 0.0048, + "step": 34875 + }, + { + "epoch": 19.04, + "learning_rate": 1.2946835443037976e-06, + "loss": 0.0065, + "step": 34900 + }, + { + "epoch": 19.04, + "learning_rate": 1.2883544303797468e-06, + "loss": 0.0076, + "step": 34925 + }, + { + "epoch": 19.04, + "learning_rate": 1.2820253164556964e-06, + "loss": 0.007, + "step": 34950 + }, + { + "epoch": 19.04, + "learning_rate": 1.2756962025316458e-06, + "loss": 0.0044, + "step": 34975 + }, + { + "epoch": 19.04, + "learning_rate": 1.269367088607595e-06, + "loss": 0.0082, + "step": 35000 + }, + { + "epoch": 19.04, + "eval_loss": 0.26719748973846436, + "eval_runtime": 1465.7868, + "eval_samples_per_second": 7.103, + "eval_steps_per_second": 0.444, + "eval_wer": 24.169498691035628, + "step": 35000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.615529929531392e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-35000/training_args.bin b/checkpoint-35000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-35000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-36000/config.json b/checkpoint-36000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-36000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-36000/generation_config.json b/checkpoint-36000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-36000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-36000/optimizer.pt b/checkpoint-36000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0fe73f1b4fded721e3c0ddfd246f84a017c5d887 --- /dev/null +++ b/checkpoint-36000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a3bda8f274a59532e9ba3f3b50f289b7095e3584296061a006941c98302d22 +size 1934161093 diff --git a/checkpoint-36000/preprocessor_config.json b/checkpoint-36000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-36000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-36000/pytorch_model.bin b/checkpoint-36000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fa86b6ea3bcc97c138e2f1a57ea6a6e2ab8e9c3 --- /dev/null +++ b/checkpoint-36000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db240821e406ec13eb0a16bde50fd371e6219ec2de3cc41f906ec32ffad120a +size 967102601 diff --git a/checkpoint-36000/rng_state.pth b/checkpoint-36000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d264435573aadfa892883a609e3dd842a5348b80 --- /dev/null +++ b/checkpoint-36000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0325bbc73515ce241a837282a5c0f38d9ca5706842761521ce9e10b0fa278ce6 +size 14575 diff --git a/checkpoint-36000/scaler.pt b/checkpoint-36000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..761c2333251bbea82b6039d3db5e001c459de119 --- /dev/null +++ b/checkpoint-36000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf07655e2c481206803e2cbafe12827c4efdbb67d0eb6ef046d68c52492ad2ca +size 557 diff --git a/checkpoint-36000/scheduler.pt b/checkpoint-36000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..16df8fbcb25efc3efca534ca11ed0acf5efcaf00 --- /dev/null +++ b/checkpoint-36000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7448056d0bc011dc0ae096bb3f3aa38975c8f13f43bf21746cc091a458ada1a +size 627 diff --git a/checkpoint-36000/trainer_state.json b/checkpoint-36000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4e9f060f9ce8ecf9e1363e689b7d1712714faa97 --- /dev/null +++ b/checkpoint-36000/trainer_state.json @@ -0,0 +1,8980 @@ +{ + "best_metric": 22.689799834143646, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-36000", + "epoch": 20.0245, + "global_step": 36000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + }, + { + "epoch": 13.03, + "learning_rate": 4.046835443037975e-06, + "loss": 0.0128, + "step": 24025 + }, + { + "epoch": 13.03, + "learning_rate": 4.040506329113924e-06, + "loss": 0.012, + "step": 24050 + }, + { + "epoch": 13.03, + "learning_rate": 4.034177215189874e-06, + "loss": 0.0098, + "step": 24075 + }, + { + "epoch": 13.03, + "learning_rate": 4.027848101265823e-06, + "loss": 0.0091, + "step": 24100 + }, + { + "epoch": 13.03, + "learning_rate": 4.021518987341773e-06, + "loss": 0.0077, + "step": 24125 + }, + { + "epoch": 13.03, + "learning_rate": 4.015189873417722e-06, + "loss": 0.0135, + "step": 24150 + }, + { + "epoch": 13.04, + "learning_rate": 4.0088607594936715e-06, + "loss": 0.0124, + "step": 24175 + }, + { + "epoch": 13.04, + "learning_rate": 4.002531645569621e-06, + "loss": 0.0152, + "step": 24200 + }, + { + "epoch": 13.04, + "learning_rate": 3.99620253164557e-06, + "loss": 0.0127, + "step": 24225 + }, + { + "epoch": 13.04, + "learning_rate": 3.989873417721519e-06, + "loss": 0.0066, + "step": 24250 + }, + { + "epoch": 13.04, + "learning_rate": 3.983544303797468e-06, + "loss": 0.0163, + "step": 24275 + }, + { + "epoch": 13.04, + "learning_rate": 3.977215189873418e-06, + "loss": 0.0095, + "step": 24300 + }, + { + "epoch": 13.04, + "learning_rate": 3.970886075949367e-06, + "loss": 0.0103, + "step": 24325 + }, + { + "epoch": 13.04, + "learning_rate": 3.964556962025317e-06, + "loss": 0.0112, + "step": 24350 + }, + { + "epoch": 13.04, + "learning_rate": 3.958227848101266e-06, + "loss": 0.01, + "step": 24375 + }, + { + "epoch": 13.04, + "learning_rate": 3.9518987341772155e-06, + "loss": 0.0131, + "step": 24400 + }, + { + "epoch": 13.04, + "learning_rate": 3.945569620253165e-06, + "loss": 0.0143, + "step": 24425 + }, + { + "epoch": 13.04, + "learning_rate": 3.939240506329114e-06, + "loss": 0.0151, + "step": 24450 + }, + { + "epoch": 13.04, + "learning_rate": 3.932911392405064e-06, + "loss": 0.0129, + "step": 24475 + }, + { + "epoch": 13.04, + "learning_rate": 3.926582278481013e-06, + "loss": 0.0125, + "step": 24500 + }, + { + "epoch": 14.0, + "learning_rate": 3.920506329113925e-06, + "loss": 0.0165, + "step": 24525 + }, + { + "epoch": 14.0, + "learning_rate": 3.914177215189874e-06, + "loss": 0.0114, + "step": 24550 + }, + { + "epoch": 14.0, + "learning_rate": 3.9078481012658235e-06, + "loss": 0.0111, + "step": 24575 + }, + { + "epoch": 14.0, + "learning_rate": 3.901518987341772e-06, + "loss": 0.0134, + "step": 24600 + }, + { + "epoch": 14.0, + "learning_rate": 3.8951898734177215e-06, + "loss": 0.0138, + "step": 24625 + }, + { + "epoch": 14.0, + "learning_rate": 3.888860759493671e-06, + "loss": 0.0151, + "step": 24650 + }, + { + "epoch": 14.0, + "learning_rate": 3.88253164556962e-06, + "loss": 0.0101, + "step": 24675 + }, + { + "epoch": 14.0, + "learning_rate": 3.87620253164557e-06, + "loss": 0.0108, + "step": 24700 + }, + { + "epoch": 14.01, + "learning_rate": 3.869873417721519e-06, + "loss": 0.0151, + "step": 24725 + }, + { + "epoch": 14.01, + "learning_rate": 3.863544303797469e-06, + "loss": 0.0132, + "step": 24750 + }, + { + "epoch": 14.01, + "learning_rate": 3.857215189873418e-06, + "loss": 0.012, + "step": 24775 + }, + { + "epoch": 14.01, + "learning_rate": 3.8508860759493675e-06, + "loss": 0.017, + "step": 24800 + }, + { + "epoch": 14.01, + "learning_rate": 3.844556962025317e-06, + "loss": 0.0141, + "step": 24825 + }, + { + "epoch": 14.01, + "learning_rate": 3.838227848101266e-06, + "loss": 0.0166, + "step": 24850 + }, + { + "epoch": 14.01, + "learning_rate": 3.831898734177216e-06, + "loss": 0.0112, + "step": 24875 + }, + { + "epoch": 14.01, + "learning_rate": 3.825569620253165e-06, + "loss": 0.0097, + "step": 24900 + }, + { + "epoch": 14.01, + "learning_rate": 3.819240506329115e-06, + "loss": 0.0138, + "step": 24925 + }, + { + "epoch": 14.01, + "learning_rate": 3.8129113924050636e-06, + "loss": 0.0069, + "step": 24950 + }, + { + "epoch": 14.01, + "learning_rate": 3.806582278481013e-06, + "loss": 0.0111, + "step": 24975 + }, + { + "epoch": 14.01, + "learning_rate": 3.8002531645569625e-06, + "loss": 0.0149, + "step": 25000 + }, + { + "epoch": 14.01, + "eval_loss": 0.2640444040298462, + "eval_runtime": 1478.4977, + "eval_samples_per_second": 7.042, + "eval_steps_per_second": 0.44, + "eval_wer": 24.063805915543345, + "step": 25000 + }, + { + "epoch": 14.01, + "learning_rate": 3.7939240506329115e-06, + "loss": 0.0176, + "step": 25025 + }, + { + "epoch": 14.01, + "learning_rate": 3.787594936708861e-06, + "loss": 0.0104, + "step": 25050 + }, + { + "epoch": 14.01, + "learning_rate": 3.7812658227848103e-06, + "loss": 0.0084, + "step": 25075 + }, + { + "epoch": 14.01, + "learning_rate": 3.7749367088607598e-06, + "loss": 0.0118, + "step": 25100 + }, + { + "epoch": 14.02, + "learning_rate": 3.768607594936709e-06, + "loss": 0.0099, + "step": 25125 + }, + { + "epoch": 14.02, + "learning_rate": 3.762278481012658e-06, + "loss": 0.0089, + "step": 25150 + }, + { + "epoch": 14.02, + "learning_rate": 3.7559493670886076e-06, + "loss": 0.0119, + "step": 25175 + }, + { + "epoch": 14.02, + "learning_rate": 3.749620253164557e-06, + "loss": 0.0118, + "step": 25200 + }, + { + "epoch": 14.02, + "learning_rate": 3.743291139240507e-06, + "loss": 0.0108, + "step": 25225 + }, + { + "epoch": 14.02, + "learning_rate": 3.7369620253164563e-06, + "loss": 0.0101, + "step": 25250 + }, + { + "epoch": 14.02, + "learning_rate": 3.7306329113924058e-06, + "loss": 0.0095, + "step": 25275 + }, + { + "epoch": 14.02, + "learning_rate": 3.7243037974683548e-06, + "loss": 0.0106, + "step": 25300 + }, + { + "epoch": 14.02, + "learning_rate": 3.717974683544304e-06, + "loss": 0.0135, + "step": 25325 + }, + { + "epoch": 14.02, + "learning_rate": 3.7116455696202536e-06, + "loss": 0.0115, + "step": 25350 + }, + { + "epoch": 14.02, + "learning_rate": 3.705316455696203e-06, + "loss": 0.0114, + "step": 25375 + }, + { + "epoch": 14.02, + "learning_rate": 3.698987341772152e-06, + "loss": 0.0119, + "step": 25400 + }, + { + "epoch": 14.02, + "learning_rate": 3.6926582278481015e-06, + "loss": 0.0123, + "step": 25425 + }, + { + "epoch": 14.02, + "learning_rate": 3.686329113924051e-06, + "loss": 0.0138, + "step": 25450 + }, + { + "epoch": 14.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 0.0133, + "step": 25475 + }, + { + "epoch": 14.02, + "learning_rate": 3.6736708860759498e-06, + "loss": 0.0106, + "step": 25500 + }, + { + "epoch": 14.03, + "learning_rate": 3.6673417721518988e-06, + "loss": 0.0099, + "step": 25525 + }, + { + "epoch": 14.03, + "learning_rate": 3.661012658227848e-06, + "loss": 0.0104, + "step": 25550 + }, + { + "epoch": 14.03, + "learning_rate": 3.6546835443037976e-06, + "loss": 0.0143, + "step": 25575 + }, + { + "epoch": 14.03, + "learning_rate": 3.648354430379747e-06, + "loss": 0.0096, + "step": 25600 + }, + { + "epoch": 14.03, + "learning_rate": 3.642025316455696e-06, + "loss": 0.0094, + "step": 25625 + }, + { + "epoch": 14.03, + "learning_rate": 3.6356962025316463e-06, + "loss": 0.0162, + "step": 25650 + }, + { + "epoch": 14.03, + "learning_rate": 3.6293670886075953e-06, + "loss": 0.008, + "step": 25675 + }, + { + "epoch": 14.03, + "learning_rate": 3.6230379746835448e-06, + "loss": 0.0094, + "step": 25700 + }, + { + "epoch": 14.03, + "learning_rate": 3.616708860759494e-06, + "loss": 0.0122, + "step": 25725 + }, + { + "epoch": 14.03, + "learning_rate": 3.6103797468354436e-06, + "loss": 0.0128, + "step": 25750 + }, + { + "epoch": 14.03, + "learning_rate": 3.6040506329113926e-06, + "loss": 0.0117, + "step": 25775 + }, + { + "epoch": 14.03, + "learning_rate": 3.597721518987342e-06, + "loss": 0.0067, + "step": 25800 + }, + { + "epoch": 14.03, + "learning_rate": 3.5913924050632915e-06, + "loss": 0.0078, + "step": 25825 + }, + { + "epoch": 14.03, + "learning_rate": 3.585063291139241e-06, + "loss": 0.0071, + "step": 25850 + }, + { + "epoch": 14.03, + "learning_rate": 3.57873417721519e-06, + "loss": 0.0093, + "step": 25875 + }, + { + "epoch": 14.03, + "learning_rate": 3.5724050632911393e-06, + "loss": 0.0064, + "step": 25900 + }, + { + "epoch": 14.04, + "learning_rate": 3.5660759493670888e-06, + "loss": 0.0067, + "step": 25925 + }, + { + "epoch": 14.04, + "learning_rate": 3.559746835443038e-06, + "loss": 0.0076, + "step": 25950 + }, + { + "epoch": 14.04, + "learning_rate": 3.5534177215189876e-06, + "loss": 0.0099, + "step": 25975 + }, + { + "epoch": 14.04, + "learning_rate": 3.5470886075949366e-06, + "loss": 0.0111, + "step": 26000 + }, + { + "epoch": 14.04, + "eval_loss": 0.2734878659248352, + "eval_runtime": 1456.7487, + "eval_samples_per_second": 7.147, + "eval_steps_per_second": 0.447, + "eval_wer": 24.359745686921737, + "step": 26000 + }, + { + "epoch": 14.04, + "learning_rate": 3.540759493670886e-06, + "loss": 0.0134, + "step": 26025 + }, + { + "epoch": 14.04, + "learning_rate": 3.534430379746836e-06, + "loss": 0.008, + "step": 26050 + }, + { + "epoch": 14.04, + "learning_rate": 3.5281012658227853e-06, + "loss": 0.0138, + "step": 26075 + }, + { + "epoch": 14.04, + "learning_rate": 3.5217721518987348e-06, + "loss": 0.0075, + "step": 26100 + }, + { + "epoch": 14.04, + "learning_rate": 3.515443037974684e-06, + "loss": 0.0097, + "step": 26125 + }, + { + "epoch": 14.04, + "learning_rate": 3.509113924050633e-06, + "loss": 0.0139, + "step": 26150 + }, + { + "epoch": 14.04, + "learning_rate": 3.5027848101265826e-06, + "loss": 0.0099, + "step": 26175 + }, + { + "epoch": 14.04, + "learning_rate": 3.496455696202532e-06, + "loss": 0.0148, + "step": 26200 + }, + { + "epoch": 14.04, + "learning_rate": 3.4901265822784815e-06, + "loss": 0.0119, + "step": 26225 + }, + { + "epoch": 14.04, + "learning_rate": 3.4837974683544305e-06, + "loss": 0.0086, + "step": 26250 + }, + { + "epoch": 15.0, + "learning_rate": 3.47746835443038e-06, + "loss": 0.0091, + "step": 26275 + }, + { + "epoch": 15.0, + "learning_rate": 3.4711392405063293e-06, + "loss": 0.0105, + "step": 26300 + }, + { + "epoch": 15.0, + "learning_rate": 3.465063291139241e-06, + "loss": 0.0138, + "step": 26325 + }, + { + "epoch": 15.0, + "learning_rate": 3.45873417721519e-06, + "loss": 0.0185, + "step": 26350 + }, + { + "epoch": 15.0, + "learning_rate": 3.4524050632911392e-06, + "loss": 0.015, + "step": 26375 + }, + { + "epoch": 15.0, + "learning_rate": 3.4460759493670886e-06, + "loss": 0.0177, + "step": 26400 + }, + { + "epoch": 15.0, + "learning_rate": 3.439746835443038e-06, + "loss": 0.014, + "step": 26425 + }, + { + "epoch": 15.0, + "learning_rate": 3.433417721518988e-06, + "loss": 0.0127, + "step": 26450 + }, + { + "epoch": 15.01, + "learning_rate": 3.4270886075949374e-06, + "loss": 0.0135, + "step": 26475 + }, + { + "epoch": 15.01, + "learning_rate": 3.4207594936708864e-06, + "loss": 0.0171, + "step": 26500 + }, + { + "epoch": 15.01, + "learning_rate": 3.4144303797468358e-06, + "loss": 0.0114, + "step": 26525 + }, + { + "epoch": 15.01, + "learning_rate": 3.4081012658227852e-06, + "loss": 0.0099, + "step": 26550 + }, + { + "epoch": 15.01, + "learning_rate": 3.4017721518987346e-06, + "loss": 0.0126, + "step": 26575 + }, + { + "epoch": 15.01, + "learning_rate": 3.395443037974684e-06, + "loss": 0.0116, + "step": 26600 + }, + { + "epoch": 15.01, + "learning_rate": 3.389113924050633e-06, + "loss": 0.0087, + "step": 26625 + }, + { + "epoch": 15.01, + "learning_rate": 3.3827848101265825e-06, + "loss": 0.0073, + "step": 26650 + }, + { + "epoch": 15.01, + "learning_rate": 3.376455696202532e-06, + "loss": 0.0102, + "step": 26675 + }, + { + "epoch": 15.01, + "learning_rate": 3.3701265822784814e-06, + "loss": 0.0106, + "step": 26700 + }, + { + "epoch": 15.01, + "learning_rate": 3.3637974683544304e-06, + "loss": 0.0094, + "step": 26725 + }, + { + "epoch": 15.01, + "learning_rate": 3.35746835443038e-06, + "loss": 0.0113, + "step": 26750 + }, + { + "epoch": 15.01, + "learning_rate": 3.3511392405063292e-06, + "loss": 0.0169, + "step": 26775 + }, + { + "epoch": 15.01, + "learning_rate": 3.3448101265822786e-06, + "loss": 0.0084, + "step": 26800 + }, + { + "epoch": 15.01, + "learning_rate": 3.338481012658228e-06, + "loss": 0.0153, + "step": 26825 + }, + { + "epoch": 15.01, + "learning_rate": 3.332151898734177e-06, + "loss": 0.012, + "step": 26850 + }, + { + "epoch": 15.02, + "learning_rate": 3.325822784810127e-06, + "loss": 0.0106, + "step": 26875 + }, + { + "epoch": 15.02, + "learning_rate": 3.3194936708860764e-06, + "loss": 0.0099, + "step": 26900 + }, + { + "epoch": 15.02, + "learning_rate": 3.3131645569620258e-06, + "loss": 0.008, + "step": 26925 + }, + { + "epoch": 15.02, + "learning_rate": 3.306835443037975e-06, + "loss": 0.0076, + "step": 26950 + }, + { + "epoch": 15.02, + "learning_rate": 3.3005063291139246e-06, + "loss": 0.0088, + "step": 26975 + }, + { + "epoch": 15.02, + "learning_rate": 3.2941772151898736e-06, + "loss": 0.009, + "step": 27000 + }, + { + "epoch": 15.02, + "eval_loss": 0.2693029046058655, + "eval_runtime": 1479.1374, + "eval_samples_per_second": 7.039, + "eval_steps_per_second": 0.44, + "eval_wer": 24.28819980812696, + "step": 27000 + }, + { + "epoch": 15.02, + "learning_rate": 3.287848101265823e-06, + "loss": 0.0115, + "step": 27025 + }, + { + "epoch": 15.02, + "learning_rate": 3.2815189873417725e-06, + "loss": 0.0099, + "step": 27050 + }, + { + "epoch": 15.02, + "learning_rate": 3.275189873417722e-06, + "loss": 0.0073, + "step": 27075 + }, + { + "epoch": 15.02, + "learning_rate": 3.268860759493671e-06, + "loss": 0.0061, + "step": 27100 + }, + { + "epoch": 15.02, + "learning_rate": 3.2625316455696204e-06, + "loss": 0.007, + "step": 27125 + }, + { + "epoch": 15.02, + "learning_rate": 3.2562025316455698e-06, + "loss": 0.0109, + "step": 27150 + }, + { + "epoch": 15.02, + "learning_rate": 3.2498734177215192e-06, + "loss": 0.0123, + "step": 27175 + }, + { + "epoch": 15.02, + "learning_rate": 3.2435443037974686e-06, + "loss": 0.0121, + "step": 27200 + }, + { + "epoch": 15.02, + "learning_rate": 3.2372151898734176e-06, + "loss": 0.0129, + "step": 27225 + }, + { + "epoch": 15.02, + "learning_rate": 3.230886075949367e-06, + "loss": 0.0128, + "step": 27250 + }, + { + "epoch": 15.03, + "learning_rate": 3.224556962025317e-06, + "loss": 0.0094, + "step": 27275 + }, + { + "epoch": 15.03, + "learning_rate": 3.2182278481012664e-06, + "loss": 0.0081, + "step": 27300 + }, + { + "epoch": 15.03, + "learning_rate": 3.2118987341772158e-06, + "loss": 0.0087, + "step": 27325 + }, + { + "epoch": 15.03, + "learning_rate": 3.2055696202531648e-06, + "loss": 0.0131, + "step": 27350 + }, + { + "epoch": 15.03, + "learning_rate": 3.199240506329114e-06, + "loss": 0.0073, + "step": 27375 + }, + { + "epoch": 15.03, + "learning_rate": 3.1929113924050636e-06, + "loss": 0.0086, + "step": 27400 + }, + { + "epoch": 15.03, + "learning_rate": 3.186582278481013e-06, + "loss": 0.0108, + "step": 27425 + }, + { + "epoch": 15.03, + "learning_rate": 3.1802531645569625e-06, + "loss": 0.01, + "step": 27450 + }, + { + "epoch": 15.03, + "learning_rate": 3.1739240506329115e-06, + "loss": 0.0123, + "step": 27475 + }, + { + "epoch": 15.03, + "learning_rate": 3.167594936708861e-06, + "loss": 0.0081, + "step": 27500 + }, + { + "epoch": 15.03, + "learning_rate": 3.1612658227848104e-06, + "loss": 0.0123, + "step": 27525 + }, + { + "epoch": 15.03, + "learning_rate": 3.1549367088607598e-06, + "loss": 0.0096, + "step": 27550 + }, + { + "epoch": 15.03, + "learning_rate": 3.1486075949367088e-06, + "loss": 0.0054, + "step": 27575 + }, + { + "epoch": 15.03, + "learning_rate": 3.1422784810126582e-06, + "loss": 0.006, + "step": 27600 + }, + { + "epoch": 15.03, + "learning_rate": 3.1359493670886076e-06, + "loss": 0.0089, + "step": 27625 + }, + { + "epoch": 15.03, + "learning_rate": 3.129620253164557e-06, + "loss": 0.0109, + "step": 27650 + }, + { + "epoch": 15.04, + "learning_rate": 3.1232911392405065e-06, + "loss": 0.0089, + "step": 27675 + }, + { + "epoch": 15.04, + "learning_rate": 3.1169620253164563e-06, + "loss": 0.0094, + "step": 27700 + }, + { + "epoch": 15.04, + "learning_rate": 3.1106329113924054e-06, + "loss": 0.0102, + "step": 27725 + }, + { + "epoch": 15.04, + "learning_rate": 3.1043037974683548e-06, + "loss": 0.0097, + "step": 27750 + }, + { + "epoch": 15.04, + "learning_rate": 3.097974683544304e-06, + "loss": 0.0078, + "step": 27775 + }, + { + "epoch": 15.04, + "learning_rate": 3.0916455696202536e-06, + "loss": 0.0073, + "step": 27800 + }, + { + "epoch": 15.04, + "learning_rate": 3.085316455696203e-06, + "loss": 0.0105, + "step": 27825 + }, + { + "epoch": 15.04, + "learning_rate": 3.078987341772152e-06, + "loss": 0.0083, + "step": 27850 + }, + { + "epoch": 15.04, + "learning_rate": 3.0726582278481015e-06, + "loss": 0.0138, + "step": 27875 + }, + { + "epoch": 15.04, + "learning_rate": 3.066329113924051e-06, + "loss": 0.0049, + "step": 27900 + }, + { + "epoch": 15.04, + "learning_rate": 3.0600000000000003e-06, + "loss": 0.0107, + "step": 27925 + }, + { + "epoch": 15.04, + "learning_rate": 3.0536708860759494e-06, + "loss": 0.0105, + "step": 27950 + }, + { + "epoch": 15.04, + "learning_rate": 3.0473417721518988e-06, + "loss": 0.0134, + "step": 27975 + }, + { + "epoch": 15.04, + "learning_rate": 3.041012658227848e-06, + "loss": 0.0137, + "step": 28000 + }, + { + "epoch": 15.04, + "eval_loss": 0.2780563235282898, + "eval_runtime": 1465.9542, + "eval_samples_per_second": 7.102, + "eval_steps_per_second": 0.444, + "eval_wer": 25.010162766874256, + "step": 28000 + }, + { + "epoch": 16.0, + "learning_rate": 3.0346835443037976e-06, + "loss": 0.009, + "step": 28025 + }, + { + "epoch": 16.0, + "learning_rate": 3.028354430379747e-06, + "loss": 0.0081, + "step": 28050 + }, + { + "epoch": 16.0, + "learning_rate": 3.022025316455696e-06, + "loss": 0.0109, + "step": 28075 + }, + { + "epoch": 16.0, + "learning_rate": 3.015696202531646e-06, + "loss": 0.0132, + "step": 28100 + }, + { + "epoch": 16.0, + "learning_rate": 3.0093670886075953e-06, + "loss": 0.0142, + "step": 28125 + }, + { + "epoch": 16.0, + "learning_rate": 3.0030379746835448e-06, + "loss": 0.0152, + "step": 28150 + }, + { + "epoch": 16.0, + "learning_rate": 2.996708860759494e-06, + "loss": 0.0074, + "step": 28175 + }, + { + "epoch": 16.0, + "learning_rate": 2.990379746835443e-06, + "loss": 0.0129, + "step": 28200 + }, + { + "epoch": 16.01, + "learning_rate": 2.9840506329113926e-06, + "loss": 0.0118, + "step": 28225 + }, + { + "epoch": 16.01, + "learning_rate": 2.977721518987342e-06, + "loss": 0.0153, + "step": 28250 + }, + { + "epoch": 16.01, + "learning_rate": 2.9713924050632915e-06, + "loss": 0.0121, + "step": 28275 + }, + { + "epoch": 16.01, + "learning_rate": 2.965063291139241e-06, + "loss": 0.0097, + "step": 28300 + }, + { + "epoch": 16.01, + "learning_rate": 2.95873417721519e-06, + "loss": 0.0083, + "step": 28325 + }, + { + "epoch": 16.01, + "learning_rate": 2.9524050632911393e-06, + "loss": 0.01, + "step": 28350 + }, + { + "epoch": 16.01, + "learning_rate": 2.9460759493670888e-06, + "loss": 0.0084, + "step": 28375 + }, + { + "epoch": 16.01, + "learning_rate": 2.939746835443038e-06, + "loss": 0.0124, + "step": 28400 + }, + { + "epoch": 16.01, + "learning_rate": 2.933417721518987e-06, + "loss": 0.0071, + "step": 28425 + }, + { + "epoch": 16.01, + "learning_rate": 2.9270886075949366e-06, + "loss": 0.0113, + "step": 28450 + }, + { + "epoch": 16.01, + "learning_rate": 2.920759493670886e-06, + "loss": 0.0085, + "step": 28475 + }, + { + "epoch": 16.01, + "learning_rate": 2.914430379746836e-06, + "loss": 0.0133, + "step": 28500 + }, + { + "epoch": 16.01, + "learning_rate": 2.9081012658227853e-06, + "loss": 0.014, + "step": 28525 + }, + { + "epoch": 16.01, + "learning_rate": 2.9017721518987348e-06, + "loss": 0.0105, + "step": 28550 + }, + { + "epoch": 16.01, + "learning_rate": 2.8954430379746838e-06, + "loss": 0.01, + "step": 28575 + }, + { + "epoch": 16.01, + "learning_rate": 2.889113924050633e-06, + "loss": 0.0135, + "step": 28600 + }, + { + "epoch": 16.02, + "learning_rate": 2.8827848101265826e-06, + "loss": 0.0118, + "step": 28625 + }, + { + "epoch": 16.02, + "learning_rate": 2.876455696202532e-06, + "loss": 0.0111, + "step": 28650 + }, + { + "epoch": 16.02, + "learning_rate": 2.8701265822784815e-06, + "loss": 0.008, + "step": 28675 + }, + { + "epoch": 16.02, + "learning_rate": 2.8637974683544305e-06, + "loss": 0.0103, + "step": 28700 + }, + { + "epoch": 16.02, + "learning_rate": 2.85746835443038e-06, + "loss": 0.0082, + "step": 28725 + }, + { + "epoch": 16.02, + "learning_rate": 2.8511392405063293e-06, + "loss": 0.0057, + "step": 28750 + }, + { + "epoch": 16.02, + "learning_rate": 2.8448101265822788e-06, + "loss": 0.0062, + "step": 28775 + }, + { + "epoch": 16.02, + "learning_rate": 2.8384810126582278e-06, + "loss": 0.0092, + "step": 28800 + }, + { + "epoch": 16.02, + "learning_rate": 2.832151898734177e-06, + "loss": 0.0084, + "step": 28825 + }, + { + "epoch": 16.02, + "learning_rate": 2.8258227848101266e-06, + "loss": 0.0099, + "step": 28850 + }, + { + "epoch": 16.02, + "learning_rate": 2.819493670886076e-06, + "loss": 0.0117, + "step": 28875 + }, + { + "epoch": 16.02, + "learning_rate": 2.8131645569620255e-06, + "loss": 0.0117, + "step": 28900 + }, + { + "epoch": 16.02, + "learning_rate": 2.8068354430379753e-06, + "loss": 0.0127, + "step": 28925 + }, + { + "epoch": 16.02, + "learning_rate": 2.8005063291139243e-06, + "loss": 0.0123, + "step": 28950 + }, + { + "epoch": 16.02, + "learning_rate": 2.7941772151898738e-06, + "loss": 0.0128, + "step": 28975 + }, + { + "epoch": 16.02, + "learning_rate": 2.787848101265823e-06, + "loss": 0.0073, + "step": 29000 + }, + { + "epoch": 16.02, + "eval_loss": 0.2685891091823578, + "eval_runtime": 1456.562, + "eval_samples_per_second": 7.148, + "eval_steps_per_second": 0.447, + "eval_wer": 23.278427291500677, + "step": 29000 + }, + { + "epoch": 16.03, + "learning_rate": 2.7815189873417726e-06, + "loss": 0.0068, + "step": 29025 + }, + { + "epoch": 16.03, + "learning_rate": 2.7751898734177216e-06, + "loss": 0.0087, + "step": 29050 + }, + { + "epoch": 16.03, + "learning_rate": 2.768860759493671e-06, + "loss": 0.0084, + "step": 29075 + }, + { + "epoch": 16.03, + "learning_rate": 2.7625316455696205e-06, + "loss": 0.0039, + "step": 29100 + }, + { + "epoch": 16.03, + "learning_rate": 2.75620253164557e-06, + "loss": 0.0066, + "step": 29125 + }, + { + "epoch": 16.03, + "learning_rate": 2.7498734177215193e-06, + "loss": 0.0108, + "step": 29150 + }, + { + "epoch": 16.03, + "learning_rate": 2.7435443037974683e-06, + "loss": 0.0052, + "step": 29175 + }, + { + "epoch": 16.03, + "learning_rate": 2.7372151898734178e-06, + "loss": 0.0061, + "step": 29200 + }, + { + "epoch": 16.03, + "learning_rate": 2.730886075949367e-06, + "loss": 0.0083, + "step": 29225 + }, + { + "epoch": 16.03, + "learning_rate": 2.7245569620253166e-06, + "loss": 0.0062, + "step": 29250 + }, + { + "epoch": 16.03, + "learning_rate": 2.7182278481012656e-06, + "loss": 0.0078, + "step": 29275 + }, + { + "epoch": 16.03, + "learning_rate": 2.711898734177215e-06, + "loss": 0.0127, + "step": 29300 + }, + { + "epoch": 16.03, + "learning_rate": 2.705569620253165e-06, + "loss": 0.0092, + "step": 29325 + }, + { + "epoch": 16.03, + "learning_rate": 2.6992405063291143e-06, + "loss": 0.0087, + "step": 29350 + }, + { + "epoch": 16.03, + "learning_rate": 2.6929113924050638e-06, + "loss": 0.0062, + "step": 29375 + }, + { + "epoch": 16.03, + "learning_rate": 2.686582278481013e-06, + "loss": 0.0065, + "step": 29400 + }, + { + "epoch": 16.04, + "learning_rate": 2.680253164556962e-06, + "loss": 0.0057, + "step": 29425 + }, + { + "epoch": 16.04, + "learning_rate": 2.6739240506329116e-06, + "loss": 0.0071, + "step": 29450 + }, + { + "epoch": 16.04, + "learning_rate": 2.667594936708861e-06, + "loss": 0.01, + "step": 29475 + }, + { + "epoch": 16.04, + "learning_rate": 2.6612658227848105e-06, + "loss": 0.009, + "step": 29500 + }, + { + "epoch": 16.04, + "learning_rate": 2.65493670886076e-06, + "loss": 0.0091, + "step": 29525 + }, + { + "epoch": 16.04, + "learning_rate": 2.648607594936709e-06, + "loss": 0.0104, + "step": 29550 + }, + { + "epoch": 16.04, + "learning_rate": 2.6422784810126583e-06, + "loss": 0.0071, + "step": 29575 + }, + { + "epoch": 16.04, + "learning_rate": 2.6359493670886078e-06, + "loss": 0.0081, + "step": 29600 + }, + { + "epoch": 16.04, + "learning_rate": 2.629620253164557e-06, + "loss": 0.008, + "step": 29625 + }, + { + "epoch": 16.04, + "learning_rate": 2.623291139240506e-06, + "loss": 0.0067, + "step": 29650 + }, + { + "epoch": 16.04, + "learning_rate": 2.6169620253164556e-06, + "loss": 0.0066, + "step": 29675 + }, + { + "epoch": 16.04, + "learning_rate": 2.610632911392405e-06, + "loss": 0.0121, + "step": 29700 + }, + { + "epoch": 16.04, + "learning_rate": 2.6043037974683545e-06, + "loss": 0.0075, + "step": 29725 + }, + { + "epoch": 16.04, + "learning_rate": 2.5979746835443043e-06, + "loss": 0.0111, + "step": 29750 + }, + { + "epoch": 17.0, + "learning_rate": 2.5916455696202538e-06, + "loss": 0.0095, + "step": 29775 + }, + { + "epoch": 17.0, + "learning_rate": 2.5853164556962028e-06, + "loss": 0.0077, + "step": 29800 + }, + { + "epoch": 17.0, + "learning_rate": 2.578987341772152e-06, + "loss": 0.0047, + "step": 29825 + }, + { + "epoch": 17.0, + "learning_rate": 2.5726582278481016e-06, + "loss": 0.0092, + "step": 29850 + }, + { + "epoch": 17.0, + "learning_rate": 2.566329113924051e-06, + "loss": 0.0191, + "step": 29875 + }, + { + "epoch": 17.0, + "learning_rate": 2.56e-06, + "loss": 0.0102, + "step": 29900 + }, + { + "epoch": 17.0, + "learning_rate": 2.5536708860759495e-06, + "loss": 0.0146, + "step": 29925 + }, + { + "epoch": 17.0, + "learning_rate": 2.547341772151899e-06, + "loss": 0.0096, + "step": 29950 + }, + { + "epoch": 17.01, + "learning_rate": 2.5410126582278483e-06, + "loss": 0.0159, + "step": 29975 + }, + { + "epoch": 17.01, + "learning_rate": 2.5346835443037978e-06, + "loss": 0.0094, + "step": 30000 + }, + { + "epoch": 17.01, + "eval_loss": 0.2636851966381073, + "eval_runtime": 1508.7196, + "eval_samples_per_second": 6.901, + "eval_steps_per_second": 0.431, + "eval_wer": 23.029642758418838, + "step": 30000 + }, + { + "epoch": 17.01, + "learning_rate": 2.5283544303797468e-06, + "loss": 0.0078, + "step": 30025 + }, + { + "epoch": 17.01, + "learning_rate": 2.522025316455696e-06, + "loss": 0.01, + "step": 30050 + }, + { + "epoch": 17.01, + "learning_rate": 2.5156962025316456e-06, + "loss": 0.0058, + "step": 30075 + }, + { + "epoch": 17.01, + "learning_rate": 2.509367088607595e-06, + "loss": 0.0092, + "step": 30100 + }, + { + "epoch": 17.01, + "learning_rate": 2.503037974683544e-06, + "loss": 0.0084, + "step": 30125 + }, + { + "epoch": 17.01, + "learning_rate": 2.496708860759494e-06, + "loss": 0.0069, + "step": 30150 + }, + { + "epoch": 17.01, + "learning_rate": 2.4903797468354433e-06, + "loss": 0.0059, + "step": 30175 + }, + { + "epoch": 17.01, + "learning_rate": 2.4840506329113923e-06, + "loss": 0.0097, + "step": 30200 + }, + { + "epoch": 17.01, + "learning_rate": 2.4777215189873418e-06, + "loss": 0.0126, + "step": 30225 + }, + { + "epoch": 17.01, + "learning_rate": 2.4713924050632916e-06, + "loss": 0.0096, + "step": 30250 + }, + { + "epoch": 17.01, + "learning_rate": 2.4650632911392406e-06, + "loss": 0.0094, + "step": 30275 + }, + { + "epoch": 17.01, + "learning_rate": 2.45873417721519e-06, + "loss": 0.0111, + "step": 30300 + }, + { + "epoch": 17.01, + "learning_rate": 2.4524050632911395e-06, + "loss": 0.0062, + "step": 30325 + }, + { + "epoch": 17.01, + "learning_rate": 2.446075949367089e-06, + "loss": 0.0078, + "step": 30350 + }, + { + "epoch": 17.02, + "learning_rate": 2.4397468354430383e-06, + "loss": 0.0043, + "step": 30375 + }, + { + "epoch": 17.02, + "learning_rate": 2.4336708860759494e-06, + "loss": 0.0102, + "step": 30400 + }, + { + "epoch": 17.02, + "learning_rate": 2.4273417721518988e-06, + "loss": 0.0101, + "step": 30425 + }, + { + "epoch": 17.02, + "learning_rate": 2.4210126582278482e-06, + "loss": 0.0066, + "step": 30450 + }, + { + "epoch": 17.02, + "learning_rate": 2.4146835443037976e-06, + "loss": 0.0078, + "step": 30475 + }, + { + "epoch": 17.02, + "learning_rate": 2.408354430379747e-06, + "loss": 0.0074, + "step": 30500 + }, + { + "epoch": 17.02, + "learning_rate": 2.4020253164556965e-06, + "loss": 0.0072, + "step": 30525 + }, + { + "epoch": 17.02, + "learning_rate": 2.395696202531646e-06, + "loss": 0.0109, + "step": 30550 + }, + { + "epoch": 17.02, + "learning_rate": 2.389367088607595e-06, + "loss": 0.0113, + "step": 30575 + }, + { + "epoch": 17.02, + "learning_rate": 2.3830379746835444e-06, + "loss": 0.0109, + "step": 30600 + }, + { + "epoch": 17.02, + "learning_rate": 2.3767088607594938e-06, + "loss": 0.0089, + "step": 30625 + }, + { + "epoch": 17.02, + "learning_rate": 2.3703797468354432e-06, + "loss": 0.0062, + "step": 30650 + }, + { + "epoch": 17.02, + "learning_rate": 2.3640506329113926e-06, + "loss": 0.0101, + "step": 30675 + }, + { + "epoch": 17.02, + "learning_rate": 2.357721518987342e-06, + "loss": 0.0096, + "step": 30700 + }, + { + "epoch": 17.02, + "learning_rate": 2.3513924050632915e-06, + "loss": 0.0093, + "step": 30725 + }, + { + "epoch": 17.02, + "learning_rate": 2.3450632911392405e-06, + "loss": 0.0113, + "step": 30750 + }, + { + "epoch": 17.03, + "learning_rate": 2.33873417721519e-06, + "loss": 0.0094, + "step": 30775 + }, + { + "epoch": 17.03, + "learning_rate": 2.3324050632911394e-06, + "loss": 0.0086, + "step": 30800 + }, + { + "epoch": 17.03, + "learning_rate": 2.3260759493670888e-06, + "loss": 0.0096, + "step": 30825 + }, + { + "epoch": 17.03, + "learning_rate": 2.319746835443038e-06, + "loss": 0.0086, + "step": 30850 + }, + { + "epoch": 17.03, + "learning_rate": 2.3134177215189876e-06, + "loss": 0.0064, + "step": 30875 + }, + { + "epoch": 17.03, + "learning_rate": 2.307088607594937e-06, + "loss": 0.0107, + "step": 30900 + }, + { + "epoch": 17.03, + "learning_rate": 2.3007594936708865e-06, + "loss": 0.0073, + "step": 30925 + }, + { + "epoch": 17.03, + "learning_rate": 2.2944303797468355e-06, + "loss": 0.005, + "step": 30950 + }, + { + "epoch": 17.03, + "learning_rate": 2.288101265822785e-06, + "loss": 0.0055, + "step": 30975 + }, + { + "epoch": 17.03, + "learning_rate": 2.2817721518987344e-06, + "loss": 0.006, + "step": 31000 + }, + { + "epoch": 17.03, + "eval_loss": 0.2709895372390747, + "eval_runtime": 1461.5544, + "eval_samples_per_second": 7.123, + "eval_steps_per_second": 0.445, + "eval_wer": 23.29468771849949, + "step": 31000 + }, + { + "epoch": 17.03, + "learning_rate": 2.2754430379746838e-06, + "loss": 0.0094, + "step": 31025 + }, + { + "epoch": 17.03, + "learning_rate": 2.2691139240506328e-06, + "loss": 0.0088, + "step": 31050 + }, + { + "epoch": 17.03, + "learning_rate": 2.2627848101265826e-06, + "loss": 0.0057, + "step": 31075 + }, + { + "epoch": 17.03, + "learning_rate": 2.256455696202532e-06, + "loss": 0.0066, + "step": 31100 + }, + { + "epoch": 17.03, + "learning_rate": 2.250126582278481e-06, + "loss": 0.008, + "step": 31125 + }, + { + "epoch": 17.03, + "learning_rate": 2.2437974683544305e-06, + "loss": 0.0048, + "step": 31150 + }, + { + "epoch": 17.04, + "learning_rate": 2.23746835443038e-06, + "loss": 0.0074, + "step": 31175 + }, + { + "epoch": 17.04, + "learning_rate": 2.2311392405063294e-06, + "loss": 0.0061, + "step": 31200 + }, + { + "epoch": 17.04, + "learning_rate": 2.2248101265822788e-06, + "loss": 0.0088, + "step": 31225 + }, + { + "epoch": 17.04, + "learning_rate": 2.2184810126582278e-06, + "loss": 0.0087, + "step": 31250 + }, + { + "epoch": 17.04, + "learning_rate": 2.2121518987341776e-06, + "loss": 0.0089, + "step": 31275 + }, + { + "epoch": 17.04, + "learning_rate": 2.2058227848101266e-06, + "loss": 0.0061, + "step": 31300 + }, + { + "epoch": 17.04, + "learning_rate": 2.199493670886076e-06, + "loss": 0.0097, + "step": 31325 + }, + { + "epoch": 17.04, + "learning_rate": 2.1931645569620255e-06, + "loss": 0.0063, + "step": 31350 + }, + { + "epoch": 17.04, + "learning_rate": 2.186835443037975e-06, + "loss": 0.0084, + "step": 31375 + }, + { + "epoch": 17.04, + "learning_rate": 2.1805063291139243e-06, + "loss": 0.0089, + "step": 31400 + }, + { + "epoch": 17.04, + "learning_rate": 2.1741772151898734e-06, + "loss": 0.0077, + "step": 31425 + }, + { + "epoch": 17.04, + "learning_rate": 2.1678481012658228e-06, + "loss": 0.0153, + "step": 31450 + }, + { + "epoch": 17.04, + "learning_rate": 2.1615189873417726e-06, + "loss": 0.0088, + "step": 31475 + }, + { + "epoch": 17.04, + "learning_rate": 2.1551898734177216e-06, + "loss": 0.0095, + "step": 31500 + }, + { + "epoch": 18.0, + "learning_rate": 2.148860759493671e-06, + "loss": 0.0084, + "step": 31525 + }, + { + "epoch": 18.0, + "learning_rate": 2.1425316455696205e-06, + "loss": 0.0054, + "step": 31550 + }, + { + "epoch": 18.0, + "learning_rate": 2.13620253164557e-06, + "loss": 0.013, + "step": 31575 + }, + { + "epoch": 18.0, + "learning_rate": 2.129873417721519e-06, + "loss": 0.0074, + "step": 31600 + }, + { + "epoch": 18.0, + "learning_rate": 2.1235443037974684e-06, + "loss": 0.0083, + "step": 31625 + }, + { + "epoch": 18.0, + "learning_rate": 2.1172151898734178e-06, + "loss": 0.0118, + "step": 31650 + }, + { + "epoch": 18.0, + "learning_rate": 2.110886075949367e-06, + "loss": 0.0095, + "step": 31675 + }, + { + "epoch": 18.0, + "learning_rate": 2.1045569620253166e-06, + "loss": 0.0075, + "step": 31700 + }, + { + "epoch": 18.01, + "learning_rate": 2.098227848101266e-06, + "loss": 0.0149, + "step": 31725 + }, + { + "epoch": 18.01, + "learning_rate": 2.0918987341772155e-06, + "loss": 0.0095, + "step": 31750 + }, + { + "epoch": 18.01, + "learning_rate": 2.085569620253165e-06, + "loss": 0.0098, + "step": 31775 + }, + { + "epoch": 18.01, + "learning_rate": 2.079240506329114e-06, + "loss": 0.0058, + "step": 31800 + }, + { + "epoch": 18.01, + "learning_rate": 2.0729113924050633e-06, + "loss": 0.0072, + "step": 31825 + }, + { + "epoch": 18.01, + "learning_rate": 2.0665822784810128e-06, + "loss": 0.0103, + "step": 31850 + }, + { + "epoch": 18.01, + "learning_rate": 2.060253164556962e-06, + "loss": 0.0085, + "step": 31875 + }, + { + "epoch": 18.01, + "learning_rate": 2.0539240506329116e-06, + "loss": 0.0064, + "step": 31900 + }, + { + "epoch": 18.01, + "learning_rate": 2.047594936708861e-06, + "loss": 0.0077, + "step": 31925 + }, + { + "epoch": 18.01, + "learning_rate": 2.0412658227848105e-06, + "loss": 0.0086, + "step": 31950 + }, + { + "epoch": 18.01, + "learning_rate": 2.0349367088607595e-06, + "loss": 0.0136, + "step": 31975 + }, + { + "epoch": 18.01, + "learning_rate": 2.028607594936709e-06, + "loss": 0.0085, + "step": 32000 + }, + { + "epoch": 18.01, + "eval_loss": 0.26488059759140015, + "eval_runtime": 1466.953, + "eval_samples_per_second": 7.097, + "eval_steps_per_second": 0.444, + "eval_wer": 23.089806338314446, + "step": 32000 + }, + { + "epoch": 18.01, + "learning_rate": 2.0222784810126583e-06, + "loss": 0.011, + "step": 32025 + }, + { + "epoch": 18.01, + "learning_rate": 2.0159493670886078e-06, + "loss": 0.0086, + "step": 32050 + }, + { + "epoch": 18.01, + "learning_rate": 2.009620253164557e-06, + "loss": 0.008, + "step": 32075 + }, + { + "epoch": 18.01, + "learning_rate": 2.0032911392405066e-06, + "loss": 0.0083, + "step": 32100 + }, + { + "epoch": 18.02, + "learning_rate": 1.996962025316456e-06, + "loss": 0.0073, + "step": 32125 + }, + { + "epoch": 18.02, + "learning_rate": 1.9906329113924055e-06, + "loss": 0.0036, + "step": 32150 + }, + { + "epoch": 18.02, + "learning_rate": 1.9843037974683545e-06, + "loss": 0.0096, + "step": 32175 + }, + { + "epoch": 18.02, + "learning_rate": 1.977974683544304e-06, + "loss": 0.0071, + "step": 32200 + }, + { + "epoch": 18.02, + "learning_rate": 1.9716455696202533e-06, + "loss": 0.009, + "step": 32225 + }, + { + "epoch": 18.02, + "learning_rate": 1.9653164556962028e-06, + "loss": 0.0093, + "step": 32250 + }, + { + "epoch": 18.02, + "learning_rate": 1.9589873417721518e-06, + "loss": 0.0071, + "step": 32275 + }, + { + "epoch": 18.02, + "learning_rate": 1.9526582278481016e-06, + "loss": 0.0037, + "step": 32300 + }, + { + "epoch": 18.02, + "learning_rate": 1.946329113924051e-06, + "loss": 0.007, + "step": 32325 + }, + { + "epoch": 18.02, + "learning_rate": 1.94e-06, + "loss": 0.0104, + "step": 32350 + }, + { + "epoch": 18.02, + "learning_rate": 1.9336708860759495e-06, + "loss": 0.0082, + "step": 32375 + }, + { + "epoch": 18.02, + "learning_rate": 1.927341772151899e-06, + "loss": 0.0095, + "step": 32400 + }, + { + "epoch": 18.02, + "learning_rate": 1.9210126582278483e-06, + "loss": 0.0101, + "step": 32425 + }, + { + "epoch": 18.02, + "learning_rate": 1.9146835443037973e-06, + "loss": 0.0122, + "step": 32450 + }, + { + "epoch": 18.02, + "learning_rate": 1.9083544303797468e-06, + "loss": 0.0082, + "step": 32475 + }, + { + "epoch": 18.02, + "learning_rate": 1.9020253164556964e-06, + "loss": 0.0093, + "step": 32500 + }, + { + "epoch": 18.03, + "learning_rate": 1.8956962025316458e-06, + "loss": 0.0073, + "step": 32525 + }, + { + "epoch": 18.03, + "learning_rate": 1.889367088607595e-06, + "loss": 0.0061, + "step": 32550 + }, + { + "epoch": 18.03, + "learning_rate": 1.8830379746835445e-06, + "loss": 0.0072, + "step": 32575 + }, + { + "epoch": 18.03, + "learning_rate": 1.876708860759494e-06, + "loss": 0.0064, + "step": 32600 + }, + { + "epoch": 18.03, + "learning_rate": 1.8703797468354431e-06, + "loss": 0.0084, + "step": 32625 + }, + { + "epoch": 18.03, + "learning_rate": 1.8640506329113926e-06, + "loss": 0.0067, + "step": 32650 + }, + { + "epoch": 18.03, + "learning_rate": 1.8577215189873418e-06, + "loss": 0.0052, + "step": 32675 + }, + { + "epoch": 18.03, + "learning_rate": 1.8513924050632912e-06, + "loss": 0.0085, + "step": 32700 + }, + { + "epoch": 18.03, + "learning_rate": 1.8450632911392408e-06, + "loss": 0.0077, + "step": 32725 + }, + { + "epoch": 18.03, + "learning_rate": 1.83873417721519e-06, + "loss": 0.0068, + "step": 32750 + }, + { + "epoch": 18.03, + "learning_rate": 1.8324050632911395e-06, + "loss": 0.0079, + "step": 32775 + }, + { + "epoch": 18.03, + "learning_rate": 1.8260759493670887e-06, + "loss": 0.008, + "step": 32800 + }, + { + "epoch": 18.03, + "learning_rate": 1.8197468354430381e-06, + "loss": 0.0071, + "step": 32825 + }, + { + "epoch": 18.03, + "learning_rate": 1.8134177215189873e-06, + "loss": 0.0091, + "step": 32850 + }, + { + "epoch": 18.03, + "learning_rate": 1.8070886075949368e-06, + "loss": 0.0059, + "step": 32875 + }, + { + "epoch": 18.03, + "learning_rate": 1.8007594936708862e-06, + "loss": 0.0079, + "step": 32900 + }, + { + "epoch": 18.04, + "learning_rate": 1.7944303797468356e-06, + "loss": 0.0055, + "step": 32925 + }, + { + "epoch": 18.04, + "learning_rate": 1.788101265822785e-06, + "loss": 0.0095, + "step": 32950 + }, + { + "epoch": 18.04, + "learning_rate": 1.7817721518987343e-06, + "loss": 0.0086, + "step": 32975 + }, + { + "epoch": 18.04, + "learning_rate": 1.7754430379746837e-06, + "loss": 0.0057, + "step": 33000 + }, + { + "epoch": 18.04, + "eval_loss": 0.2678743898868561, + "eval_runtime": 1484.126, + "eval_samples_per_second": 7.015, + "eval_steps_per_second": 0.439, + "eval_wer": 23.405258622091416, + "step": 33000 + }, + { + "epoch": 18.04, + "learning_rate": 1.7691139240506331e-06, + "loss": 0.0085, + "step": 33025 + }, + { + "epoch": 18.04, + "learning_rate": 1.7627848101265823e-06, + "loss": 0.0073, + "step": 33050 + }, + { + "epoch": 18.04, + "learning_rate": 1.7564556962025318e-06, + "loss": 0.0045, + "step": 33075 + }, + { + "epoch": 18.04, + "learning_rate": 1.750126582278481e-06, + "loss": 0.0065, + "step": 33100 + }, + { + "epoch": 18.04, + "learning_rate": 1.7437974683544306e-06, + "loss": 0.0076, + "step": 33125 + }, + { + "epoch": 18.04, + "learning_rate": 1.73746835443038e-06, + "loss": 0.007, + "step": 33150 + }, + { + "epoch": 18.04, + "learning_rate": 1.7311392405063293e-06, + "loss": 0.0109, + "step": 33175 + }, + { + "epoch": 18.04, + "learning_rate": 1.7248101265822787e-06, + "loss": 0.0095, + "step": 33200 + }, + { + "epoch": 18.04, + "learning_rate": 1.718481012658228e-06, + "loss": 0.0109, + "step": 33225 + }, + { + "epoch": 18.04, + "learning_rate": 1.7121518987341773e-06, + "loss": 0.0117, + "step": 33250 + }, + { + "epoch": 19.0, + "learning_rate": 1.7058227848101266e-06, + "loss": 0.0038, + "step": 33275 + }, + { + "epoch": 19.0, + "learning_rate": 1.699493670886076e-06, + "loss": 0.013, + "step": 33300 + }, + { + "epoch": 19.0, + "learning_rate": 1.6931645569620256e-06, + "loss": 0.0104, + "step": 33325 + }, + { + "epoch": 19.0, + "learning_rate": 1.6868354430379748e-06, + "loss": 0.0064, + "step": 33350 + }, + { + "epoch": 19.0, + "learning_rate": 1.6805063291139243e-06, + "loss": 0.0137, + "step": 33375 + }, + { + "epoch": 19.0, + "learning_rate": 1.6741772151898735e-06, + "loss": 0.012, + "step": 33400 + }, + { + "epoch": 19.0, + "learning_rate": 1.667848101265823e-06, + "loss": 0.0106, + "step": 33425 + }, + { + "epoch": 19.0, + "learning_rate": 1.6615189873417723e-06, + "loss": 0.0072, + "step": 33450 + }, + { + "epoch": 19.01, + "learning_rate": 1.6551898734177216e-06, + "loss": 0.0154, + "step": 33475 + }, + { + "epoch": 19.01, + "learning_rate": 1.648860759493671e-06, + "loss": 0.0129, + "step": 33500 + }, + { + "epoch": 19.01, + "learning_rate": 1.6425316455696206e-06, + "loss": 0.008, + "step": 33525 + }, + { + "epoch": 19.01, + "learning_rate": 1.6362025316455698e-06, + "loss": 0.0071, + "step": 33550 + }, + { + "epoch": 19.01, + "learning_rate": 1.6298734177215193e-06, + "loss": 0.0088, + "step": 33575 + }, + { + "epoch": 19.01, + "learning_rate": 1.6235443037974685e-06, + "loss": 0.0102, + "step": 33600 + }, + { + "epoch": 19.01, + "learning_rate": 1.617215189873418e-06, + "loss": 0.0064, + "step": 33625 + }, + { + "epoch": 19.01, + "learning_rate": 1.6108860759493671e-06, + "loss": 0.0098, + "step": 33650 + }, + { + "epoch": 19.01, + "learning_rate": 1.6045569620253166e-06, + "loss": 0.009, + "step": 33675 + }, + { + "epoch": 19.01, + "learning_rate": 1.5982278481012658e-06, + "loss": 0.0074, + "step": 33700 + }, + { + "epoch": 19.01, + "learning_rate": 1.5918987341772152e-06, + "loss": 0.0055, + "step": 33725 + }, + { + "epoch": 19.01, + "learning_rate": 1.5855696202531648e-06, + "loss": 0.0059, + "step": 33750 + }, + { + "epoch": 19.01, + "learning_rate": 1.579240506329114e-06, + "loss": 0.0102, + "step": 33775 + }, + { + "epoch": 19.01, + "learning_rate": 1.5729113924050635e-06, + "loss": 0.0111, + "step": 33800 + }, + { + "epoch": 19.01, + "learning_rate": 1.5665822784810127e-06, + "loss": 0.0033, + "step": 33825 + }, + { + "epoch": 19.01, + "learning_rate": 1.5602531645569621e-06, + "loss": 0.0057, + "step": 33850 + }, + { + "epoch": 19.02, + "learning_rate": 1.5539240506329115e-06, + "loss": 0.0074, + "step": 33875 + }, + { + "epoch": 19.02, + "learning_rate": 1.5475949367088608e-06, + "loss": 0.0056, + "step": 33900 + }, + { + "epoch": 19.02, + "learning_rate": 1.5412658227848102e-06, + "loss": 0.0053, + "step": 33925 + }, + { + "epoch": 19.02, + "learning_rate": 1.5349367088607598e-06, + "loss": 0.0049, + "step": 33950 + }, + { + "epoch": 19.02, + "learning_rate": 1.528607594936709e-06, + "loss": 0.0065, + "step": 33975 + }, + { + "epoch": 19.02, + "learning_rate": 1.5222784810126585e-06, + "loss": 0.007, + "step": 34000 + }, + { + "epoch": 19.02, + "eval_loss": 0.2660910487174988, + "eval_runtime": 1471.5922, + "eval_samples_per_second": 7.075, + "eval_steps_per_second": 0.442, + "eval_wer": 23.14834387551017, + "step": 34000 + }, + { + "epoch": 19.02, + "learning_rate": 1.5159493670886077e-06, + "loss": 0.0027, + "step": 34025 + }, + { + "epoch": 19.02, + "learning_rate": 1.5096202531645571e-06, + "loss": 0.0081, + "step": 34050 + }, + { + "epoch": 19.02, + "learning_rate": 1.5032911392405063e-06, + "loss": 0.0044, + "step": 34075 + }, + { + "epoch": 19.02, + "learning_rate": 1.4969620253164558e-06, + "loss": 0.0093, + "step": 34100 + }, + { + "epoch": 19.02, + "learning_rate": 1.490632911392405e-06, + "loss": 0.0067, + "step": 34125 + }, + { + "epoch": 19.02, + "learning_rate": 1.4843037974683546e-06, + "loss": 0.0047, + "step": 34150 + }, + { + "epoch": 19.02, + "learning_rate": 1.477974683544304e-06, + "loss": 0.0119, + "step": 34175 + }, + { + "epoch": 19.02, + "learning_rate": 1.4716455696202533e-06, + "loss": 0.0091, + "step": 34200 + }, + { + "epoch": 19.02, + "learning_rate": 1.4653164556962027e-06, + "loss": 0.0094, + "step": 34225 + }, + { + "epoch": 19.02, + "learning_rate": 1.458987341772152e-06, + "loss": 0.0069, + "step": 34250 + }, + { + "epoch": 19.03, + "learning_rate": 1.4526582278481013e-06, + "loss": 0.0066, + "step": 34275 + }, + { + "epoch": 19.03, + "learning_rate": 1.4463291139240508e-06, + "loss": 0.0072, + "step": 34300 + }, + { + "epoch": 19.03, + "learning_rate": 1.44e-06, + "loss": 0.0067, + "step": 34325 + }, + { + "epoch": 19.03, + "learning_rate": 1.4336708860759496e-06, + "loss": 0.0076, + "step": 34350 + }, + { + "epoch": 19.03, + "learning_rate": 1.427341772151899e-06, + "loss": 0.0083, + "step": 34375 + }, + { + "epoch": 19.03, + "learning_rate": 1.4212658227848103e-06, + "loss": 0.0072, + "step": 34400 + }, + { + "epoch": 19.03, + "learning_rate": 1.4149367088607597e-06, + "loss": 0.0086, + "step": 34425 + }, + { + "epoch": 19.03, + "learning_rate": 1.408607594936709e-06, + "loss": 0.0057, + "step": 34450 + }, + { + "epoch": 19.03, + "learning_rate": 1.4022784810126584e-06, + "loss": 0.0111, + "step": 34475 + }, + { + "epoch": 19.03, + "learning_rate": 1.3959493670886076e-06, + "loss": 0.01, + "step": 34500 + }, + { + "epoch": 19.03, + "learning_rate": 1.389620253164557e-06, + "loss": 0.008, + "step": 34525 + }, + { + "epoch": 19.03, + "learning_rate": 1.3832911392405066e-06, + "loss": 0.0082, + "step": 34550 + }, + { + "epoch": 19.03, + "learning_rate": 1.3769620253164559e-06, + "loss": 0.0062, + "step": 34575 + }, + { + "epoch": 19.03, + "learning_rate": 1.3706329113924053e-06, + "loss": 0.0052, + "step": 34600 + }, + { + "epoch": 19.03, + "learning_rate": 1.3643037974683545e-06, + "loss": 0.007, + "step": 34625 + }, + { + "epoch": 19.03, + "learning_rate": 1.357974683544304e-06, + "loss": 0.0045, + "step": 34650 + }, + { + "epoch": 19.04, + "learning_rate": 1.3516455696202531e-06, + "loss": 0.0089, + "step": 34675 + }, + { + "epoch": 19.04, + "learning_rate": 1.3453164556962026e-06, + "loss": 0.0081, + "step": 34700 + }, + { + "epoch": 19.04, + "learning_rate": 1.338987341772152e-06, + "loss": 0.0101, + "step": 34725 + }, + { + "epoch": 19.04, + "learning_rate": 1.3326582278481014e-06, + "loss": 0.0072, + "step": 34750 + }, + { + "epoch": 19.04, + "learning_rate": 1.3263291139240509e-06, + "loss": 0.005, + "step": 34775 + }, + { + "epoch": 19.04, + "learning_rate": 1.32e-06, + "loss": 0.0083, + "step": 34800 + }, + { + "epoch": 19.04, + "learning_rate": 1.3136708860759495e-06, + "loss": 0.0087, + "step": 34825 + }, + { + "epoch": 19.04, + "learning_rate": 1.307341772151899e-06, + "loss": 0.0067, + "step": 34850 + }, + { + "epoch": 19.04, + "learning_rate": 1.3010126582278481e-06, + "loss": 0.0048, + "step": 34875 + }, + { + "epoch": 19.04, + "learning_rate": 1.2946835443037976e-06, + "loss": 0.0065, + "step": 34900 + }, + { + "epoch": 19.04, + "learning_rate": 1.2883544303797468e-06, + "loss": 0.0076, + "step": 34925 + }, + { + "epoch": 19.04, + "learning_rate": 1.2820253164556964e-06, + "loss": 0.007, + "step": 34950 + }, + { + "epoch": 19.04, + "learning_rate": 1.2756962025316458e-06, + "loss": 0.0044, + "step": 34975 + }, + { + "epoch": 19.04, + "learning_rate": 1.269367088607595e-06, + "loss": 0.0082, + "step": 35000 + }, + { + "epoch": 19.04, + "eval_loss": 0.26719748973846436, + "eval_runtime": 1465.7868, + "eval_samples_per_second": 7.103, + "eval_steps_per_second": 0.444, + "eval_wer": 24.169498691035628, + "step": 35000 + }, + { + "epoch": 20.0, + "learning_rate": 1.2630379746835445e-06, + "loss": 0.0072, + "step": 35025 + }, + { + "epoch": 20.0, + "learning_rate": 1.2567088607594937e-06, + "loss": 0.0078, + "step": 35050 + }, + { + "epoch": 20.0, + "learning_rate": 1.2503797468354431e-06, + "loss": 0.0069, + "step": 35075 + }, + { + "epoch": 20.0, + "learning_rate": 1.2440506329113924e-06, + "loss": 0.0063, + "step": 35100 + }, + { + "epoch": 20.0, + "learning_rate": 1.237721518987342e-06, + "loss": 0.0073, + "step": 35125 + }, + { + "epoch": 20.0, + "learning_rate": 1.2313924050632912e-06, + "loss": 0.0076, + "step": 35150 + }, + { + "epoch": 20.0, + "learning_rate": 1.2250632911392406e-06, + "loss": 0.0084, + "step": 35175 + }, + { + "epoch": 20.0, + "learning_rate": 1.2187341772151899e-06, + "loss": 0.0061, + "step": 35200 + }, + { + "epoch": 20.01, + "learning_rate": 1.2124050632911393e-06, + "loss": 0.0074, + "step": 35225 + }, + { + "epoch": 20.01, + "learning_rate": 1.2060759493670887e-06, + "loss": 0.011, + "step": 35250 + }, + { + "epoch": 20.01, + "learning_rate": 1.1997468354430381e-06, + "loss": 0.0076, + "step": 35275 + }, + { + "epoch": 20.01, + "learning_rate": 1.1934177215189874e-06, + "loss": 0.007, + "step": 35300 + }, + { + "epoch": 20.01, + "learning_rate": 1.1870886075949368e-06, + "loss": 0.0074, + "step": 35325 + }, + { + "epoch": 20.01, + "learning_rate": 1.1807594936708862e-06, + "loss": 0.0077, + "step": 35350 + }, + { + "epoch": 20.01, + "learning_rate": 1.1744303797468354e-06, + "loss": 0.0074, + "step": 35375 + }, + { + "epoch": 20.01, + "learning_rate": 1.1681012658227848e-06, + "loss": 0.007, + "step": 35400 + }, + { + "epoch": 20.01, + "learning_rate": 1.1617721518987343e-06, + "loss": 0.0068, + "step": 35425 + }, + { + "epoch": 20.01, + "learning_rate": 1.1554430379746837e-06, + "loss": 0.0049, + "step": 35450 + }, + { + "epoch": 20.01, + "learning_rate": 1.149113924050633e-06, + "loss": 0.0074, + "step": 35475 + }, + { + "epoch": 20.01, + "learning_rate": 1.1427848101265823e-06, + "loss": 0.0097, + "step": 35500 + }, + { + "epoch": 20.01, + "learning_rate": 1.1364556962025318e-06, + "loss": 0.0065, + "step": 35525 + }, + { + "epoch": 20.01, + "learning_rate": 1.1301265822784812e-06, + "loss": 0.0056, + "step": 35550 + }, + { + "epoch": 20.01, + "learning_rate": 1.1237974683544304e-06, + "loss": 0.0056, + "step": 35575 + }, + { + "epoch": 20.01, + "learning_rate": 1.1174683544303798e-06, + "loss": 0.0056, + "step": 35600 + }, + { + "epoch": 20.02, + "learning_rate": 1.1111392405063293e-06, + "loss": 0.0104, + "step": 35625 + }, + { + "epoch": 20.02, + "learning_rate": 1.1048101265822787e-06, + "loss": 0.0071, + "step": 35650 + }, + { + "epoch": 20.02, + "learning_rate": 1.098481012658228e-06, + "loss": 0.0079, + "step": 35675 + }, + { + "epoch": 20.02, + "learning_rate": 1.0921518987341773e-06, + "loss": 0.0077, + "step": 35700 + }, + { + "epoch": 20.02, + "learning_rate": 1.0858227848101268e-06, + "loss": 0.0038, + "step": 35725 + }, + { + "epoch": 20.02, + "learning_rate": 1.079493670886076e-06, + "loss": 0.0035, + "step": 35750 + }, + { + "epoch": 20.02, + "learning_rate": 1.0731645569620254e-06, + "loss": 0.0047, + "step": 35775 + }, + { + "epoch": 20.02, + "learning_rate": 1.0668354430379746e-06, + "loss": 0.0079, + "step": 35800 + }, + { + "epoch": 20.02, + "learning_rate": 1.0605063291139243e-06, + "loss": 0.006, + "step": 35825 + }, + { + "epoch": 20.02, + "learning_rate": 1.0541772151898735e-06, + "loss": 0.0084, + "step": 35850 + }, + { + "epoch": 20.02, + "learning_rate": 1.047848101265823e-06, + "loss": 0.0073, + "step": 35875 + }, + { + "epoch": 20.02, + "learning_rate": 1.0415189873417721e-06, + "loss": 0.0049, + "step": 35900 + }, + { + "epoch": 20.02, + "learning_rate": 1.0351898734177216e-06, + "loss": 0.0114, + "step": 35925 + }, + { + "epoch": 20.02, + "learning_rate": 1.028860759493671e-06, + "loss": 0.0086, + "step": 35950 + }, + { + "epoch": 20.02, + "learning_rate": 1.0225316455696204e-06, + "loss": 0.0088, + "step": 35975 + }, + { + "epoch": 20.02, + "learning_rate": 1.0162025316455696e-06, + "loss": 0.0054, + "step": 36000 + }, + { + "epoch": 20.02, + "eval_loss": 0.26699596643447876, + "eval_runtime": 1450.3878, + "eval_samples_per_second": 7.178, + "eval_steps_per_second": 0.449, + "eval_wer": 22.689799834143646, + "step": 36000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.66167473504256e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-36000/training_args.bin b/checkpoint-36000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-36000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-37000/config.json b/checkpoint-37000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-37000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-37000/generation_config.json b/checkpoint-37000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-37000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-37000/optimizer.pt b/checkpoint-37000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e9eec20bd8455995853212fa0915b70125adc94 --- /dev/null +++ b/checkpoint-37000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cf729fd1d299423d1a22a0b908928e021304f5de6e4bf0e4c3ccc9c714d2cea +size 1934161093 diff --git a/checkpoint-37000/preprocessor_config.json b/checkpoint-37000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-37000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-37000/pytorch_model.bin b/checkpoint-37000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e9d9a75218641894b455bd7e459de50ad4b03524 --- /dev/null +++ b/checkpoint-37000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db29bff28b200eb158b9e6c2c70f6ac9d41aaba1ec850753562c70593293339e +size 967102601 diff --git a/checkpoint-37000/rng_state.pth b/checkpoint-37000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f998d04f642874b446678e0147836a9bbeb0175c --- /dev/null +++ b/checkpoint-37000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac5c0df19a5c48b1e514e60412b8d9721ee00284e4adce3393fefcffe27ef5db +size 14639 diff --git a/checkpoint-37000/scaler.pt b/checkpoint-37000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d48283ff866b9a4a933da37337d670d9bb6d5675 --- /dev/null +++ b/checkpoint-37000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b00ce69f284be57536c96f28c8d97853890edeafd60730feecc5944ddf84ac5 +size 557 diff --git a/checkpoint-37000/scheduler.pt b/checkpoint-37000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ec6a651deaec9d705952c4fa948758c319b13c1 --- /dev/null +++ b/checkpoint-37000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73c926f3c1486f184a02d9a54797c252996dc56d6c54487fef7d7bdfbc7932ad +size 627 diff --git a/checkpoint-37000/trainer_state.json b/checkpoint-37000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7377b390e7b86b1c8da89f73625d32de02c77e06 --- /dev/null +++ b/checkpoint-37000/trainer_state.json @@ -0,0 +1,9229 @@ +{ + "best_metric": 22.689799834143646, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-36000", + "epoch": 21.005725, + "global_step": 37000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + }, + { + "epoch": 13.03, + "learning_rate": 4.046835443037975e-06, + "loss": 0.0128, + "step": 24025 + }, + { + "epoch": 13.03, + "learning_rate": 4.040506329113924e-06, + "loss": 0.012, + "step": 24050 + }, + { + "epoch": 13.03, + "learning_rate": 4.034177215189874e-06, + "loss": 0.0098, + "step": 24075 + }, + { + "epoch": 13.03, + "learning_rate": 4.027848101265823e-06, + "loss": 0.0091, + "step": 24100 + }, + { + "epoch": 13.03, + "learning_rate": 4.021518987341773e-06, + "loss": 0.0077, + "step": 24125 + }, + { + "epoch": 13.03, + "learning_rate": 4.015189873417722e-06, + "loss": 0.0135, + "step": 24150 + }, + { + "epoch": 13.04, + "learning_rate": 4.0088607594936715e-06, + "loss": 0.0124, + "step": 24175 + }, + { + "epoch": 13.04, + "learning_rate": 4.002531645569621e-06, + "loss": 0.0152, + "step": 24200 + }, + { + "epoch": 13.04, + "learning_rate": 3.99620253164557e-06, + "loss": 0.0127, + "step": 24225 + }, + { + "epoch": 13.04, + "learning_rate": 3.989873417721519e-06, + "loss": 0.0066, + "step": 24250 + }, + { + "epoch": 13.04, + "learning_rate": 3.983544303797468e-06, + "loss": 0.0163, + "step": 24275 + }, + { + "epoch": 13.04, + "learning_rate": 3.977215189873418e-06, + "loss": 0.0095, + "step": 24300 + }, + { + "epoch": 13.04, + "learning_rate": 3.970886075949367e-06, + "loss": 0.0103, + "step": 24325 + }, + { + "epoch": 13.04, + "learning_rate": 3.964556962025317e-06, + "loss": 0.0112, + "step": 24350 + }, + { + "epoch": 13.04, + "learning_rate": 3.958227848101266e-06, + "loss": 0.01, + "step": 24375 + }, + { + "epoch": 13.04, + "learning_rate": 3.9518987341772155e-06, + "loss": 0.0131, + "step": 24400 + }, + { + "epoch": 13.04, + "learning_rate": 3.945569620253165e-06, + "loss": 0.0143, + "step": 24425 + }, + { + "epoch": 13.04, + "learning_rate": 3.939240506329114e-06, + "loss": 0.0151, + "step": 24450 + }, + { + "epoch": 13.04, + "learning_rate": 3.932911392405064e-06, + "loss": 0.0129, + "step": 24475 + }, + { + "epoch": 13.04, + "learning_rate": 3.926582278481013e-06, + "loss": 0.0125, + "step": 24500 + }, + { + "epoch": 14.0, + "learning_rate": 3.920506329113925e-06, + "loss": 0.0165, + "step": 24525 + }, + { + "epoch": 14.0, + "learning_rate": 3.914177215189874e-06, + "loss": 0.0114, + "step": 24550 + }, + { + "epoch": 14.0, + "learning_rate": 3.9078481012658235e-06, + "loss": 0.0111, + "step": 24575 + }, + { + "epoch": 14.0, + "learning_rate": 3.901518987341772e-06, + "loss": 0.0134, + "step": 24600 + }, + { + "epoch": 14.0, + "learning_rate": 3.8951898734177215e-06, + "loss": 0.0138, + "step": 24625 + }, + { + "epoch": 14.0, + "learning_rate": 3.888860759493671e-06, + "loss": 0.0151, + "step": 24650 + }, + { + "epoch": 14.0, + "learning_rate": 3.88253164556962e-06, + "loss": 0.0101, + "step": 24675 + }, + { + "epoch": 14.0, + "learning_rate": 3.87620253164557e-06, + "loss": 0.0108, + "step": 24700 + }, + { + "epoch": 14.01, + "learning_rate": 3.869873417721519e-06, + "loss": 0.0151, + "step": 24725 + }, + { + "epoch": 14.01, + "learning_rate": 3.863544303797469e-06, + "loss": 0.0132, + "step": 24750 + }, + { + "epoch": 14.01, + "learning_rate": 3.857215189873418e-06, + "loss": 0.012, + "step": 24775 + }, + { + "epoch": 14.01, + "learning_rate": 3.8508860759493675e-06, + "loss": 0.017, + "step": 24800 + }, + { + "epoch": 14.01, + "learning_rate": 3.844556962025317e-06, + "loss": 0.0141, + "step": 24825 + }, + { + "epoch": 14.01, + "learning_rate": 3.838227848101266e-06, + "loss": 0.0166, + "step": 24850 + }, + { + "epoch": 14.01, + "learning_rate": 3.831898734177216e-06, + "loss": 0.0112, + "step": 24875 + }, + { + "epoch": 14.01, + "learning_rate": 3.825569620253165e-06, + "loss": 0.0097, + "step": 24900 + }, + { + "epoch": 14.01, + "learning_rate": 3.819240506329115e-06, + "loss": 0.0138, + "step": 24925 + }, + { + "epoch": 14.01, + "learning_rate": 3.8129113924050636e-06, + "loss": 0.0069, + "step": 24950 + }, + { + "epoch": 14.01, + "learning_rate": 3.806582278481013e-06, + "loss": 0.0111, + "step": 24975 + }, + { + "epoch": 14.01, + "learning_rate": 3.8002531645569625e-06, + "loss": 0.0149, + "step": 25000 + }, + { + "epoch": 14.01, + "eval_loss": 0.2640444040298462, + "eval_runtime": 1478.4977, + "eval_samples_per_second": 7.042, + "eval_steps_per_second": 0.44, + "eval_wer": 24.063805915543345, + "step": 25000 + }, + { + "epoch": 14.01, + "learning_rate": 3.7939240506329115e-06, + "loss": 0.0176, + "step": 25025 + }, + { + "epoch": 14.01, + "learning_rate": 3.787594936708861e-06, + "loss": 0.0104, + "step": 25050 + }, + { + "epoch": 14.01, + "learning_rate": 3.7812658227848103e-06, + "loss": 0.0084, + "step": 25075 + }, + { + "epoch": 14.01, + "learning_rate": 3.7749367088607598e-06, + "loss": 0.0118, + "step": 25100 + }, + { + "epoch": 14.02, + "learning_rate": 3.768607594936709e-06, + "loss": 0.0099, + "step": 25125 + }, + { + "epoch": 14.02, + "learning_rate": 3.762278481012658e-06, + "loss": 0.0089, + "step": 25150 + }, + { + "epoch": 14.02, + "learning_rate": 3.7559493670886076e-06, + "loss": 0.0119, + "step": 25175 + }, + { + "epoch": 14.02, + "learning_rate": 3.749620253164557e-06, + "loss": 0.0118, + "step": 25200 + }, + { + "epoch": 14.02, + "learning_rate": 3.743291139240507e-06, + "loss": 0.0108, + "step": 25225 + }, + { + "epoch": 14.02, + "learning_rate": 3.7369620253164563e-06, + "loss": 0.0101, + "step": 25250 + }, + { + "epoch": 14.02, + "learning_rate": 3.7306329113924058e-06, + "loss": 0.0095, + "step": 25275 + }, + { + "epoch": 14.02, + "learning_rate": 3.7243037974683548e-06, + "loss": 0.0106, + "step": 25300 + }, + { + "epoch": 14.02, + "learning_rate": 3.717974683544304e-06, + "loss": 0.0135, + "step": 25325 + }, + { + "epoch": 14.02, + "learning_rate": 3.7116455696202536e-06, + "loss": 0.0115, + "step": 25350 + }, + { + "epoch": 14.02, + "learning_rate": 3.705316455696203e-06, + "loss": 0.0114, + "step": 25375 + }, + { + "epoch": 14.02, + "learning_rate": 3.698987341772152e-06, + "loss": 0.0119, + "step": 25400 + }, + { + "epoch": 14.02, + "learning_rate": 3.6926582278481015e-06, + "loss": 0.0123, + "step": 25425 + }, + { + "epoch": 14.02, + "learning_rate": 3.686329113924051e-06, + "loss": 0.0138, + "step": 25450 + }, + { + "epoch": 14.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 0.0133, + "step": 25475 + }, + { + "epoch": 14.02, + "learning_rate": 3.6736708860759498e-06, + "loss": 0.0106, + "step": 25500 + }, + { + "epoch": 14.03, + "learning_rate": 3.6673417721518988e-06, + "loss": 0.0099, + "step": 25525 + }, + { + "epoch": 14.03, + "learning_rate": 3.661012658227848e-06, + "loss": 0.0104, + "step": 25550 + }, + { + "epoch": 14.03, + "learning_rate": 3.6546835443037976e-06, + "loss": 0.0143, + "step": 25575 + }, + { + "epoch": 14.03, + "learning_rate": 3.648354430379747e-06, + "loss": 0.0096, + "step": 25600 + }, + { + "epoch": 14.03, + "learning_rate": 3.642025316455696e-06, + "loss": 0.0094, + "step": 25625 + }, + { + "epoch": 14.03, + "learning_rate": 3.6356962025316463e-06, + "loss": 0.0162, + "step": 25650 + }, + { + "epoch": 14.03, + "learning_rate": 3.6293670886075953e-06, + "loss": 0.008, + "step": 25675 + }, + { + "epoch": 14.03, + "learning_rate": 3.6230379746835448e-06, + "loss": 0.0094, + "step": 25700 + }, + { + "epoch": 14.03, + "learning_rate": 3.616708860759494e-06, + "loss": 0.0122, + "step": 25725 + }, + { + "epoch": 14.03, + "learning_rate": 3.6103797468354436e-06, + "loss": 0.0128, + "step": 25750 + }, + { + "epoch": 14.03, + "learning_rate": 3.6040506329113926e-06, + "loss": 0.0117, + "step": 25775 + }, + { + "epoch": 14.03, + "learning_rate": 3.597721518987342e-06, + "loss": 0.0067, + "step": 25800 + }, + { + "epoch": 14.03, + "learning_rate": 3.5913924050632915e-06, + "loss": 0.0078, + "step": 25825 + }, + { + "epoch": 14.03, + "learning_rate": 3.585063291139241e-06, + "loss": 0.0071, + "step": 25850 + }, + { + "epoch": 14.03, + "learning_rate": 3.57873417721519e-06, + "loss": 0.0093, + "step": 25875 + }, + { + "epoch": 14.03, + "learning_rate": 3.5724050632911393e-06, + "loss": 0.0064, + "step": 25900 + }, + { + "epoch": 14.04, + "learning_rate": 3.5660759493670888e-06, + "loss": 0.0067, + "step": 25925 + }, + { + "epoch": 14.04, + "learning_rate": 3.559746835443038e-06, + "loss": 0.0076, + "step": 25950 + }, + { + "epoch": 14.04, + "learning_rate": 3.5534177215189876e-06, + "loss": 0.0099, + "step": 25975 + }, + { + "epoch": 14.04, + "learning_rate": 3.5470886075949366e-06, + "loss": 0.0111, + "step": 26000 + }, + { + "epoch": 14.04, + "eval_loss": 0.2734878659248352, + "eval_runtime": 1456.7487, + "eval_samples_per_second": 7.147, + "eval_steps_per_second": 0.447, + "eval_wer": 24.359745686921737, + "step": 26000 + }, + { + "epoch": 14.04, + "learning_rate": 3.540759493670886e-06, + "loss": 0.0134, + "step": 26025 + }, + { + "epoch": 14.04, + "learning_rate": 3.534430379746836e-06, + "loss": 0.008, + "step": 26050 + }, + { + "epoch": 14.04, + "learning_rate": 3.5281012658227853e-06, + "loss": 0.0138, + "step": 26075 + }, + { + "epoch": 14.04, + "learning_rate": 3.5217721518987348e-06, + "loss": 0.0075, + "step": 26100 + }, + { + "epoch": 14.04, + "learning_rate": 3.515443037974684e-06, + "loss": 0.0097, + "step": 26125 + }, + { + "epoch": 14.04, + "learning_rate": 3.509113924050633e-06, + "loss": 0.0139, + "step": 26150 + }, + { + "epoch": 14.04, + "learning_rate": 3.5027848101265826e-06, + "loss": 0.0099, + "step": 26175 + }, + { + "epoch": 14.04, + "learning_rate": 3.496455696202532e-06, + "loss": 0.0148, + "step": 26200 + }, + { + "epoch": 14.04, + "learning_rate": 3.4901265822784815e-06, + "loss": 0.0119, + "step": 26225 + }, + { + "epoch": 14.04, + "learning_rate": 3.4837974683544305e-06, + "loss": 0.0086, + "step": 26250 + }, + { + "epoch": 15.0, + "learning_rate": 3.47746835443038e-06, + "loss": 0.0091, + "step": 26275 + }, + { + "epoch": 15.0, + "learning_rate": 3.4711392405063293e-06, + "loss": 0.0105, + "step": 26300 + }, + { + "epoch": 15.0, + "learning_rate": 3.465063291139241e-06, + "loss": 0.0138, + "step": 26325 + }, + { + "epoch": 15.0, + "learning_rate": 3.45873417721519e-06, + "loss": 0.0185, + "step": 26350 + }, + { + "epoch": 15.0, + "learning_rate": 3.4524050632911392e-06, + "loss": 0.015, + "step": 26375 + }, + { + "epoch": 15.0, + "learning_rate": 3.4460759493670886e-06, + "loss": 0.0177, + "step": 26400 + }, + { + "epoch": 15.0, + "learning_rate": 3.439746835443038e-06, + "loss": 0.014, + "step": 26425 + }, + { + "epoch": 15.0, + "learning_rate": 3.433417721518988e-06, + "loss": 0.0127, + "step": 26450 + }, + { + "epoch": 15.01, + "learning_rate": 3.4270886075949374e-06, + "loss": 0.0135, + "step": 26475 + }, + { + "epoch": 15.01, + "learning_rate": 3.4207594936708864e-06, + "loss": 0.0171, + "step": 26500 + }, + { + "epoch": 15.01, + "learning_rate": 3.4144303797468358e-06, + "loss": 0.0114, + "step": 26525 + }, + { + "epoch": 15.01, + "learning_rate": 3.4081012658227852e-06, + "loss": 0.0099, + "step": 26550 + }, + { + "epoch": 15.01, + "learning_rate": 3.4017721518987346e-06, + "loss": 0.0126, + "step": 26575 + }, + { + "epoch": 15.01, + "learning_rate": 3.395443037974684e-06, + "loss": 0.0116, + "step": 26600 + }, + { + "epoch": 15.01, + "learning_rate": 3.389113924050633e-06, + "loss": 0.0087, + "step": 26625 + }, + { + "epoch": 15.01, + "learning_rate": 3.3827848101265825e-06, + "loss": 0.0073, + "step": 26650 + }, + { + "epoch": 15.01, + "learning_rate": 3.376455696202532e-06, + "loss": 0.0102, + "step": 26675 + }, + { + "epoch": 15.01, + "learning_rate": 3.3701265822784814e-06, + "loss": 0.0106, + "step": 26700 + }, + { + "epoch": 15.01, + "learning_rate": 3.3637974683544304e-06, + "loss": 0.0094, + "step": 26725 + }, + { + "epoch": 15.01, + "learning_rate": 3.35746835443038e-06, + "loss": 0.0113, + "step": 26750 + }, + { + "epoch": 15.01, + "learning_rate": 3.3511392405063292e-06, + "loss": 0.0169, + "step": 26775 + }, + { + "epoch": 15.01, + "learning_rate": 3.3448101265822786e-06, + "loss": 0.0084, + "step": 26800 + }, + { + "epoch": 15.01, + "learning_rate": 3.338481012658228e-06, + "loss": 0.0153, + "step": 26825 + }, + { + "epoch": 15.01, + "learning_rate": 3.332151898734177e-06, + "loss": 0.012, + "step": 26850 + }, + { + "epoch": 15.02, + "learning_rate": 3.325822784810127e-06, + "loss": 0.0106, + "step": 26875 + }, + { + "epoch": 15.02, + "learning_rate": 3.3194936708860764e-06, + "loss": 0.0099, + "step": 26900 + }, + { + "epoch": 15.02, + "learning_rate": 3.3131645569620258e-06, + "loss": 0.008, + "step": 26925 + }, + { + "epoch": 15.02, + "learning_rate": 3.306835443037975e-06, + "loss": 0.0076, + "step": 26950 + }, + { + "epoch": 15.02, + "learning_rate": 3.3005063291139246e-06, + "loss": 0.0088, + "step": 26975 + }, + { + "epoch": 15.02, + "learning_rate": 3.2941772151898736e-06, + "loss": 0.009, + "step": 27000 + }, + { + "epoch": 15.02, + "eval_loss": 0.2693029046058655, + "eval_runtime": 1479.1374, + "eval_samples_per_second": 7.039, + "eval_steps_per_second": 0.44, + "eval_wer": 24.28819980812696, + "step": 27000 + }, + { + "epoch": 15.02, + "learning_rate": 3.287848101265823e-06, + "loss": 0.0115, + "step": 27025 + }, + { + "epoch": 15.02, + "learning_rate": 3.2815189873417725e-06, + "loss": 0.0099, + "step": 27050 + }, + { + "epoch": 15.02, + "learning_rate": 3.275189873417722e-06, + "loss": 0.0073, + "step": 27075 + }, + { + "epoch": 15.02, + "learning_rate": 3.268860759493671e-06, + "loss": 0.0061, + "step": 27100 + }, + { + "epoch": 15.02, + "learning_rate": 3.2625316455696204e-06, + "loss": 0.007, + "step": 27125 + }, + { + "epoch": 15.02, + "learning_rate": 3.2562025316455698e-06, + "loss": 0.0109, + "step": 27150 + }, + { + "epoch": 15.02, + "learning_rate": 3.2498734177215192e-06, + "loss": 0.0123, + "step": 27175 + }, + { + "epoch": 15.02, + "learning_rate": 3.2435443037974686e-06, + "loss": 0.0121, + "step": 27200 + }, + { + "epoch": 15.02, + "learning_rate": 3.2372151898734176e-06, + "loss": 0.0129, + "step": 27225 + }, + { + "epoch": 15.02, + "learning_rate": 3.230886075949367e-06, + "loss": 0.0128, + "step": 27250 + }, + { + "epoch": 15.03, + "learning_rate": 3.224556962025317e-06, + "loss": 0.0094, + "step": 27275 + }, + { + "epoch": 15.03, + "learning_rate": 3.2182278481012664e-06, + "loss": 0.0081, + "step": 27300 + }, + { + "epoch": 15.03, + "learning_rate": 3.2118987341772158e-06, + "loss": 0.0087, + "step": 27325 + }, + { + "epoch": 15.03, + "learning_rate": 3.2055696202531648e-06, + "loss": 0.0131, + "step": 27350 + }, + { + "epoch": 15.03, + "learning_rate": 3.199240506329114e-06, + "loss": 0.0073, + "step": 27375 + }, + { + "epoch": 15.03, + "learning_rate": 3.1929113924050636e-06, + "loss": 0.0086, + "step": 27400 + }, + { + "epoch": 15.03, + "learning_rate": 3.186582278481013e-06, + "loss": 0.0108, + "step": 27425 + }, + { + "epoch": 15.03, + "learning_rate": 3.1802531645569625e-06, + "loss": 0.01, + "step": 27450 + }, + { + "epoch": 15.03, + "learning_rate": 3.1739240506329115e-06, + "loss": 0.0123, + "step": 27475 + }, + { + "epoch": 15.03, + "learning_rate": 3.167594936708861e-06, + "loss": 0.0081, + "step": 27500 + }, + { + "epoch": 15.03, + "learning_rate": 3.1612658227848104e-06, + "loss": 0.0123, + "step": 27525 + }, + { + "epoch": 15.03, + "learning_rate": 3.1549367088607598e-06, + "loss": 0.0096, + "step": 27550 + }, + { + "epoch": 15.03, + "learning_rate": 3.1486075949367088e-06, + "loss": 0.0054, + "step": 27575 + }, + { + "epoch": 15.03, + "learning_rate": 3.1422784810126582e-06, + "loss": 0.006, + "step": 27600 + }, + { + "epoch": 15.03, + "learning_rate": 3.1359493670886076e-06, + "loss": 0.0089, + "step": 27625 + }, + { + "epoch": 15.03, + "learning_rate": 3.129620253164557e-06, + "loss": 0.0109, + "step": 27650 + }, + { + "epoch": 15.04, + "learning_rate": 3.1232911392405065e-06, + "loss": 0.0089, + "step": 27675 + }, + { + "epoch": 15.04, + "learning_rate": 3.1169620253164563e-06, + "loss": 0.0094, + "step": 27700 + }, + { + "epoch": 15.04, + "learning_rate": 3.1106329113924054e-06, + "loss": 0.0102, + "step": 27725 + }, + { + "epoch": 15.04, + "learning_rate": 3.1043037974683548e-06, + "loss": 0.0097, + "step": 27750 + }, + { + "epoch": 15.04, + "learning_rate": 3.097974683544304e-06, + "loss": 0.0078, + "step": 27775 + }, + { + "epoch": 15.04, + "learning_rate": 3.0916455696202536e-06, + "loss": 0.0073, + "step": 27800 + }, + { + "epoch": 15.04, + "learning_rate": 3.085316455696203e-06, + "loss": 0.0105, + "step": 27825 + }, + { + "epoch": 15.04, + "learning_rate": 3.078987341772152e-06, + "loss": 0.0083, + "step": 27850 + }, + { + "epoch": 15.04, + "learning_rate": 3.0726582278481015e-06, + "loss": 0.0138, + "step": 27875 + }, + { + "epoch": 15.04, + "learning_rate": 3.066329113924051e-06, + "loss": 0.0049, + "step": 27900 + }, + { + "epoch": 15.04, + "learning_rate": 3.0600000000000003e-06, + "loss": 0.0107, + "step": 27925 + }, + { + "epoch": 15.04, + "learning_rate": 3.0536708860759494e-06, + "loss": 0.0105, + "step": 27950 + }, + { + "epoch": 15.04, + "learning_rate": 3.0473417721518988e-06, + "loss": 0.0134, + "step": 27975 + }, + { + "epoch": 15.04, + "learning_rate": 3.041012658227848e-06, + "loss": 0.0137, + "step": 28000 + }, + { + "epoch": 15.04, + "eval_loss": 0.2780563235282898, + "eval_runtime": 1465.9542, + "eval_samples_per_second": 7.102, + "eval_steps_per_second": 0.444, + "eval_wer": 25.010162766874256, + "step": 28000 + }, + { + "epoch": 16.0, + "learning_rate": 3.0346835443037976e-06, + "loss": 0.009, + "step": 28025 + }, + { + "epoch": 16.0, + "learning_rate": 3.028354430379747e-06, + "loss": 0.0081, + "step": 28050 + }, + { + "epoch": 16.0, + "learning_rate": 3.022025316455696e-06, + "loss": 0.0109, + "step": 28075 + }, + { + "epoch": 16.0, + "learning_rate": 3.015696202531646e-06, + "loss": 0.0132, + "step": 28100 + }, + { + "epoch": 16.0, + "learning_rate": 3.0093670886075953e-06, + "loss": 0.0142, + "step": 28125 + }, + { + "epoch": 16.0, + "learning_rate": 3.0030379746835448e-06, + "loss": 0.0152, + "step": 28150 + }, + { + "epoch": 16.0, + "learning_rate": 2.996708860759494e-06, + "loss": 0.0074, + "step": 28175 + }, + { + "epoch": 16.0, + "learning_rate": 2.990379746835443e-06, + "loss": 0.0129, + "step": 28200 + }, + { + "epoch": 16.01, + "learning_rate": 2.9840506329113926e-06, + "loss": 0.0118, + "step": 28225 + }, + { + "epoch": 16.01, + "learning_rate": 2.977721518987342e-06, + "loss": 0.0153, + "step": 28250 + }, + { + "epoch": 16.01, + "learning_rate": 2.9713924050632915e-06, + "loss": 0.0121, + "step": 28275 + }, + { + "epoch": 16.01, + "learning_rate": 2.965063291139241e-06, + "loss": 0.0097, + "step": 28300 + }, + { + "epoch": 16.01, + "learning_rate": 2.95873417721519e-06, + "loss": 0.0083, + "step": 28325 + }, + { + "epoch": 16.01, + "learning_rate": 2.9524050632911393e-06, + "loss": 0.01, + "step": 28350 + }, + { + "epoch": 16.01, + "learning_rate": 2.9460759493670888e-06, + "loss": 0.0084, + "step": 28375 + }, + { + "epoch": 16.01, + "learning_rate": 2.939746835443038e-06, + "loss": 0.0124, + "step": 28400 + }, + { + "epoch": 16.01, + "learning_rate": 2.933417721518987e-06, + "loss": 0.0071, + "step": 28425 + }, + { + "epoch": 16.01, + "learning_rate": 2.9270886075949366e-06, + "loss": 0.0113, + "step": 28450 + }, + { + "epoch": 16.01, + "learning_rate": 2.920759493670886e-06, + "loss": 0.0085, + "step": 28475 + }, + { + "epoch": 16.01, + "learning_rate": 2.914430379746836e-06, + "loss": 0.0133, + "step": 28500 + }, + { + "epoch": 16.01, + "learning_rate": 2.9081012658227853e-06, + "loss": 0.014, + "step": 28525 + }, + { + "epoch": 16.01, + "learning_rate": 2.9017721518987348e-06, + "loss": 0.0105, + "step": 28550 + }, + { + "epoch": 16.01, + "learning_rate": 2.8954430379746838e-06, + "loss": 0.01, + "step": 28575 + }, + { + "epoch": 16.01, + "learning_rate": 2.889113924050633e-06, + "loss": 0.0135, + "step": 28600 + }, + { + "epoch": 16.02, + "learning_rate": 2.8827848101265826e-06, + "loss": 0.0118, + "step": 28625 + }, + { + "epoch": 16.02, + "learning_rate": 2.876455696202532e-06, + "loss": 0.0111, + "step": 28650 + }, + { + "epoch": 16.02, + "learning_rate": 2.8701265822784815e-06, + "loss": 0.008, + "step": 28675 + }, + { + "epoch": 16.02, + "learning_rate": 2.8637974683544305e-06, + "loss": 0.0103, + "step": 28700 + }, + { + "epoch": 16.02, + "learning_rate": 2.85746835443038e-06, + "loss": 0.0082, + "step": 28725 + }, + { + "epoch": 16.02, + "learning_rate": 2.8511392405063293e-06, + "loss": 0.0057, + "step": 28750 + }, + { + "epoch": 16.02, + "learning_rate": 2.8448101265822788e-06, + "loss": 0.0062, + "step": 28775 + }, + { + "epoch": 16.02, + "learning_rate": 2.8384810126582278e-06, + "loss": 0.0092, + "step": 28800 + }, + { + "epoch": 16.02, + "learning_rate": 2.832151898734177e-06, + "loss": 0.0084, + "step": 28825 + }, + { + "epoch": 16.02, + "learning_rate": 2.8258227848101266e-06, + "loss": 0.0099, + "step": 28850 + }, + { + "epoch": 16.02, + "learning_rate": 2.819493670886076e-06, + "loss": 0.0117, + "step": 28875 + }, + { + "epoch": 16.02, + "learning_rate": 2.8131645569620255e-06, + "loss": 0.0117, + "step": 28900 + }, + { + "epoch": 16.02, + "learning_rate": 2.8068354430379753e-06, + "loss": 0.0127, + "step": 28925 + }, + { + "epoch": 16.02, + "learning_rate": 2.8005063291139243e-06, + "loss": 0.0123, + "step": 28950 + }, + { + "epoch": 16.02, + "learning_rate": 2.7941772151898738e-06, + "loss": 0.0128, + "step": 28975 + }, + { + "epoch": 16.02, + "learning_rate": 2.787848101265823e-06, + "loss": 0.0073, + "step": 29000 + }, + { + "epoch": 16.02, + "eval_loss": 0.2685891091823578, + "eval_runtime": 1456.562, + "eval_samples_per_second": 7.148, + "eval_steps_per_second": 0.447, + "eval_wer": 23.278427291500677, + "step": 29000 + }, + { + "epoch": 16.03, + "learning_rate": 2.7815189873417726e-06, + "loss": 0.0068, + "step": 29025 + }, + { + "epoch": 16.03, + "learning_rate": 2.7751898734177216e-06, + "loss": 0.0087, + "step": 29050 + }, + { + "epoch": 16.03, + "learning_rate": 2.768860759493671e-06, + "loss": 0.0084, + "step": 29075 + }, + { + "epoch": 16.03, + "learning_rate": 2.7625316455696205e-06, + "loss": 0.0039, + "step": 29100 + }, + { + "epoch": 16.03, + "learning_rate": 2.75620253164557e-06, + "loss": 0.0066, + "step": 29125 + }, + { + "epoch": 16.03, + "learning_rate": 2.7498734177215193e-06, + "loss": 0.0108, + "step": 29150 + }, + { + "epoch": 16.03, + "learning_rate": 2.7435443037974683e-06, + "loss": 0.0052, + "step": 29175 + }, + { + "epoch": 16.03, + "learning_rate": 2.7372151898734178e-06, + "loss": 0.0061, + "step": 29200 + }, + { + "epoch": 16.03, + "learning_rate": 2.730886075949367e-06, + "loss": 0.0083, + "step": 29225 + }, + { + "epoch": 16.03, + "learning_rate": 2.7245569620253166e-06, + "loss": 0.0062, + "step": 29250 + }, + { + "epoch": 16.03, + "learning_rate": 2.7182278481012656e-06, + "loss": 0.0078, + "step": 29275 + }, + { + "epoch": 16.03, + "learning_rate": 2.711898734177215e-06, + "loss": 0.0127, + "step": 29300 + }, + { + "epoch": 16.03, + "learning_rate": 2.705569620253165e-06, + "loss": 0.0092, + "step": 29325 + }, + { + "epoch": 16.03, + "learning_rate": 2.6992405063291143e-06, + "loss": 0.0087, + "step": 29350 + }, + { + "epoch": 16.03, + "learning_rate": 2.6929113924050638e-06, + "loss": 0.0062, + "step": 29375 + }, + { + "epoch": 16.03, + "learning_rate": 2.686582278481013e-06, + "loss": 0.0065, + "step": 29400 + }, + { + "epoch": 16.04, + "learning_rate": 2.680253164556962e-06, + "loss": 0.0057, + "step": 29425 + }, + { + "epoch": 16.04, + "learning_rate": 2.6739240506329116e-06, + "loss": 0.0071, + "step": 29450 + }, + { + "epoch": 16.04, + "learning_rate": 2.667594936708861e-06, + "loss": 0.01, + "step": 29475 + }, + { + "epoch": 16.04, + "learning_rate": 2.6612658227848105e-06, + "loss": 0.009, + "step": 29500 + }, + { + "epoch": 16.04, + "learning_rate": 2.65493670886076e-06, + "loss": 0.0091, + "step": 29525 + }, + { + "epoch": 16.04, + "learning_rate": 2.648607594936709e-06, + "loss": 0.0104, + "step": 29550 + }, + { + "epoch": 16.04, + "learning_rate": 2.6422784810126583e-06, + "loss": 0.0071, + "step": 29575 + }, + { + "epoch": 16.04, + "learning_rate": 2.6359493670886078e-06, + "loss": 0.0081, + "step": 29600 + }, + { + "epoch": 16.04, + "learning_rate": 2.629620253164557e-06, + "loss": 0.008, + "step": 29625 + }, + { + "epoch": 16.04, + "learning_rate": 2.623291139240506e-06, + "loss": 0.0067, + "step": 29650 + }, + { + "epoch": 16.04, + "learning_rate": 2.6169620253164556e-06, + "loss": 0.0066, + "step": 29675 + }, + { + "epoch": 16.04, + "learning_rate": 2.610632911392405e-06, + "loss": 0.0121, + "step": 29700 + }, + { + "epoch": 16.04, + "learning_rate": 2.6043037974683545e-06, + "loss": 0.0075, + "step": 29725 + }, + { + "epoch": 16.04, + "learning_rate": 2.5979746835443043e-06, + "loss": 0.0111, + "step": 29750 + }, + { + "epoch": 17.0, + "learning_rate": 2.5916455696202538e-06, + "loss": 0.0095, + "step": 29775 + }, + { + "epoch": 17.0, + "learning_rate": 2.5853164556962028e-06, + "loss": 0.0077, + "step": 29800 + }, + { + "epoch": 17.0, + "learning_rate": 2.578987341772152e-06, + "loss": 0.0047, + "step": 29825 + }, + { + "epoch": 17.0, + "learning_rate": 2.5726582278481016e-06, + "loss": 0.0092, + "step": 29850 + }, + { + "epoch": 17.0, + "learning_rate": 2.566329113924051e-06, + "loss": 0.0191, + "step": 29875 + }, + { + "epoch": 17.0, + "learning_rate": 2.56e-06, + "loss": 0.0102, + "step": 29900 + }, + { + "epoch": 17.0, + "learning_rate": 2.5536708860759495e-06, + "loss": 0.0146, + "step": 29925 + }, + { + "epoch": 17.0, + "learning_rate": 2.547341772151899e-06, + "loss": 0.0096, + "step": 29950 + }, + { + "epoch": 17.01, + "learning_rate": 2.5410126582278483e-06, + "loss": 0.0159, + "step": 29975 + }, + { + "epoch": 17.01, + "learning_rate": 2.5346835443037978e-06, + "loss": 0.0094, + "step": 30000 + }, + { + "epoch": 17.01, + "eval_loss": 0.2636851966381073, + "eval_runtime": 1508.7196, + "eval_samples_per_second": 6.901, + "eval_steps_per_second": 0.431, + "eval_wer": 23.029642758418838, + "step": 30000 + }, + { + "epoch": 17.01, + "learning_rate": 2.5283544303797468e-06, + "loss": 0.0078, + "step": 30025 + }, + { + "epoch": 17.01, + "learning_rate": 2.522025316455696e-06, + "loss": 0.01, + "step": 30050 + }, + { + "epoch": 17.01, + "learning_rate": 2.5156962025316456e-06, + "loss": 0.0058, + "step": 30075 + }, + { + "epoch": 17.01, + "learning_rate": 2.509367088607595e-06, + "loss": 0.0092, + "step": 30100 + }, + { + "epoch": 17.01, + "learning_rate": 2.503037974683544e-06, + "loss": 0.0084, + "step": 30125 + }, + { + "epoch": 17.01, + "learning_rate": 2.496708860759494e-06, + "loss": 0.0069, + "step": 30150 + }, + { + "epoch": 17.01, + "learning_rate": 2.4903797468354433e-06, + "loss": 0.0059, + "step": 30175 + }, + { + "epoch": 17.01, + "learning_rate": 2.4840506329113923e-06, + "loss": 0.0097, + "step": 30200 + }, + { + "epoch": 17.01, + "learning_rate": 2.4777215189873418e-06, + "loss": 0.0126, + "step": 30225 + }, + { + "epoch": 17.01, + "learning_rate": 2.4713924050632916e-06, + "loss": 0.0096, + "step": 30250 + }, + { + "epoch": 17.01, + "learning_rate": 2.4650632911392406e-06, + "loss": 0.0094, + "step": 30275 + }, + { + "epoch": 17.01, + "learning_rate": 2.45873417721519e-06, + "loss": 0.0111, + "step": 30300 + }, + { + "epoch": 17.01, + "learning_rate": 2.4524050632911395e-06, + "loss": 0.0062, + "step": 30325 + }, + { + "epoch": 17.01, + "learning_rate": 2.446075949367089e-06, + "loss": 0.0078, + "step": 30350 + }, + { + "epoch": 17.02, + "learning_rate": 2.4397468354430383e-06, + "loss": 0.0043, + "step": 30375 + }, + { + "epoch": 17.02, + "learning_rate": 2.4336708860759494e-06, + "loss": 0.0102, + "step": 30400 + }, + { + "epoch": 17.02, + "learning_rate": 2.4273417721518988e-06, + "loss": 0.0101, + "step": 30425 + }, + { + "epoch": 17.02, + "learning_rate": 2.4210126582278482e-06, + "loss": 0.0066, + "step": 30450 + }, + { + "epoch": 17.02, + "learning_rate": 2.4146835443037976e-06, + "loss": 0.0078, + "step": 30475 + }, + { + "epoch": 17.02, + "learning_rate": 2.408354430379747e-06, + "loss": 0.0074, + "step": 30500 + }, + { + "epoch": 17.02, + "learning_rate": 2.4020253164556965e-06, + "loss": 0.0072, + "step": 30525 + }, + { + "epoch": 17.02, + "learning_rate": 2.395696202531646e-06, + "loss": 0.0109, + "step": 30550 + }, + { + "epoch": 17.02, + "learning_rate": 2.389367088607595e-06, + "loss": 0.0113, + "step": 30575 + }, + { + "epoch": 17.02, + "learning_rate": 2.3830379746835444e-06, + "loss": 0.0109, + "step": 30600 + }, + { + "epoch": 17.02, + "learning_rate": 2.3767088607594938e-06, + "loss": 0.0089, + "step": 30625 + }, + { + "epoch": 17.02, + "learning_rate": 2.3703797468354432e-06, + "loss": 0.0062, + "step": 30650 + }, + { + "epoch": 17.02, + "learning_rate": 2.3640506329113926e-06, + "loss": 0.0101, + "step": 30675 + }, + { + "epoch": 17.02, + "learning_rate": 2.357721518987342e-06, + "loss": 0.0096, + "step": 30700 + }, + { + "epoch": 17.02, + "learning_rate": 2.3513924050632915e-06, + "loss": 0.0093, + "step": 30725 + }, + { + "epoch": 17.02, + "learning_rate": 2.3450632911392405e-06, + "loss": 0.0113, + "step": 30750 + }, + { + "epoch": 17.03, + "learning_rate": 2.33873417721519e-06, + "loss": 0.0094, + "step": 30775 + }, + { + "epoch": 17.03, + "learning_rate": 2.3324050632911394e-06, + "loss": 0.0086, + "step": 30800 + }, + { + "epoch": 17.03, + "learning_rate": 2.3260759493670888e-06, + "loss": 0.0096, + "step": 30825 + }, + { + "epoch": 17.03, + "learning_rate": 2.319746835443038e-06, + "loss": 0.0086, + "step": 30850 + }, + { + "epoch": 17.03, + "learning_rate": 2.3134177215189876e-06, + "loss": 0.0064, + "step": 30875 + }, + { + "epoch": 17.03, + "learning_rate": 2.307088607594937e-06, + "loss": 0.0107, + "step": 30900 + }, + { + "epoch": 17.03, + "learning_rate": 2.3007594936708865e-06, + "loss": 0.0073, + "step": 30925 + }, + { + "epoch": 17.03, + "learning_rate": 2.2944303797468355e-06, + "loss": 0.005, + "step": 30950 + }, + { + "epoch": 17.03, + "learning_rate": 2.288101265822785e-06, + "loss": 0.0055, + "step": 30975 + }, + { + "epoch": 17.03, + "learning_rate": 2.2817721518987344e-06, + "loss": 0.006, + "step": 31000 + }, + { + "epoch": 17.03, + "eval_loss": 0.2709895372390747, + "eval_runtime": 1461.5544, + "eval_samples_per_second": 7.123, + "eval_steps_per_second": 0.445, + "eval_wer": 23.29468771849949, + "step": 31000 + }, + { + "epoch": 17.03, + "learning_rate": 2.2754430379746838e-06, + "loss": 0.0094, + "step": 31025 + }, + { + "epoch": 17.03, + "learning_rate": 2.2691139240506328e-06, + "loss": 0.0088, + "step": 31050 + }, + { + "epoch": 17.03, + "learning_rate": 2.2627848101265826e-06, + "loss": 0.0057, + "step": 31075 + }, + { + "epoch": 17.03, + "learning_rate": 2.256455696202532e-06, + "loss": 0.0066, + "step": 31100 + }, + { + "epoch": 17.03, + "learning_rate": 2.250126582278481e-06, + "loss": 0.008, + "step": 31125 + }, + { + "epoch": 17.03, + "learning_rate": 2.2437974683544305e-06, + "loss": 0.0048, + "step": 31150 + }, + { + "epoch": 17.04, + "learning_rate": 2.23746835443038e-06, + "loss": 0.0074, + "step": 31175 + }, + { + "epoch": 17.04, + "learning_rate": 2.2311392405063294e-06, + "loss": 0.0061, + "step": 31200 + }, + { + "epoch": 17.04, + "learning_rate": 2.2248101265822788e-06, + "loss": 0.0088, + "step": 31225 + }, + { + "epoch": 17.04, + "learning_rate": 2.2184810126582278e-06, + "loss": 0.0087, + "step": 31250 + }, + { + "epoch": 17.04, + "learning_rate": 2.2121518987341776e-06, + "loss": 0.0089, + "step": 31275 + }, + { + "epoch": 17.04, + "learning_rate": 2.2058227848101266e-06, + "loss": 0.0061, + "step": 31300 + }, + { + "epoch": 17.04, + "learning_rate": 2.199493670886076e-06, + "loss": 0.0097, + "step": 31325 + }, + { + "epoch": 17.04, + "learning_rate": 2.1931645569620255e-06, + "loss": 0.0063, + "step": 31350 + }, + { + "epoch": 17.04, + "learning_rate": 2.186835443037975e-06, + "loss": 0.0084, + "step": 31375 + }, + { + "epoch": 17.04, + "learning_rate": 2.1805063291139243e-06, + "loss": 0.0089, + "step": 31400 + }, + { + "epoch": 17.04, + "learning_rate": 2.1741772151898734e-06, + "loss": 0.0077, + "step": 31425 + }, + { + "epoch": 17.04, + "learning_rate": 2.1678481012658228e-06, + "loss": 0.0153, + "step": 31450 + }, + { + "epoch": 17.04, + "learning_rate": 2.1615189873417726e-06, + "loss": 0.0088, + "step": 31475 + }, + { + "epoch": 17.04, + "learning_rate": 2.1551898734177216e-06, + "loss": 0.0095, + "step": 31500 + }, + { + "epoch": 18.0, + "learning_rate": 2.148860759493671e-06, + "loss": 0.0084, + "step": 31525 + }, + { + "epoch": 18.0, + "learning_rate": 2.1425316455696205e-06, + "loss": 0.0054, + "step": 31550 + }, + { + "epoch": 18.0, + "learning_rate": 2.13620253164557e-06, + "loss": 0.013, + "step": 31575 + }, + { + "epoch": 18.0, + "learning_rate": 2.129873417721519e-06, + "loss": 0.0074, + "step": 31600 + }, + { + "epoch": 18.0, + "learning_rate": 2.1235443037974684e-06, + "loss": 0.0083, + "step": 31625 + }, + { + "epoch": 18.0, + "learning_rate": 2.1172151898734178e-06, + "loss": 0.0118, + "step": 31650 + }, + { + "epoch": 18.0, + "learning_rate": 2.110886075949367e-06, + "loss": 0.0095, + "step": 31675 + }, + { + "epoch": 18.0, + "learning_rate": 2.1045569620253166e-06, + "loss": 0.0075, + "step": 31700 + }, + { + "epoch": 18.01, + "learning_rate": 2.098227848101266e-06, + "loss": 0.0149, + "step": 31725 + }, + { + "epoch": 18.01, + "learning_rate": 2.0918987341772155e-06, + "loss": 0.0095, + "step": 31750 + }, + { + "epoch": 18.01, + "learning_rate": 2.085569620253165e-06, + "loss": 0.0098, + "step": 31775 + }, + { + "epoch": 18.01, + "learning_rate": 2.079240506329114e-06, + "loss": 0.0058, + "step": 31800 + }, + { + "epoch": 18.01, + "learning_rate": 2.0729113924050633e-06, + "loss": 0.0072, + "step": 31825 + }, + { + "epoch": 18.01, + "learning_rate": 2.0665822784810128e-06, + "loss": 0.0103, + "step": 31850 + }, + { + "epoch": 18.01, + "learning_rate": 2.060253164556962e-06, + "loss": 0.0085, + "step": 31875 + }, + { + "epoch": 18.01, + "learning_rate": 2.0539240506329116e-06, + "loss": 0.0064, + "step": 31900 + }, + { + "epoch": 18.01, + "learning_rate": 2.047594936708861e-06, + "loss": 0.0077, + "step": 31925 + }, + { + "epoch": 18.01, + "learning_rate": 2.0412658227848105e-06, + "loss": 0.0086, + "step": 31950 + }, + { + "epoch": 18.01, + "learning_rate": 2.0349367088607595e-06, + "loss": 0.0136, + "step": 31975 + }, + { + "epoch": 18.01, + "learning_rate": 2.028607594936709e-06, + "loss": 0.0085, + "step": 32000 + }, + { + "epoch": 18.01, + "eval_loss": 0.26488059759140015, + "eval_runtime": 1466.953, + "eval_samples_per_second": 7.097, + "eval_steps_per_second": 0.444, + "eval_wer": 23.089806338314446, + "step": 32000 + }, + { + "epoch": 18.01, + "learning_rate": 2.0222784810126583e-06, + "loss": 0.011, + "step": 32025 + }, + { + "epoch": 18.01, + "learning_rate": 2.0159493670886078e-06, + "loss": 0.0086, + "step": 32050 + }, + { + "epoch": 18.01, + "learning_rate": 2.009620253164557e-06, + "loss": 0.008, + "step": 32075 + }, + { + "epoch": 18.01, + "learning_rate": 2.0032911392405066e-06, + "loss": 0.0083, + "step": 32100 + }, + { + "epoch": 18.02, + "learning_rate": 1.996962025316456e-06, + "loss": 0.0073, + "step": 32125 + }, + { + "epoch": 18.02, + "learning_rate": 1.9906329113924055e-06, + "loss": 0.0036, + "step": 32150 + }, + { + "epoch": 18.02, + "learning_rate": 1.9843037974683545e-06, + "loss": 0.0096, + "step": 32175 + }, + { + "epoch": 18.02, + "learning_rate": 1.977974683544304e-06, + "loss": 0.0071, + "step": 32200 + }, + { + "epoch": 18.02, + "learning_rate": 1.9716455696202533e-06, + "loss": 0.009, + "step": 32225 + }, + { + "epoch": 18.02, + "learning_rate": 1.9653164556962028e-06, + "loss": 0.0093, + "step": 32250 + }, + { + "epoch": 18.02, + "learning_rate": 1.9589873417721518e-06, + "loss": 0.0071, + "step": 32275 + }, + { + "epoch": 18.02, + "learning_rate": 1.9526582278481016e-06, + "loss": 0.0037, + "step": 32300 + }, + { + "epoch": 18.02, + "learning_rate": 1.946329113924051e-06, + "loss": 0.007, + "step": 32325 + }, + { + "epoch": 18.02, + "learning_rate": 1.94e-06, + "loss": 0.0104, + "step": 32350 + }, + { + "epoch": 18.02, + "learning_rate": 1.9336708860759495e-06, + "loss": 0.0082, + "step": 32375 + }, + { + "epoch": 18.02, + "learning_rate": 1.927341772151899e-06, + "loss": 0.0095, + "step": 32400 + }, + { + "epoch": 18.02, + "learning_rate": 1.9210126582278483e-06, + "loss": 0.0101, + "step": 32425 + }, + { + "epoch": 18.02, + "learning_rate": 1.9146835443037973e-06, + "loss": 0.0122, + "step": 32450 + }, + { + "epoch": 18.02, + "learning_rate": 1.9083544303797468e-06, + "loss": 0.0082, + "step": 32475 + }, + { + "epoch": 18.02, + "learning_rate": 1.9020253164556964e-06, + "loss": 0.0093, + "step": 32500 + }, + { + "epoch": 18.03, + "learning_rate": 1.8956962025316458e-06, + "loss": 0.0073, + "step": 32525 + }, + { + "epoch": 18.03, + "learning_rate": 1.889367088607595e-06, + "loss": 0.0061, + "step": 32550 + }, + { + "epoch": 18.03, + "learning_rate": 1.8830379746835445e-06, + "loss": 0.0072, + "step": 32575 + }, + { + "epoch": 18.03, + "learning_rate": 1.876708860759494e-06, + "loss": 0.0064, + "step": 32600 + }, + { + "epoch": 18.03, + "learning_rate": 1.8703797468354431e-06, + "loss": 0.0084, + "step": 32625 + }, + { + "epoch": 18.03, + "learning_rate": 1.8640506329113926e-06, + "loss": 0.0067, + "step": 32650 + }, + { + "epoch": 18.03, + "learning_rate": 1.8577215189873418e-06, + "loss": 0.0052, + "step": 32675 + }, + { + "epoch": 18.03, + "learning_rate": 1.8513924050632912e-06, + "loss": 0.0085, + "step": 32700 + }, + { + "epoch": 18.03, + "learning_rate": 1.8450632911392408e-06, + "loss": 0.0077, + "step": 32725 + }, + { + "epoch": 18.03, + "learning_rate": 1.83873417721519e-06, + "loss": 0.0068, + "step": 32750 + }, + { + "epoch": 18.03, + "learning_rate": 1.8324050632911395e-06, + "loss": 0.0079, + "step": 32775 + }, + { + "epoch": 18.03, + "learning_rate": 1.8260759493670887e-06, + "loss": 0.008, + "step": 32800 + }, + { + "epoch": 18.03, + "learning_rate": 1.8197468354430381e-06, + "loss": 0.0071, + "step": 32825 + }, + { + "epoch": 18.03, + "learning_rate": 1.8134177215189873e-06, + "loss": 0.0091, + "step": 32850 + }, + { + "epoch": 18.03, + "learning_rate": 1.8070886075949368e-06, + "loss": 0.0059, + "step": 32875 + }, + { + "epoch": 18.03, + "learning_rate": 1.8007594936708862e-06, + "loss": 0.0079, + "step": 32900 + }, + { + "epoch": 18.04, + "learning_rate": 1.7944303797468356e-06, + "loss": 0.0055, + "step": 32925 + }, + { + "epoch": 18.04, + "learning_rate": 1.788101265822785e-06, + "loss": 0.0095, + "step": 32950 + }, + { + "epoch": 18.04, + "learning_rate": 1.7817721518987343e-06, + "loss": 0.0086, + "step": 32975 + }, + { + "epoch": 18.04, + "learning_rate": 1.7754430379746837e-06, + "loss": 0.0057, + "step": 33000 + }, + { + "epoch": 18.04, + "eval_loss": 0.2678743898868561, + "eval_runtime": 1484.126, + "eval_samples_per_second": 7.015, + "eval_steps_per_second": 0.439, + "eval_wer": 23.405258622091416, + "step": 33000 + }, + { + "epoch": 18.04, + "learning_rate": 1.7691139240506331e-06, + "loss": 0.0085, + "step": 33025 + }, + { + "epoch": 18.04, + "learning_rate": 1.7627848101265823e-06, + "loss": 0.0073, + "step": 33050 + }, + { + "epoch": 18.04, + "learning_rate": 1.7564556962025318e-06, + "loss": 0.0045, + "step": 33075 + }, + { + "epoch": 18.04, + "learning_rate": 1.750126582278481e-06, + "loss": 0.0065, + "step": 33100 + }, + { + "epoch": 18.04, + "learning_rate": 1.7437974683544306e-06, + "loss": 0.0076, + "step": 33125 + }, + { + "epoch": 18.04, + "learning_rate": 1.73746835443038e-06, + "loss": 0.007, + "step": 33150 + }, + { + "epoch": 18.04, + "learning_rate": 1.7311392405063293e-06, + "loss": 0.0109, + "step": 33175 + }, + { + "epoch": 18.04, + "learning_rate": 1.7248101265822787e-06, + "loss": 0.0095, + "step": 33200 + }, + { + "epoch": 18.04, + "learning_rate": 1.718481012658228e-06, + "loss": 0.0109, + "step": 33225 + }, + { + "epoch": 18.04, + "learning_rate": 1.7121518987341773e-06, + "loss": 0.0117, + "step": 33250 + }, + { + "epoch": 19.0, + "learning_rate": 1.7058227848101266e-06, + "loss": 0.0038, + "step": 33275 + }, + { + "epoch": 19.0, + "learning_rate": 1.699493670886076e-06, + "loss": 0.013, + "step": 33300 + }, + { + "epoch": 19.0, + "learning_rate": 1.6931645569620256e-06, + "loss": 0.0104, + "step": 33325 + }, + { + "epoch": 19.0, + "learning_rate": 1.6868354430379748e-06, + "loss": 0.0064, + "step": 33350 + }, + { + "epoch": 19.0, + "learning_rate": 1.6805063291139243e-06, + "loss": 0.0137, + "step": 33375 + }, + { + "epoch": 19.0, + "learning_rate": 1.6741772151898735e-06, + "loss": 0.012, + "step": 33400 + }, + { + "epoch": 19.0, + "learning_rate": 1.667848101265823e-06, + "loss": 0.0106, + "step": 33425 + }, + { + "epoch": 19.0, + "learning_rate": 1.6615189873417723e-06, + "loss": 0.0072, + "step": 33450 + }, + { + "epoch": 19.01, + "learning_rate": 1.6551898734177216e-06, + "loss": 0.0154, + "step": 33475 + }, + { + "epoch": 19.01, + "learning_rate": 1.648860759493671e-06, + "loss": 0.0129, + "step": 33500 + }, + { + "epoch": 19.01, + "learning_rate": 1.6425316455696206e-06, + "loss": 0.008, + "step": 33525 + }, + { + "epoch": 19.01, + "learning_rate": 1.6362025316455698e-06, + "loss": 0.0071, + "step": 33550 + }, + { + "epoch": 19.01, + "learning_rate": 1.6298734177215193e-06, + "loss": 0.0088, + "step": 33575 + }, + { + "epoch": 19.01, + "learning_rate": 1.6235443037974685e-06, + "loss": 0.0102, + "step": 33600 + }, + { + "epoch": 19.01, + "learning_rate": 1.617215189873418e-06, + "loss": 0.0064, + "step": 33625 + }, + { + "epoch": 19.01, + "learning_rate": 1.6108860759493671e-06, + "loss": 0.0098, + "step": 33650 + }, + { + "epoch": 19.01, + "learning_rate": 1.6045569620253166e-06, + "loss": 0.009, + "step": 33675 + }, + { + "epoch": 19.01, + "learning_rate": 1.5982278481012658e-06, + "loss": 0.0074, + "step": 33700 + }, + { + "epoch": 19.01, + "learning_rate": 1.5918987341772152e-06, + "loss": 0.0055, + "step": 33725 + }, + { + "epoch": 19.01, + "learning_rate": 1.5855696202531648e-06, + "loss": 0.0059, + "step": 33750 + }, + { + "epoch": 19.01, + "learning_rate": 1.579240506329114e-06, + "loss": 0.0102, + "step": 33775 + }, + { + "epoch": 19.01, + "learning_rate": 1.5729113924050635e-06, + "loss": 0.0111, + "step": 33800 + }, + { + "epoch": 19.01, + "learning_rate": 1.5665822784810127e-06, + "loss": 0.0033, + "step": 33825 + }, + { + "epoch": 19.01, + "learning_rate": 1.5602531645569621e-06, + "loss": 0.0057, + "step": 33850 + }, + { + "epoch": 19.02, + "learning_rate": 1.5539240506329115e-06, + "loss": 0.0074, + "step": 33875 + }, + { + "epoch": 19.02, + "learning_rate": 1.5475949367088608e-06, + "loss": 0.0056, + "step": 33900 + }, + { + "epoch": 19.02, + "learning_rate": 1.5412658227848102e-06, + "loss": 0.0053, + "step": 33925 + }, + { + "epoch": 19.02, + "learning_rate": 1.5349367088607598e-06, + "loss": 0.0049, + "step": 33950 + }, + { + "epoch": 19.02, + "learning_rate": 1.528607594936709e-06, + "loss": 0.0065, + "step": 33975 + }, + { + "epoch": 19.02, + "learning_rate": 1.5222784810126585e-06, + "loss": 0.007, + "step": 34000 + }, + { + "epoch": 19.02, + "eval_loss": 0.2660910487174988, + "eval_runtime": 1471.5922, + "eval_samples_per_second": 7.075, + "eval_steps_per_second": 0.442, + "eval_wer": 23.14834387551017, + "step": 34000 + }, + { + "epoch": 19.02, + "learning_rate": 1.5159493670886077e-06, + "loss": 0.0027, + "step": 34025 + }, + { + "epoch": 19.02, + "learning_rate": 1.5096202531645571e-06, + "loss": 0.0081, + "step": 34050 + }, + { + "epoch": 19.02, + "learning_rate": 1.5032911392405063e-06, + "loss": 0.0044, + "step": 34075 + }, + { + "epoch": 19.02, + "learning_rate": 1.4969620253164558e-06, + "loss": 0.0093, + "step": 34100 + }, + { + "epoch": 19.02, + "learning_rate": 1.490632911392405e-06, + "loss": 0.0067, + "step": 34125 + }, + { + "epoch": 19.02, + "learning_rate": 1.4843037974683546e-06, + "loss": 0.0047, + "step": 34150 + }, + { + "epoch": 19.02, + "learning_rate": 1.477974683544304e-06, + "loss": 0.0119, + "step": 34175 + }, + { + "epoch": 19.02, + "learning_rate": 1.4716455696202533e-06, + "loss": 0.0091, + "step": 34200 + }, + { + "epoch": 19.02, + "learning_rate": 1.4653164556962027e-06, + "loss": 0.0094, + "step": 34225 + }, + { + "epoch": 19.02, + "learning_rate": 1.458987341772152e-06, + "loss": 0.0069, + "step": 34250 + }, + { + "epoch": 19.03, + "learning_rate": 1.4526582278481013e-06, + "loss": 0.0066, + "step": 34275 + }, + { + "epoch": 19.03, + "learning_rate": 1.4463291139240508e-06, + "loss": 0.0072, + "step": 34300 + }, + { + "epoch": 19.03, + "learning_rate": 1.44e-06, + "loss": 0.0067, + "step": 34325 + }, + { + "epoch": 19.03, + "learning_rate": 1.4336708860759496e-06, + "loss": 0.0076, + "step": 34350 + }, + { + "epoch": 19.03, + "learning_rate": 1.427341772151899e-06, + "loss": 0.0083, + "step": 34375 + }, + { + "epoch": 19.03, + "learning_rate": 1.4212658227848103e-06, + "loss": 0.0072, + "step": 34400 + }, + { + "epoch": 19.03, + "learning_rate": 1.4149367088607597e-06, + "loss": 0.0086, + "step": 34425 + }, + { + "epoch": 19.03, + "learning_rate": 1.408607594936709e-06, + "loss": 0.0057, + "step": 34450 + }, + { + "epoch": 19.03, + "learning_rate": 1.4022784810126584e-06, + "loss": 0.0111, + "step": 34475 + }, + { + "epoch": 19.03, + "learning_rate": 1.3959493670886076e-06, + "loss": 0.01, + "step": 34500 + }, + { + "epoch": 19.03, + "learning_rate": 1.389620253164557e-06, + "loss": 0.008, + "step": 34525 + }, + { + "epoch": 19.03, + "learning_rate": 1.3832911392405066e-06, + "loss": 0.0082, + "step": 34550 + }, + { + "epoch": 19.03, + "learning_rate": 1.3769620253164559e-06, + "loss": 0.0062, + "step": 34575 + }, + { + "epoch": 19.03, + "learning_rate": 1.3706329113924053e-06, + "loss": 0.0052, + "step": 34600 + }, + { + "epoch": 19.03, + "learning_rate": 1.3643037974683545e-06, + "loss": 0.007, + "step": 34625 + }, + { + "epoch": 19.03, + "learning_rate": 1.357974683544304e-06, + "loss": 0.0045, + "step": 34650 + }, + { + "epoch": 19.04, + "learning_rate": 1.3516455696202531e-06, + "loss": 0.0089, + "step": 34675 + }, + { + "epoch": 19.04, + "learning_rate": 1.3453164556962026e-06, + "loss": 0.0081, + "step": 34700 + }, + { + "epoch": 19.04, + "learning_rate": 1.338987341772152e-06, + "loss": 0.0101, + "step": 34725 + }, + { + "epoch": 19.04, + "learning_rate": 1.3326582278481014e-06, + "loss": 0.0072, + "step": 34750 + }, + { + "epoch": 19.04, + "learning_rate": 1.3263291139240509e-06, + "loss": 0.005, + "step": 34775 + }, + { + "epoch": 19.04, + "learning_rate": 1.32e-06, + "loss": 0.0083, + "step": 34800 + }, + { + "epoch": 19.04, + "learning_rate": 1.3136708860759495e-06, + "loss": 0.0087, + "step": 34825 + }, + { + "epoch": 19.04, + "learning_rate": 1.307341772151899e-06, + "loss": 0.0067, + "step": 34850 + }, + { + "epoch": 19.04, + "learning_rate": 1.3010126582278481e-06, + "loss": 0.0048, + "step": 34875 + }, + { + "epoch": 19.04, + "learning_rate": 1.2946835443037976e-06, + "loss": 0.0065, + "step": 34900 + }, + { + "epoch": 19.04, + "learning_rate": 1.2883544303797468e-06, + "loss": 0.0076, + "step": 34925 + }, + { + "epoch": 19.04, + "learning_rate": 1.2820253164556964e-06, + "loss": 0.007, + "step": 34950 + }, + { + "epoch": 19.04, + "learning_rate": 1.2756962025316458e-06, + "loss": 0.0044, + "step": 34975 + }, + { + "epoch": 19.04, + "learning_rate": 1.269367088607595e-06, + "loss": 0.0082, + "step": 35000 + }, + { + "epoch": 19.04, + "eval_loss": 0.26719748973846436, + "eval_runtime": 1465.7868, + "eval_samples_per_second": 7.103, + "eval_steps_per_second": 0.444, + "eval_wer": 24.169498691035628, + "step": 35000 + }, + { + "epoch": 20.0, + "learning_rate": 1.2630379746835445e-06, + "loss": 0.0072, + "step": 35025 + }, + { + "epoch": 20.0, + "learning_rate": 1.2567088607594937e-06, + "loss": 0.0078, + "step": 35050 + }, + { + "epoch": 20.0, + "learning_rate": 1.2503797468354431e-06, + "loss": 0.0069, + "step": 35075 + }, + { + "epoch": 20.0, + "learning_rate": 1.2440506329113924e-06, + "loss": 0.0063, + "step": 35100 + }, + { + "epoch": 20.0, + "learning_rate": 1.237721518987342e-06, + "loss": 0.0073, + "step": 35125 + }, + { + "epoch": 20.0, + "learning_rate": 1.2313924050632912e-06, + "loss": 0.0076, + "step": 35150 + }, + { + "epoch": 20.0, + "learning_rate": 1.2250632911392406e-06, + "loss": 0.0084, + "step": 35175 + }, + { + "epoch": 20.0, + "learning_rate": 1.2187341772151899e-06, + "loss": 0.0061, + "step": 35200 + }, + { + "epoch": 20.01, + "learning_rate": 1.2124050632911393e-06, + "loss": 0.0074, + "step": 35225 + }, + { + "epoch": 20.01, + "learning_rate": 1.2060759493670887e-06, + "loss": 0.011, + "step": 35250 + }, + { + "epoch": 20.01, + "learning_rate": 1.1997468354430381e-06, + "loss": 0.0076, + "step": 35275 + }, + { + "epoch": 20.01, + "learning_rate": 1.1934177215189874e-06, + "loss": 0.007, + "step": 35300 + }, + { + "epoch": 20.01, + "learning_rate": 1.1870886075949368e-06, + "loss": 0.0074, + "step": 35325 + }, + { + "epoch": 20.01, + "learning_rate": 1.1807594936708862e-06, + "loss": 0.0077, + "step": 35350 + }, + { + "epoch": 20.01, + "learning_rate": 1.1744303797468354e-06, + "loss": 0.0074, + "step": 35375 + }, + { + "epoch": 20.01, + "learning_rate": 1.1681012658227848e-06, + "loss": 0.007, + "step": 35400 + }, + { + "epoch": 20.01, + "learning_rate": 1.1617721518987343e-06, + "loss": 0.0068, + "step": 35425 + }, + { + "epoch": 20.01, + "learning_rate": 1.1554430379746837e-06, + "loss": 0.0049, + "step": 35450 + }, + { + "epoch": 20.01, + "learning_rate": 1.149113924050633e-06, + "loss": 0.0074, + "step": 35475 + }, + { + "epoch": 20.01, + "learning_rate": 1.1427848101265823e-06, + "loss": 0.0097, + "step": 35500 + }, + { + "epoch": 20.01, + "learning_rate": 1.1364556962025318e-06, + "loss": 0.0065, + "step": 35525 + }, + { + "epoch": 20.01, + "learning_rate": 1.1301265822784812e-06, + "loss": 0.0056, + "step": 35550 + }, + { + "epoch": 20.01, + "learning_rate": 1.1237974683544304e-06, + "loss": 0.0056, + "step": 35575 + }, + { + "epoch": 20.01, + "learning_rate": 1.1174683544303798e-06, + "loss": 0.0056, + "step": 35600 + }, + { + "epoch": 20.02, + "learning_rate": 1.1111392405063293e-06, + "loss": 0.0104, + "step": 35625 + }, + { + "epoch": 20.02, + "learning_rate": 1.1048101265822787e-06, + "loss": 0.0071, + "step": 35650 + }, + { + "epoch": 20.02, + "learning_rate": 1.098481012658228e-06, + "loss": 0.0079, + "step": 35675 + }, + { + "epoch": 20.02, + "learning_rate": 1.0921518987341773e-06, + "loss": 0.0077, + "step": 35700 + }, + { + "epoch": 20.02, + "learning_rate": 1.0858227848101268e-06, + "loss": 0.0038, + "step": 35725 + }, + { + "epoch": 20.02, + "learning_rate": 1.079493670886076e-06, + "loss": 0.0035, + "step": 35750 + }, + { + "epoch": 20.02, + "learning_rate": 1.0731645569620254e-06, + "loss": 0.0047, + "step": 35775 + }, + { + "epoch": 20.02, + "learning_rate": 1.0668354430379746e-06, + "loss": 0.0079, + "step": 35800 + }, + { + "epoch": 20.02, + "learning_rate": 1.0605063291139243e-06, + "loss": 0.006, + "step": 35825 + }, + { + "epoch": 20.02, + "learning_rate": 1.0541772151898735e-06, + "loss": 0.0084, + "step": 35850 + }, + { + "epoch": 20.02, + "learning_rate": 1.047848101265823e-06, + "loss": 0.0073, + "step": 35875 + }, + { + "epoch": 20.02, + "learning_rate": 1.0415189873417721e-06, + "loss": 0.0049, + "step": 35900 + }, + { + "epoch": 20.02, + "learning_rate": 1.0351898734177216e-06, + "loss": 0.0114, + "step": 35925 + }, + { + "epoch": 20.02, + "learning_rate": 1.028860759493671e-06, + "loss": 0.0086, + "step": 35950 + }, + { + "epoch": 20.02, + "learning_rate": 1.0225316455696204e-06, + "loss": 0.0088, + "step": 35975 + }, + { + "epoch": 20.02, + "learning_rate": 1.0162025316455696e-06, + "loss": 0.0054, + "step": 36000 + }, + { + "epoch": 20.02, + "eval_loss": 0.26699596643447876, + "eval_runtime": 1450.3878, + "eval_samples_per_second": 7.178, + "eval_steps_per_second": 0.449, + "eval_wer": 22.689799834143646, + "step": 36000 + }, + { + "epoch": 20.03, + "learning_rate": 1.009873417721519e-06, + "loss": 0.005, + "step": 36025 + }, + { + "epoch": 20.03, + "learning_rate": 1.0035443037974685e-06, + "loss": 0.0058, + "step": 36050 + }, + { + "epoch": 20.03, + "learning_rate": 9.97215189873418e-07, + "loss": 0.0068, + "step": 36075 + }, + { + "epoch": 20.03, + "learning_rate": 9.908860759493671e-07, + "loss": 0.0074, + "step": 36100 + }, + { + "epoch": 20.03, + "learning_rate": 9.845569620253166e-07, + "loss": 0.0081, + "step": 36125 + }, + { + "epoch": 20.03, + "learning_rate": 9.78227848101266e-07, + "loss": 0.0063, + "step": 36150 + }, + { + "epoch": 20.03, + "learning_rate": 9.718987341772152e-07, + "loss": 0.0085, + "step": 36175 + }, + { + "epoch": 20.03, + "learning_rate": 9.655696202531646e-07, + "loss": 0.0059, + "step": 36200 + }, + { + "epoch": 20.03, + "learning_rate": 9.592405063291138e-07, + "loss": 0.0079, + "step": 36225 + }, + { + "epoch": 20.03, + "learning_rate": 9.529113924050634e-07, + "loss": 0.0047, + "step": 36250 + }, + { + "epoch": 20.03, + "learning_rate": 9.465822784810127e-07, + "loss": 0.0058, + "step": 36275 + }, + { + "epoch": 20.03, + "learning_rate": 9.402531645569621e-07, + "loss": 0.0039, + "step": 36300 + }, + { + "epoch": 20.03, + "learning_rate": 9.339240506329115e-07, + "loss": 0.0054, + "step": 36325 + }, + { + "epoch": 20.03, + "learning_rate": 9.275949367088609e-07, + "loss": 0.0074, + "step": 36350 + }, + { + "epoch": 20.03, + "learning_rate": 9.212658227848102e-07, + "loss": 0.0063, + "step": 36375 + }, + { + "epoch": 20.03, + "learning_rate": 9.149367088607595e-07, + "loss": 0.004, + "step": 36400 + }, + { + "epoch": 20.04, + "learning_rate": 9.086075949367088e-07, + "loss": 0.0071, + "step": 36425 + }, + { + "epoch": 20.04, + "learning_rate": 9.022784810126584e-07, + "loss": 0.0075, + "step": 36450 + }, + { + "epoch": 20.04, + "learning_rate": 8.959493670886077e-07, + "loss": 0.006, + "step": 36475 + }, + { + "epoch": 20.04, + "learning_rate": 8.89873417721519e-07, + "loss": 0.0063, + "step": 36500 + }, + { + "epoch": 20.04, + "learning_rate": 8.835443037974684e-07, + "loss": 0.0042, + "step": 36525 + }, + { + "epoch": 20.04, + "learning_rate": 8.772151898734178e-07, + "loss": 0.0038, + "step": 36550 + }, + { + "epoch": 20.04, + "learning_rate": 8.708860759493671e-07, + "loss": 0.0058, + "step": 36575 + }, + { + "epoch": 20.04, + "learning_rate": 8.645569620253165e-07, + "loss": 0.0061, + "step": 36600 + }, + { + "epoch": 20.04, + "learning_rate": 8.582278481012659e-07, + "loss": 0.0046, + "step": 36625 + }, + { + "epoch": 20.04, + "learning_rate": 8.518987341772153e-07, + "loss": 0.006, + "step": 36650 + }, + { + "epoch": 20.04, + "learning_rate": 8.455696202531646e-07, + "loss": 0.0082, + "step": 36675 + }, + { + "epoch": 20.04, + "learning_rate": 8.392405063291139e-07, + "loss": 0.0074, + "step": 36700 + }, + { + "epoch": 20.04, + "learning_rate": 8.329113924050633e-07, + "loss": 0.0107, + "step": 36725 + }, + { + "epoch": 20.04, + "learning_rate": 8.265822784810128e-07, + "loss": 0.0062, + "step": 36750 + }, + { + "epoch": 21.0, + "learning_rate": 8.202531645569621e-07, + "loss": 0.0127, + "step": 36775 + }, + { + "epoch": 21.0, + "learning_rate": 8.139240506329114e-07, + "loss": 0.0061, + "step": 36800 + }, + { + "epoch": 21.0, + "learning_rate": 8.075949367088608e-07, + "loss": 0.005, + "step": 36825 + }, + { + "epoch": 21.0, + "learning_rate": 8.012658227848103e-07, + "loss": 0.0071, + "step": 36850 + }, + { + "epoch": 21.0, + "learning_rate": 7.949367088607596e-07, + "loss": 0.008, + "step": 36875 + }, + { + "epoch": 21.0, + "learning_rate": 7.886075949367089e-07, + "loss": 0.0091, + "step": 36900 + }, + { + "epoch": 21.0, + "learning_rate": 7.822784810126583e-07, + "loss": 0.008, + "step": 36925 + }, + { + "epoch": 21.0, + "learning_rate": 7.759493670886077e-07, + "loss": 0.006, + "step": 36950 + }, + { + "epoch": 21.01, + "learning_rate": 7.69873417721519e-07, + "loss": 0.0083, + "step": 36975 + }, + { + "epoch": 21.01, + "learning_rate": 7.635443037974683e-07, + "loss": 0.0078, + "step": 37000 + }, + { + "epoch": 21.01, + "eval_loss": 0.2638327479362488, + "eval_runtime": 1492.7276, + "eval_samples_per_second": 6.974, + "eval_steps_per_second": 0.436, + "eval_wer": 23.06053756971658, + "step": 37000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.707819540553728e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-37000/training_args.bin b/checkpoint-37000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-37000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-38000/config.json b/checkpoint-38000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-38000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-38000/generation_config.json b/checkpoint-38000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-38000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-38000/optimizer.pt b/checkpoint-38000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..061fcbc7302a46342783ae9ce76bb37ace19d60b --- /dev/null +++ b/checkpoint-38000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8df52767f5a0a4c8e2163b6e413c20dc190a0312e920b661a8ef8bd7b40265a +size 1934161093 diff --git a/checkpoint-38000/preprocessor_config.json b/checkpoint-38000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-38000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-38000/pytorch_model.bin b/checkpoint-38000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a4026c16298917188aca5371e58d37059579a20d --- /dev/null +++ b/checkpoint-38000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a9548c887e24b061df453d098b45e4af91947e8209e55e5f7c4b1ee73b749bd +size 967102601 diff --git a/checkpoint-38000/rng_state.pth b/checkpoint-38000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e7e97141793dc7d9ed075f1755148a1f52c60770 --- /dev/null +++ b/checkpoint-38000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3234c0c8118f623bad1b3fac4b704ae330521152beafeb17d864117522f52b9f +size 14639 diff --git a/checkpoint-38000/scaler.pt b/checkpoint-38000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0cbda1eb5b731d11c037ee2eddb9b6c278326917 --- /dev/null +++ b/checkpoint-38000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ce26b33db97566e375c9d7868fd1fa731aeba5341a0e582e20533ec09b5cdd +size 557 diff --git a/checkpoint-38000/scheduler.pt b/checkpoint-38000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8169598942b8d747415b7431a2d30086822d40c1 --- /dev/null +++ b/checkpoint-38000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9231df3e3e9554eb3eb1b05c9ee8ad8585812b27835805aa68106aa065a33e7 +size 627 diff --git a/checkpoint-38000/trainer_state.json b/checkpoint-38000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..01fdc8c8baa1b18239dc680be47ff5ebc2db075f --- /dev/null +++ b/checkpoint-38000/trainer_state.json @@ -0,0 +1,9478 @@ +{ + "best_metric": 22.437763215662045, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-38000", + "epoch": 21.030725, + "global_step": 38000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + }, + { + "epoch": 13.03, + "learning_rate": 4.046835443037975e-06, + "loss": 0.0128, + "step": 24025 + }, + { + "epoch": 13.03, + "learning_rate": 4.040506329113924e-06, + "loss": 0.012, + "step": 24050 + }, + { + "epoch": 13.03, + "learning_rate": 4.034177215189874e-06, + "loss": 0.0098, + "step": 24075 + }, + { + "epoch": 13.03, + "learning_rate": 4.027848101265823e-06, + "loss": 0.0091, + "step": 24100 + }, + { + "epoch": 13.03, + "learning_rate": 4.021518987341773e-06, + "loss": 0.0077, + "step": 24125 + }, + { + "epoch": 13.03, + "learning_rate": 4.015189873417722e-06, + "loss": 0.0135, + "step": 24150 + }, + { + "epoch": 13.04, + "learning_rate": 4.0088607594936715e-06, + "loss": 0.0124, + "step": 24175 + }, + { + "epoch": 13.04, + "learning_rate": 4.002531645569621e-06, + "loss": 0.0152, + "step": 24200 + }, + { + "epoch": 13.04, + "learning_rate": 3.99620253164557e-06, + "loss": 0.0127, + "step": 24225 + }, + { + "epoch": 13.04, + "learning_rate": 3.989873417721519e-06, + "loss": 0.0066, + "step": 24250 + }, + { + "epoch": 13.04, + "learning_rate": 3.983544303797468e-06, + "loss": 0.0163, + "step": 24275 + }, + { + "epoch": 13.04, + "learning_rate": 3.977215189873418e-06, + "loss": 0.0095, + "step": 24300 + }, + { + "epoch": 13.04, + "learning_rate": 3.970886075949367e-06, + "loss": 0.0103, + "step": 24325 + }, + { + "epoch": 13.04, + "learning_rate": 3.964556962025317e-06, + "loss": 0.0112, + "step": 24350 + }, + { + "epoch": 13.04, + "learning_rate": 3.958227848101266e-06, + "loss": 0.01, + "step": 24375 + }, + { + "epoch": 13.04, + "learning_rate": 3.9518987341772155e-06, + "loss": 0.0131, + "step": 24400 + }, + { + "epoch": 13.04, + "learning_rate": 3.945569620253165e-06, + "loss": 0.0143, + "step": 24425 + }, + { + "epoch": 13.04, + "learning_rate": 3.939240506329114e-06, + "loss": 0.0151, + "step": 24450 + }, + { + "epoch": 13.04, + "learning_rate": 3.932911392405064e-06, + "loss": 0.0129, + "step": 24475 + }, + { + "epoch": 13.04, + "learning_rate": 3.926582278481013e-06, + "loss": 0.0125, + "step": 24500 + }, + { + "epoch": 14.0, + "learning_rate": 3.920506329113925e-06, + "loss": 0.0165, + "step": 24525 + }, + { + "epoch": 14.0, + "learning_rate": 3.914177215189874e-06, + "loss": 0.0114, + "step": 24550 + }, + { + "epoch": 14.0, + "learning_rate": 3.9078481012658235e-06, + "loss": 0.0111, + "step": 24575 + }, + { + "epoch": 14.0, + "learning_rate": 3.901518987341772e-06, + "loss": 0.0134, + "step": 24600 + }, + { + "epoch": 14.0, + "learning_rate": 3.8951898734177215e-06, + "loss": 0.0138, + "step": 24625 + }, + { + "epoch": 14.0, + "learning_rate": 3.888860759493671e-06, + "loss": 0.0151, + "step": 24650 + }, + { + "epoch": 14.0, + "learning_rate": 3.88253164556962e-06, + "loss": 0.0101, + "step": 24675 + }, + { + "epoch": 14.0, + "learning_rate": 3.87620253164557e-06, + "loss": 0.0108, + "step": 24700 + }, + { + "epoch": 14.01, + "learning_rate": 3.869873417721519e-06, + "loss": 0.0151, + "step": 24725 + }, + { + "epoch": 14.01, + "learning_rate": 3.863544303797469e-06, + "loss": 0.0132, + "step": 24750 + }, + { + "epoch": 14.01, + "learning_rate": 3.857215189873418e-06, + "loss": 0.012, + "step": 24775 + }, + { + "epoch": 14.01, + "learning_rate": 3.8508860759493675e-06, + "loss": 0.017, + "step": 24800 + }, + { + "epoch": 14.01, + "learning_rate": 3.844556962025317e-06, + "loss": 0.0141, + "step": 24825 + }, + { + "epoch": 14.01, + "learning_rate": 3.838227848101266e-06, + "loss": 0.0166, + "step": 24850 + }, + { + "epoch": 14.01, + "learning_rate": 3.831898734177216e-06, + "loss": 0.0112, + "step": 24875 + }, + { + "epoch": 14.01, + "learning_rate": 3.825569620253165e-06, + "loss": 0.0097, + "step": 24900 + }, + { + "epoch": 14.01, + "learning_rate": 3.819240506329115e-06, + "loss": 0.0138, + "step": 24925 + }, + { + "epoch": 14.01, + "learning_rate": 3.8129113924050636e-06, + "loss": 0.0069, + "step": 24950 + }, + { + "epoch": 14.01, + "learning_rate": 3.806582278481013e-06, + "loss": 0.0111, + "step": 24975 + }, + { + "epoch": 14.01, + "learning_rate": 3.8002531645569625e-06, + "loss": 0.0149, + "step": 25000 + }, + { + "epoch": 14.01, + "eval_loss": 0.2640444040298462, + "eval_runtime": 1478.4977, + "eval_samples_per_second": 7.042, + "eval_steps_per_second": 0.44, + "eval_wer": 24.063805915543345, + "step": 25000 + }, + { + "epoch": 14.01, + "learning_rate": 3.7939240506329115e-06, + "loss": 0.0176, + "step": 25025 + }, + { + "epoch": 14.01, + "learning_rate": 3.787594936708861e-06, + "loss": 0.0104, + "step": 25050 + }, + { + "epoch": 14.01, + "learning_rate": 3.7812658227848103e-06, + "loss": 0.0084, + "step": 25075 + }, + { + "epoch": 14.01, + "learning_rate": 3.7749367088607598e-06, + "loss": 0.0118, + "step": 25100 + }, + { + "epoch": 14.02, + "learning_rate": 3.768607594936709e-06, + "loss": 0.0099, + "step": 25125 + }, + { + "epoch": 14.02, + "learning_rate": 3.762278481012658e-06, + "loss": 0.0089, + "step": 25150 + }, + { + "epoch": 14.02, + "learning_rate": 3.7559493670886076e-06, + "loss": 0.0119, + "step": 25175 + }, + { + "epoch": 14.02, + "learning_rate": 3.749620253164557e-06, + "loss": 0.0118, + "step": 25200 + }, + { + "epoch": 14.02, + "learning_rate": 3.743291139240507e-06, + "loss": 0.0108, + "step": 25225 + }, + { + "epoch": 14.02, + "learning_rate": 3.7369620253164563e-06, + "loss": 0.0101, + "step": 25250 + }, + { + "epoch": 14.02, + "learning_rate": 3.7306329113924058e-06, + "loss": 0.0095, + "step": 25275 + }, + { + "epoch": 14.02, + "learning_rate": 3.7243037974683548e-06, + "loss": 0.0106, + "step": 25300 + }, + { + "epoch": 14.02, + "learning_rate": 3.717974683544304e-06, + "loss": 0.0135, + "step": 25325 + }, + { + "epoch": 14.02, + "learning_rate": 3.7116455696202536e-06, + "loss": 0.0115, + "step": 25350 + }, + { + "epoch": 14.02, + "learning_rate": 3.705316455696203e-06, + "loss": 0.0114, + "step": 25375 + }, + { + "epoch": 14.02, + "learning_rate": 3.698987341772152e-06, + "loss": 0.0119, + "step": 25400 + }, + { + "epoch": 14.02, + "learning_rate": 3.6926582278481015e-06, + "loss": 0.0123, + "step": 25425 + }, + { + "epoch": 14.02, + "learning_rate": 3.686329113924051e-06, + "loss": 0.0138, + "step": 25450 + }, + { + "epoch": 14.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 0.0133, + "step": 25475 + }, + { + "epoch": 14.02, + "learning_rate": 3.6736708860759498e-06, + "loss": 0.0106, + "step": 25500 + }, + { + "epoch": 14.03, + "learning_rate": 3.6673417721518988e-06, + "loss": 0.0099, + "step": 25525 + }, + { + "epoch": 14.03, + "learning_rate": 3.661012658227848e-06, + "loss": 0.0104, + "step": 25550 + }, + { + "epoch": 14.03, + "learning_rate": 3.6546835443037976e-06, + "loss": 0.0143, + "step": 25575 + }, + { + "epoch": 14.03, + "learning_rate": 3.648354430379747e-06, + "loss": 0.0096, + "step": 25600 + }, + { + "epoch": 14.03, + "learning_rate": 3.642025316455696e-06, + "loss": 0.0094, + "step": 25625 + }, + { + "epoch": 14.03, + "learning_rate": 3.6356962025316463e-06, + "loss": 0.0162, + "step": 25650 + }, + { + "epoch": 14.03, + "learning_rate": 3.6293670886075953e-06, + "loss": 0.008, + "step": 25675 + }, + { + "epoch": 14.03, + "learning_rate": 3.6230379746835448e-06, + "loss": 0.0094, + "step": 25700 + }, + { + "epoch": 14.03, + "learning_rate": 3.616708860759494e-06, + "loss": 0.0122, + "step": 25725 + }, + { + "epoch": 14.03, + "learning_rate": 3.6103797468354436e-06, + "loss": 0.0128, + "step": 25750 + }, + { + "epoch": 14.03, + "learning_rate": 3.6040506329113926e-06, + "loss": 0.0117, + "step": 25775 + }, + { + "epoch": 14.03, + "learning_rate": 3.597721518987342e-06, + "loss": 0.0067, + "step": 25800 + }, + { + "epoch": 14.03, + "learning_rate": 3.5913924050632915e-06, + "loss": 0.0078, + "step": 25825 + }, + { + "epoch": 14.03, + "learning_rate": 3.585063291139241e-06, + "loss": 0.0071, + "step": 25850 + }, + { + "epoch": 14.03, + "learning_rate": 3.57873417721519e-06, + "loss": 0.0093, + "step": 25875 + }, + { + "epoch": 14.03, + "learning_rate": 3.5724050632911393e-06, + "loss": 0.0064, + "step": 25900 + }, + { + "epoch": 14.04, + "learning_rate": 3.5660759493670888e-06, + "loss": 0.0067, + "step": 25925 + }, + { + "epoch": 14.04, + "learning_rate": 3.559746835443038e-06, + "loss": 0.0076, + "step": 25950 + }, + { + "epoch": 14.04, + "learning_rate": 3.5534177215189876e-06, + "loss": 0.0099, + "step": 25975 + }, + { + "epoch": 14.04, + "learning_rate": 3.5470886075949366e-06, + "loss": 0.0111, + "step": 26000 + }, + { + "epoch": 14.04, + "eval_loss": 0.2734878659248352, + "eval_runtime": 1456.7487, + "eval_samples_per_second": 7.147, + "eval_steps_per_second": 0.447, + "eval_wer": 24.359745686921737, + "step": 26000 + }, + { + "epoch": 14.04, + "learning_rate": 3.540759493670886e-06, + "loss": 0.0134, + "step": 26025 + }, + { + "epoch": 14.04, + "learning_rate": 3.534430379746836e-06, + "loss": 0.008, + "step": 26050 + }, + { + "epoch": 14.04, + "learning_rate": 3.5281012658227853e-06, + "loss": 0.0138, + "step": 26075 + }, + { + "epoch": 14.04, + "learning_rate": 3.5217721518987348e-06, + "loss": 0.0075, + "step": 26100 + }, + { + "epoch": 14.04, + "learning_rate": 3.515443037974684e-06, + "loss": 0.0097, + "step": 26125 + }, + { + "epoch": 14.04, + "learning_rate": 3.509113924050633e-06, + "loss": 0.0139, + "step": 26150 + }, + { + "epoch": 14.04, + "learning_rate": 3.5027848101265826e-06, + "loss": 0.0099, + "step": 26175 + }, + { + "epoch": 14.04, + "learning_rate": 3.496455696202532e-06, + "loss": 0.0148, + "step": 26200 + }, + { + "epoch": 14.04, + "learning_rate": 3.4901265822784815e-06, + "loss": 0.0119, + "step": 26225 + }, + { + "epoch": 14.04, + "learning_rate": 3.4837974683544305e-06, + "loss": 0.0086, + "step": 26250 + }, + { + "epoch": 15.0, + "learning_rate": 3.47746835443038e-06, + "loss": 0.0091, + "step": 26275 + }, + { + "epoch": 15.0, + "learning_rate": 3.4711392405063293e-06, + "loss": 0.0105, + "step": 26300 + }, + { + "epoch": 15.0, + "learning_rate": 3.465063291139241e-06, + "loss": 0.0138, + "step": 26325 + }, + { + "epoch": 15.0, + "learning_rate": 3.45873417721519e-06, + "loss": 0.0185, + "step": 26350 + }, + { + "epoch": 15.0, + "learning_rate": 3.4524050632911392e-06, + "loss": 0.015, + "step": 26375 + }, + { + "epoch": 15.0, + "learning_rate": 3.4460759493670886e-06, + "loss": 0.0177, + "step": 26400 + }, + { + "epoch": 15.0, + "learning_rate": 3.439746835443038e-06, + "loss": 0.014, + "step": 26425 + }, + { + "epoch": 15.0, + "learning_rate": 3.433417721518988e-06, + "loss": 0.0127, + "step": 26450 + }, + { + "epoch": 15.01, + "learning_rate": 3.4270886075949374e-06, + "loss": 0.0135, + "step": 26475 + }, + { + "epoch": 15.01, + "learning_rate": 3.4207594936708864e-06, + "loss": 0.0171, + "step": 26500 + }, + { + "epoch": 15.01, + "learning_rate": 3.4144303797468358e-06, + "loss": 0.0114, + "step": 26525 + }, + { + "epoch": 15.01, + "learning_rate": 3.4081012658227852e-06, + "loss": 0.0099, + "step": 26550 + }, + { + "epoch": 15.01, + "learning_rate": 3.4017721518987346e-06, + "loss": 0.0126, + "step": 26575 + }, + { + "epoch": 15.01, + "learning_rate": 3.395443037974684e-06, + "loss": 0.0116, + "step": 26600 + }, + { + "epoch": 15.01, + "learning_rate": 3.389113924050633e-06, + "loss": 0.0087, + "step": 26625 + }, + { + "epoch": 15.01, + "learning_rate": 3.3827848101265825e-06, + "loss": 0.0073, + "step": 26650 + }, + { + "epoch": 15.01, + "learning_rate": 3.376455696202532e-06, + "loss": 0.0102, + "step": 26675 + }, + { + "epoch": 15.01, + "learning_rate": 3.3701265822784814e-06, + "loss": 0.0106, + "step": 26700 + }, + { + "epoch": 15.01, + "learning_rate": 3.3637974683544304e-06, + "loss": 0.0094, + "step": 26725 + }, + { + "epoch": 15.01, + "learning_rate": 3.35746835443038e-06, + "loss": 0.0113, + "step": 26750 + }, + { + "epoch": 15.01, + "learning_rate": 3.3511392405063292e-06, + "loss": 0.0169, + "step": 26775 + }, + { + "epoch": 15.01, + "learning_rate": 3.3448101265822786e-06, + "loss": 0.0084, + "step": 26800 + }, + { + "epoch": 15.01, + "learning_rate": 3.338481012658228e-06, + "loss": 0.0153, + "step": 26825 + }, + { + "epoch": 15.01, + "learning_rate": 3.332151898734177e-06, + "loss": 0.012, + "step": 26850 + }, + { + "epoch": 15.02, + "learning_rate": 3.325822784810127e-06, + "loss": 0.0106, + "step": 26875 + }, + { + "epoch": 15.02, + "learning_rate": 3.3194936708860764e-06, + "loss": 0.0099, + "step": 26900 + }, + { + "epoch": 15.02, + "learning_rate": 3.3131645569620258e-06, + "loss": 0.008, + "step": 26925 + }, + { + "epoch": 15.02, + "learning_rate": 3.306835443037975e-06, + "loss": 0.0076, + "step": 26950 + }, + { + "epoch": 15.02, + "learning_rate": 3.3005063291139246e-06, + "loss": 0.0088, + "step": 26975 + }, + { + "epoch": 15.02, + "learning_rate": 3.2941772151898736e-06, + "loss": 0.009, + "step": 27000 + }, + { + "epoch": 15.02, + "eval_loss": 0.2693029046058655, + "eval_runtime": 1479.1374, + "eval_samples_per_second": 7.039, + "eval_steps_per_second": 0.44, + "eval_wer": 24.28819980812696, + "step": 27000 + }, + { + "epoch": 15.02, + "learning_rate": 3.287848101265823e-06, + "loss": 0.0115, + "step": 27025 + }, + { + "epoch": 15.02, + "learning_rate": 3.2815189873417725e-06, + "loss": 0.0099, + "step": 27050 + }, + { + "epoch": 15.02, + "learning_rate": 3.275189873417722e-06, + "loss": 0.0073, + "step": 27075 + }, + { + "epoch": 15.02, + "learning_rate": 3.268860759493671e-06, + "loss": 0.0061, + "step": 27100 + }, + { + "epoch": 15.02, + "learning_rate": 3.2625316455696204e-06, + "loss": 0.007, + "step": 27125 + }, + { + "epoch": 15.02, + "learning_rate": 3.2562025316455698e-06, + "loss": 0.0109, + "step": 27150 + }, + { + "epoch": 15.02, + "learning_rate": 3.2498734177215192e-06, + "loss": 0.0123, + "step": 27175 + }, + { + "epoch": 15.02, + "learning_rate": 3.2435443037974686e-06, + "loss": 0.0121, + "step": 27200 + }, + { + "epoch": 15.02, + "learning_rate": 3.2372151898734176e-06, + "loss": 0.0129, + "step": 27225 + }, + { + "epoch": 15.02, + "learning_rate": 3.230886075949367e-06, + "loss": 0.0128, + "step": 27250 + }, + { + "epoch": 15.03, + "learning_rate": 3.224556962025317e-06, + "loss": 0.0094, + "step": 27275 + }, + { + "epoch": 15.03, + "learning_rate": 3.2182278481012664e-06, + "loss": 0.0081, + "step": 27300 + }, + { + "epoch": 15.03, + "learning_rate": 3.2118987341772158e-06, + "loss": 0.0087, + "step": 27325 + }, + { + "epoch": 15.03, + "learning_rate": 3.2055696202531648e-06, + "loss": 0.0131, + "step": 27350 + }, + { + "epoch": 15.03, + "learning_rate": 3.199240506329114e-06, + "loss": 0.0073, + "step": 27375 + }, + { + "epoch": 15.03, + "learning_rate": 3.1929113924050636e-06, + "loss": 0.0086, + "step": 27400 + }, + { + "epoch": 15.03, + "learning_rate": 3.186582278481013e-06, + "loss": 0.0108, + "step": 27425 + }, + { + "epoch": 15.03, + "learning_rate": 3.1802531645569625e-06, + "loss": 0.01, + "step": 27450 + }, + { + "epoch": 15.03, + "learning_rate": 3.1739240506329115e-06, + "loss": 0.0123, + "step": 27475 + }, + { + "epoch": 15.03, + "learning_rate": 3.167594936708861e-06, + "loss": 0.0081, + "step": 27500 + }, + { + "epoch": 15.03, + "learning_rate": 3.1612658227848104e-06, + "loss": 0.0123, + "step": 27525 + }, + { + "epoch": 15.03, + "learning_rate": 3.1549367088607598e-06, + "loss": 0.0096, + "step": 27550 + }, + { + "epoch": 15.03, + "learning_rate": 3.1486075949367088e-06, + "loss": 0.0054, + "step": 27575 + }, + { + "epoch": 15.03, + "learning_rate": 3.1422784810126582e-06, + "loss": 0.006, + "step": 27600 + }, + { + "epoch": 15.03, + "learning_rate": 3.1359493670886076e-06, + "loss": 0.0089, + "step": 27625 + }, + { + "epoch": 15.03, + "learning_rate": 3.129620253164557e-06, + "loss": 0.0109, + "step": 27650 + }, + { + "epoch": 15.04, + "learning_rate": 3.1232911392405065e-06, + "loss": 0.0089, + "step": 27675 + }, + { + "epoch": 15.04, + "learning_rate": 3.1169620253164563e-06, + "loss": 0.0094, + "step": 27700 + }, + { + "epoch": 15.04, + "learning_rate": 3.1106329113924054e-06, + "loss": 0.0102, + "step": 27725 + }, + { + "epoch": 15.04, + "learning_rate": 3.1043037974683548e-06, + "loss": 0.0097, + "step": 27750 + }, + { + "epoch": 15.04, + "learning_rate": 3.097974683544304e-06, + "loss": 0.0078, + "step": 27775 + }, + { + "epoch": 15.04, + "learning_rate": 3.0916455696202536e-06, + "loss": 0.0073, + "step": 27800 + }, + { + "epoch": 15.04, + "learning_rate": 3.085316455696203e-06, + "loss": 0.0105, + "step": 27825 + }, + { + "epoch": 15.04, + "learning_rate": 3.078987341772152e-06, + "loss": 0.0083, + "step": 27850 + }, + { + "epoch": 15.04, + "learning_rate": 3.0726582278481015e-06, + "loss": 0.0138, + "step": 27875 + }, + { + "epoch": 15.04, + "learning_rate": 3.066329113924051e-06, + "loss": 0.0049, + "step": 27900 + }, + { + "epoch": 15.04, + "learning_rate": 3.0600000000000003e-06, + "loss": 0.0107, + "step": 27925 + }, + { + "epoch": 15.04, + "learning_rate": 3.0536708860759494e-06, + "loss": 0.0105, + "step": 27950 + }, + { + "epoch": 15.04, + "learning_rate": 3.0473417721518988e-06, + "loss": 0.0134, + "step": 27975 + }, + { + "epoch": 15.04, + "learning_rate": 3.041012658227848e-06, + "loss": 0.0137, + "step": 28000 + }, + { + "epoch": 15.04, + "eval_loss": 0.2780563235282898, + "eval_runtime": 1465.9542, + "eval_samples_per_second": 7.102, + "eval_steps_per_second": 0.444, + "eval_wer": 25.010162766874256, + "step": 28000 + }, + { + "epoch": 16.0, + "learning_rate": 3.0346835443037976e-06, + "loss": 0.009, + "step": 28025 + }, + { + "epoch": 16.0, + "learning_rate": 3.028354430379747e-06, + "loss": 0.0081, + "step": 28050 + }, + { + "epoch": 16.0, + "learning_rate": 3.022025316455696e-06, + "loss": 0.0109, + "step": 28075 + }, + { + "epoch": 16.0, + "learning_rate": 3.015696202531646e-06, + "loss": 0.0132, + "step": 28100 + }, + { + "epoch": 16.0, + "learning_rate": 3.0093670886075953e-06, + "loss": 0.0142, + "step": 28125 + }, + { + "epoch": 16.0, + "learning_rate": 3.0030379746835448e-06, + "loss": 0.0152, + "step": 28150 + }, + { + "epoch": 16.0, + "learning_rate": 2.996708860759494e-06, + "loss": 0.0074, + "step": 28175 + }, + { + "epoch": 16.0, + "learning_rate": 2.990379746835443e-06, + "loss": 0.0129, + "step": 28200 + }, + { + "epoch": 16.01, + "learning_rate": 2.9840506329113926e-06, + "loss": 0.0118, + "step": 28225 + }, + { + "epoch": 16.01, + "learning_rate": 2.977721518987342e-06, + "loss": 0.0153, + "step": 28250 + }, + { + "epoch": 16.01, + "learning_rate": 2.9713924050632915e-06, + "loss": 0.0121, + "step": 28275 + }, + { + "epoch": 16.01, + "learning_rate": 2.965063291139241e-06, + "loss": 0.0097, + "step": 28300 + }, + { + "epoch": 16.01, + "learning_rate": 2.95873417721519e-06, + "loss": 0.0083, + "step": 28325 + }, + { + "epoch": 16.01, + "learning_rate": 2.9524050632911393e-06, + "loss": 0.01, + "step": 28350 + }, + { + "epoch": 16.01, + "learning_rate": 2.9460759493670888e-06, + "loss": 0.0084, + "step": 28375 + }, + { + "epoch": 16.01, + "learning_rate": 2.939746835443038e-06, + "loss": 0.0124, + "step": 28400 + }, + { + "epoch": 16.01, + "learning_rate": 2.933417721518987e-06, + "loss": 0.0071, + "step": 28425 + }, + { + "epoch": 16.01, + "learning_rate": 2.9270886075949366e-06, + "loss": 0.0113, + "step": 28450 + }, + { + "epoch": 16.01, + "learning_rate": 2.920759493670886e-06, + "loss": 0.0085, + "step": 28475 + }, + { + "epoch": 16.01, + "learning_rate": 2.914430379746836e-06, + "loss": 0.0133, + "step": 28500 + }, + { + "epoch": 16.01, + "learning_rate": 2.9081012658227853e-06, + "loss": 0.014, + "step": 28525 + }, + { + "epoch": 16.01, + "learning_rate": 2.9017721518987348e-06, + "loss": 0.0105, + "step": 28550 + }, + { + "epoch": 16.01, + "learning_rate": 2.8954430379746838e-06, + "loss": 0.01, + "step": 28575 + }, + { + "epoch": 16.01, + "learning_rate": 2.889113924050633e-06, + "loss": 0.0135, + "step": 28600 + }, + { + "epoch": 16.02, + "learning_rate": 2.8827848101265826e-06, + "loss": 0.0118, + "step": 28625 + }, + { + "epoch": 16.02, + "learning_rate": 2.876455696202532e-06, + "loss": 0.0111, + "step": 28650 + }, + { + "epoch": 16.02, + "learning_rate": 2.8701265822784815e-06, + "loss": 0.008, + "step": 28675 + }, + { + "epoch": 16.02, + "learning_rate": 2.8637974683544305e-06, + "loss": 0.0103, + "step": 28700 + }, + { + "epoch": 16.02, + "learning_rate": 2.85746835443038e-06, + "loss": 0.0082, + "step": 28725 + }, + { + "epoch": 16.02, + "learning_rate": 2.8511392405063293e-06, + "loss": 0.0057, + "step": 28750 + }, + { + "epoch": 16.02, + "learning_rate": 2.8448101265822788e-06, + "loss": 0.0062, + "step": 28775 + }, + { + "epoch": 16.02, + "learning_rate": 2.8384810126582278e-06, + "loss": 0.0092, + "step": 28800 + }, + { + "epoch": 16.02, + "learning_rate": 2.832151898734177e-06, + "loss": 0.0084, + "step": 28825 + }, + { + "epoch": 16.02, + "learning_rate": 2.8258227848101266e-06, + "loss": 0.0099, + "step": 28850 + }, + { + "epoch": 16.02, + "learning_rate": 2.819493670886076e-06, + "loss": 0.0117, + "step": 28875 + }, + { + "epoch": 16.02, + "learning_rate": 2.8131645569620255e-06, + "loss": 0.0117, + "step": 28900 + }, + { + "epoch": 16.02, + "learning_rate": 2.8068354430379753e-06, + "loss": 0.0127, + "step": 28925 + }, + { + "epoch": 16.02, + "learning_rate": 2.8005063291139243e-06, + "loss": 0.0123, + "step": 28950 + }, + { + "epoch": 16.02, + "learning_rate": 2.7941772151898738e-06, + "loss": 0.0128, + "step": 28975 + }, + { + "epoch": 16.02, + "learning_rate": 2.787848101265823e-06, + "loss": 0.0073, + "step": 29000 + }, + { + "epoch": 16.02, + "eval_loss": 0.2685891091823578, + "eval_runtime": 1456.562, + "eval_samples_per_second": 7.148, + "eval_steps_per_second": 0.447, + "eval_wer": 23.278427291500677, + "step": 29000 + }, + { + "epoch": 16.03, + "learning_rate": 2.7815189873417726e-06, + "loss": 0.0068, + "step": 29025 + }, + { + "epoch": 16.03, + "learning_rate": 2.7751898734177216e-06, + "loss": 0.0087, + "step": 29050 + }, + { + "epoch": 16.03, + "learning_rate": 2.768860759493671e-06, + "loss": 0.0084, + "step": 29075 + }, + { + "epoch": 16.03, + "learning_rate": 2.7625316455696205e-06, + "loss": 0.0039, + "step": 29100 + }, + { + "epoch": 16.03, + "learning_rate": 2.75620253164557e-06, + "loss": 0.0066, + "step": 29125 + }, + { + "epoch": 16.03, + "learning_rate": 2.7498734177215193e-06, + "loss": 0.0108, + "step": 29150 + }, + { + "epoch": 16.03, + "learning_rate": 2.7435443037974683e-06, + "loss": 0.0052, + "step": 29175 + }, + { + "epoch": 16.03, + "learning_rate": 2.7372151898734178e-06, + "loss": 0.0061, + "step": 29200 + }, + { + "epoch": 16.03, + "learning_rate": 2.730886075949367e-06, + "loss": 0.0083, + "step": 29225 + }, + { + "epoch": 16.03, + "learning_rate": 2.7245569620253166e-06, + "loss": 0.0062, + "step": 29250 + }, + { + "epoch": 16.03, + "learning_rate": 2.7182278481012656e-06, + "loss": 0.0078, + "step": 29275 + }, + { + "epoch": 16.03, + "learning_rate": 2.711898734177215e-06, + "loss": 0.0127, + "step": 29300 + }, + { + "epoch": 16.03, + "learning_rate": 2.705569620253165e-06, + "loss": 0.0092, + "step": 29325 + }, + { + "epoch": 16.03, + "learning_rate": 2.6992405063291143e-06, + "loss": 0.0087, + "step": 29350 + }, + { + "epoch": 16.03, + "learning_rate": 2.6929113924050638e-06, + "loss": 0.0062, + "step": 29375 + }, + { + "epoch": 16.03, + "learning_rate": 2.686582278481013e-06, + "loss": 0.0065, + "step": 29400 + }, + { + "epoch": 16.04, + "learning_rate": 2.680253164556962e-06, + "loss": 0.0057, + "step": 29425 + }, + { + "epoch": 16.04, + "learning_rate": 2.6739240506329116e-06, + "loss": 0.0071, + "step": 29450 + }, + { + "epoch": 16.04, + "learning_rate": 2.667594936708861e-06, + "loss": 0.01, + "step": 29475 + }, + { + "epoch": 16.04, + "learning_rate": 2.6612658227848105e-06, + "loss": 0.009, + "step": 29500 + }, + { + "epoch": 16.04, + "learning_rate": 2.65493670886076e-06, + "loss": 0.0091, + "step": 29525 + }, + { + "epoch": 16.04, + "learning_rate": 2.648607594936709e-06, + "loss": 0.0104, + "step": 29550 + }, + { + "epoch": 16.04, + "learning_rate": 2.6422784810126583e-06, + "loss": 0.0071, + "step": 29575 + }, + { + "epoch": 16.04, + "learning_rate": 2.6359493670886078e-06, + "loss": 0.0081, + "step": 29600 + }, + { + "epoch": 16.04, + "learning_rate": 2.629620253164557e-06, + "loss": 0.008, + "step": 29625 + }, + { + "epoch": 16.04, + "learning_rate": 2.623291139240506e-06, + "loss": 0.0067, + "step": 29650 + }, + { + "epoch": 16.04, + "learning_rate": 2.6169620253164556e-06, + "loss": 0.0066, + "step": 29675 + }, + { + "epoch": 16.04, + "learning_rate": 2.610632911392405e-06, + "loss": 0.0121, + "step": 29700 + }, + { + "epoch": 16.04, + "learning_rate": 2.6043037974683545e-06, + "loss": 0.0075, + "step": 29725 + }, + { + "epoch": 16.04, + "learning_rate": 2.5979746835443043e-06, + "loss": 0.0111, + "step": 29750 + }, + { + "epoch": 17.0, + "learning_rate": 2.5916455696202538e-06, + "loss": 0.0095, + "step": 29775 + }, + { + "epoch": 17.0, + "learning_rate": 2.5853164556962028e-06, + "loss": 0.0077, + "step": 29800 + }, + { + "epoch": 17.0, + "learning_rate": 2.578987341772152e-06, + "loss": 0.0047, + "step": 29825 + }, + { + "epoch": 17.0, + "learning_rate": 2.5726582278481016e-06, + "loss": 0.0092, + "step": 29850 + }, + { + "epoch": 17.0, + "learning_rate": 2.566329113924051e-06, + "loss": 0.0191, + "step": 29875 + }, + { + "epoch": 17.0, + "learning_rate": 2.56e-06, + "loss": 0.0102, + "step": 29900 + }, + { + "epoch": 17.0, + "learning_rate": 2.5536708860759495e-06, + "loss": 0.0146, + "step": 29925 + }, + { + "epoch": 17.0, + "learning_rate": 2.547341772151899e-06, + "loss": 0.0096, + "step": 29950 + }, + { + "epoch": 17.01, + "learning_rate": 2.5410126582278483e-06, + "loss": 0.0159, + "step": 29975 + }, + { + "epoch": 17.01, + "learning_rate": 2.5346835443037978e-06, + "loss": 0.0094, + "step": 30000 + }, + { + "epoch": 17.01, + "eval_loss": 0.2636851966381073, + "eval_runtime": 1508.7196, + "eval_samples_per_second": 6.901, + "eval_steps_per_second": 0.431, + "eval_wer": 23.029642758418838, + "step": 30000 + }, + { + "epoch": 17.01, + "learning_rate": 2.5283544303797468e-06, + "loss": 0.0078, + "step": 30025 + }, + { + "epoch": 17.01, + "learning_rate": 2.522025316455696e-06, + "loss": 0.01, + "step": 30050 + }, + { + "epoch": 17.01, + "learning_rate": 2.5156962025316456e-06, + "loss": 0.0058, + "step": 30075 + }, + { + "epoch": 17.01, + "learning_rate": 2.509367088607595e-06, + "loss": 0.0092, + "step": 30100 + }, + { + "epoch": 17.01, + "learning_rate": 2.503037974683544e-06, + "loss": 0.0084, + "step": 30125 + }, + { + "epoch": 17.01, + "learning_rate": 2.496708860759494e-06, + "loss": 0.0069, + "step": 30150 + }, + { + "epoch": 17.01, + "learning_rate": 2.4903797468354433e-06, + "loss": 0.0059, + "step": 30175 + }, + { + "epoch": 17.01, + "learning_rate": 2.4840506329113923e-06, + "loss": 0.0097, + "step": 30200 + }, + { + "epoch": 17.01, + "learning_rate": 2.4777215189873418e-06, + "loss": 0.0126, + "step": 30225 + }, + { + "epoch": 17.01, + "learning_rate": 2.4713924050632916e-06, + "loss": 0.0096, + "step": 30250 + }, + { + "epoch": 17.01, + "learning_rate": 2.4650632911392406e-06, + "loss": 0.0094, + "step": 30275 + }, + { + "epoch": 17.01, + "learning_rate": 2.45873417721519e-06, + "loss": 0.0111, + "step": 30300 + }, + { + "epoch": 17.01, + "learning_rate": 2.4524050632911395e-06, + "loss": 0.0062, + "step": 30325 + }, + { + "epoch": 17.01, + "learning_rate": 2.446075949367089e-06, + "loss": 0.0078, + "step": 30350 + }, + { + "epoch": 17.02, + "learning_rate": 2.4397468354430383e-06, + "loss": 0.0043, + "step": 30375 + }, + { + "epoch": 17.02, + "learning_rate": 2.4336708860759494e-06, + "loss": 0.0102, + "step": 30400 + }, + { + "epoch": 17.02, + "learning_rate": 2.4273417721518988e-06, + "loss": 0.0101, + "step": 30425 + }, + { + "epoch": 17.02, + "learning_rate": 2.4210126582278482e-06, + "loss": 0.0066, + "step": 30450 + }, + { + "epoch": 17.02, + "learning_rate": 2.4146835443037976e-06, + "loss": 0.0078, + "step": 30475 + }, + { + "epoch": 17.02, + "learning_rate": 2.408354430379747e-06, + "loss": 0.0074, + "step": 30500 + }, + { + "epoch": 17.02, + "learning_rate": 2.4020253164556965e-06, + "loss": 0.0072, + "step": 30525 + }, + { + "epoch": 17.02, + "learning_rate": 2.395696202531646e-06, + "loss": 0.0109, + "step": 30550 + }, + { + "epoch": 17.02, + "learning_rate": 2.389367088607595e-06, + "loss": 0.0113, + "step": 30575 + }, + { + "epoch": 17.02, + "learning_rate": 2.3830379746835444e-06, + "loss": 0.0109, + "step": 30600 + }, + { + "epoch": 17.02, + "learning_rate": 2.3767088607594938e-06, + "loss": 0.0089, + "step": 30625 + }, + { + "epoch": 17.02, + "learning_rate": 2.3703797468354432e-06, + "loss": 0.0062, + "step": 30650 + }, + { + "epoch": 17.02, + "learning_rate": 2.3640506329113926e-06, + "loss": 0.0101, + "step": 30675 + }, + { + "epoch": 17.02, + "learning_rate": 2.357721518987342e-06, + "loss": 0.0096, + "step": 30700 + }, + { + "epoch": 17.02, + "learning_rate": 2.3513924050632915e-06, + "loss": 0.0093, + "step": 30725 + }, + { + "epoch": 17.02, + "learning_rate": 2.3450632911392405e-06, + "loss": 0.0113, + "step": 30750 + }, + { + "epoch": 17.03, + "learning_rate": 2.33873417721519e-06, + "loss": 0.0094, + "step": 30775 + }, + { + "epoch": 17.03, + "learning_rate": 2.3324050632911394e-06, + "loss": 0.0086, + "step": 30800 + }, + { + "epoch": 17.03, + "learning_rate": 2.3260759493670888e-06, + "loss": 0.0096, + "step": 30825 + }, + { + "epoch": 17.03, + "learning_rate": 2.319746835443038e-06, + "loss": 0.0086, + "step": 30850 + }, + { + "epoch": 17.03, + "learning_rate": 2.3134177215189876e-06, + "loss": 0.0064, + "step": 30875 + }, + { + "epoch": 17.03, + "learning_rate": 2.307088607594937e-06, + "loss": 0.0107, + "step": 30900 + }, + { + "epoch": 17.03, + "learning_rate": 2.3007594936708865e-06, + "loss": 0.0073, + "step": 30925 + }, + { + "epoch": 17.03, + "learning_rate": 2.2944303797468355e-06, + "loss": 0.005, + "step": 30950 + }, + { + "epoch": 17.03, + "learning_rate": 2.288101265822785e-06, + "loss": 0.0055, + "step": 30975 + }, + { + "epoch": 17.03, + "learning_rate": 2.2817721518987344e-06, + "loss": 0.006, + "step": 31000 + }, + { + "epoch": 17.03, + "eval_loss": 0.2709895372390747, + "eval_runtime": 1461.5544, + "eval_samples_per_second": 7.123, + "eval_steps_per_second": 0.445, + "eval_wer": 23.29468771849949, + "step": 31000 + }, + { + "epoch": 17.03, + "learning_rate": 2.2754430379746838e-06, + "loss": 0.0094, + "step": 31025 + }, + { + "epoch": 17.03, + "learning_rate": 2.2691139240506328e-06, + "loss": 0.0088, + "step": 31050 + }, + { + "epoch": 17.03, + "learning_rate": 2.2627848101265826e-06, + "loss": 0.0057, + "step": 31075 + }, + { + "epoch": 17.03, + "learning_rate": 2.256455696202532e-06, + "loss": 0.0066, + "step": 31100 + }, + { + "epoch": 17.03, + "learning_rate": 2.250126582278481e-06, + "loss": 0.008, + "step": 31125 + }, + { + "epoch": 17.03, + "learning_rate": 2.2437974683544305e-06, + "loss": 0.0048, + "step": 31150 + }, + { + "epoch": 17.04, + "learning_rate": 2.23746835443038e-06, + "loss": 0.0074, + "step": 31175 + }, + { + "epoch": 17.04, + "learning_rate": 2.2311392405063294e-06, + "loss": 0.0061, + "step": 31200 + }, + { + "epoch": 17.04, + "learning_rate": 2.2248101265822788e-06, + "loss": 0.0088, + "step": 31225 + }, + { + "epoch": 17.04, + "learning_rate": 2.2184810126582278e-06, + "loss": 0.0087, + "step": 31250 + }, + { + "epoch": 17.04, + "learning_rate": 2.2121518987341776e-06, + "loss": 0.0089, + "step": 31275 + }, + { + "epoch": 17.04, + "learning_rate": 2.2058227848101266e-06, + "loss": 0.0061, + "step": 31300 + }, + { + "epoch": 17.04, + "learning_rate": 2.199493670886076e-06, + "loss": 0.0097, + "step": 31325 + }, + { + "epoch": 17.04, + "learning_rate": 2.1931645569620255e-06, + "loss": 0.0063, + "step": 31350 + }, + { + "epoch": 17.04, + "learning_rate": 2.186835443037975e-06, + "loss": 0.0084, + "step": 31375 + }, + { + "epoch": 17.04, + "learning_rate": 2.1805063291139243e-06, + "loss": 0.0089, + "step": 31400 + }, + { + "epoch": 17.04, + "learning_rate": 2.1741772151898734e-06, + "loss": 0.0077, + "step": 31425 + }, + { + "epoch": 17.04, + "learning_rate": 2.1678481012658228e-06, + "loss": 0.0153, + "step": 31450 + }, + { + "epoch": 17.04, + "learning_rate": 2.1615189873417726e-06, + "loss": 0.0088, + "step": 31475 + }, + { + "epoch": 17.04, + "learning_rate": 2.1551898734177216e-06, + "loss": 0.0095, + "step": 31500 + }, + { + "epoch": 18.0, + "learning_rate": 2.148860759493671e-06, + "loss": 0.0084, + "step": 31525 + }, + { + "epoch": 18.0, + "learning_rate": 2.1425316455696205e-06, + "loss": 0.0054, + "step": 31550 + }, + { + "epoch": 18.0, + "learning_rate": 2.13620253164557e-06, + "loss": 0.013, + "step": 31575 + }, + { + "epoch": 18.0, + "learning_rate": 2.129873417721519e-06, + "loss": 0.0074, + "step": 31600 + }, + { + "epoch": 18.0, + "learning_rate": 2.1235443037974684e-06, + "loss": 0.0083, + "step": 31625 + }, + { + "epoch": 18.0, + "learning_rate": 2.1172151898734178e-06, + "loss": 0.0118, + "step": 31650 + }, + { + "epoch": 18.0, + "learning_rate": 2.110886075949367e-06, + "loss": 0.0095, + "step": 31675 + }, + { + "epoch": 18.0, + "learning_rate": 2.1045569620253166e-06, + "loss": 0.0075, + "step": 31700 + }, + { + "epoch": 18.01, + "learning_rate": 2.098227848101266e-06, + "loss": 0.0149, + "step": 31725 + }, + { + "epoch": 18.01, + "learning_rate": 2.0918987341772155e-06, + "loss": 0.0095, + "step": 31750 + }, + { + "epoch": 18.01, + "learning_rate": 2.085569620253165e-06, + "loss": 0.0098, + "step": 31775 + }, + { + "epoch": 18.01, + "learning_rate": 2.079240506329114e-06, + "loss": 0.0058, + "step": 31800 + }, + { + "epoch": 18.01, + "learning_rate": 2.0729113924050633e-06, + "loss": 0.0072, + "step": 31825 + }, + { + "epoch": 18.01, + "learning_rate": 2.0665822784810128e-06, + "loss": 0.0103, + "step": 31850 + }, + { + "epoch": 18.01, + "learning_rate": 2.060253164556962e-06, + "loss": 0.0085, + "step": 31875 + }, + { + "epoch": 18.01, + "learning_rate": 2.0539240506329116e-06, + "loss": 0.0064, + "step": 31900 + }, + { + "epoch": 18.01, + "learning_rate": 2.047594936708861e-06, + "loss": 0.0077, + "step": 31925 + }, + { + "epoch": 18.01, + "learning_rate": 2.0412658227848105e-06, + "loss": 0.0086, + "step": 31950 + }, + { + "epoch": 18.01, + "learning_rate": 2.0349367088607595e-06, + "loss": 0.0136, + "step": 31975 + }, + { + "epoch": 18.01, + "learning_rate": 2.028607594936709e-06, + "loss": 0.0085, + "step": 32000 + }, + { + "epoch": 18.01, + "eval_loss": 0.26488059759140015, + "eval_runtime": 1466.953, + "eval_samples_per_second": 7.097, + "eval_steps_per_second": 0.444, + "eval_wer": 23.089806338314446, + "step": 32000 + }, + { + "epoch": 18.01, + "learning_rate": 2.0222784810126583e-06, + "loss": 0.011, + "step": 32025 + }, + { + "epoch": 18.01, + "learning_rate": 2.0159493670886078e-06, + "loss": 0.0086, + "step": 32050 + }, + { + "epoch": 18.01, + "learning_rate": 2.009620253164557e-06, + "loss": 0.008, + "step": 32075 + }, + { + "epoch": 18.01, + "learning_rate": 2.0032911392405066e-06, + "loss": 0.0083, + "step": 32100 + }, + { + "epoch": 18.02, + "learning_rate": 1.996962025316456e-06, + "loss": 0.0073, + "step": 32125 + }, + { + "epoch": 18.02, + "learning_rate": 1.9906329113924055e-06, + "loss": 0.0036, + "step": 32150 + }, + { + "epoch": 18.02, + "learning_rate": 1.9843037974683545e-06, + "loss": 0.0096, + "step": 32175 + }, + { + "epoch": 18.02, + "learning_rate": 1.977974683544304e-06, + "loss": 0.0071, + "step": 32200 + }, + { + "epoch": 18.02, + "learning_rate": 1.9716455696202533e-06, + "loss": 0.009, + "step": 32225 + }, + { + "epoch": 18.02, + "learning_rate": 1.9653164556962028e-06, + "loss": 0.0093, + "step": 32250 + }, + { + "epoch": 18.02, + "learning_rate": 1.9589873417721518e-06, + "loss": 0.0071, + "step": 32275 + }, + { + "epoch": 18.02, + "learning_rate": 1.9526582278481016e-06, + "loss": 0.0037, + "step": 32300 + }, + { + "epoch": 18.02, + "learning_rate": 1.946329113924051e-06, + "loss": 0.007, + "step": 32325 + }, + { + "epoch": 18.02, + "learning_rate": 1.94e-06, + "loss": 0.0104, + "step": 32350 + }, + { + "epoch": 18.02, + "learning_rate": 1.9336708860759495e-06, + "loss": 0.0082, + "step": 32375 + }, + { + "epoch": 18.02, + "learning_rate": 1.927341772151899e-06, + "loss": 0.0095, + "step": 32400 + }, + { + "epoch": 18.02, + "learning_rate": 1.9210126582278483e-06, + "loss": 0.0101, + "step": 32425 + }, + { + "epoch": 18.02, + "learning_rate": 1.9146835443037973e-06, + "loss": 0.0122, + "step": 32450 + }, + { + "epoch": 18.02, + "learning_rate": 1.9083544303797468e-06, + "loss": 0.0082, + "step": 32475 + }, + { + "epoch": 18.02, + "learning_rate": 1.9020253164556964e-06, + "loss": 0.0093, + "step": 32500 + }, + { + "epoch": 18.03, + "learning_rate": 1.8956962025316458e-06, + "loss": 0.0073, + "step": 32525 + }, + { + "epoch": 18.03, + "learning_rate": 1.889367088607595e-06, + "loss": 0.0061, + "step": 32550 + }, + { + "epoch": 18.03, + "learning_rate": 1.8830379746835445e-06, + "loss": 0.0072, + "step": 32575 + }, + { + "epoch": 18.03, + "learning_rate": 1.876708860759494e-06, + "loss": 0.0064, + "step": 32600 + }, + { + "epoch": 18.03, + "learning_rate": 1.8703797468354431e-06, + "loss": 0.0084, + "step": 32625 + }, + { + "epoch": 18.03, + "learning_rate": 1.8640506329113926e-06, + "loss": 0.0067, + "step": 32650 + }, + { + "epoch": 18.03, + "learning_rate": 1.8577215189873418e-06, + "loss": 0.0052, + "step": 32675 + }, + { + "epoch": 18.03, + "learning_rate": 1.8513924050632912e-06, + "loss": 0.0085, + "step": 32700 + }, + { + "epoch": 18.03, + "learning_rate": 1.8450632911392408e-06, + "loss": 0.0077, + "step": 32725 + }, + { + "epoch": 18.03, + "learning_rate": 1.83873417721519e-06, + "loss": 0.0068, + "step": 32750 + }, + { + "epoch": 18.03, + "learning_rate": 1.8324050632911395e-06, + "loss": 0.0079, + "step": 32775 + }, + { + "epoch": 18.03, + "learning_rate": 1.8260759493670887e-06, + "loss": 0.008, + "step": 32800 + }, + { + "epoch": 18.03, + "learning_rate": 1.8197468354430381e-06, + "loss": 0.0071, + "step": 32825 + }, + { + "epoch": 18.03, + "learning_rate": 1.8134177215189873e-06, + "loss": 0.0091, + "step": 32850 + }, + { + "epoch": 18.03, + "learning_rate": 1.8070886075949368e-06, + "loss": 0.0059, + "step": 32875 + }, + { + "epoch": 18.03, + "learning_rate": 1.8007594936708862e-06, + "loss": 0.0079, + "step": 32900 + }, + { + "epoch": 18.04, + "learning_rate": 1.7944303797468356e-06, + "loss": 0.0055, + "step": 32925 + }, + { + "epoch": 18.04, + "learning_rate": 1.788101265822785e-06, + "loss": 0.0095, + "step": 32950 + }, + { + "epoch": 18.04, + "learning_rate": 1.7817721518987343e-06, + "loss": 0.0086, + "step": 32975 + }, + { + "epoch": 18.04, + "learning_rate": 1.7754430379746837e-06, + "loss": 0.0057, + "step": 33000 + }, + { + "epoch": 18.04, + "eval_loss": 0.2678743898868561, + "eval_runtime": 1484.126, + "eval_samples_per_second": 7.015, + "eval_steps_per_second": 0.439, + "eval_wer": 23.405258622091416, + "step": 33000 + }, + { + "epoch": 18.04, + "learning_rate": 1.7691139240506331e-06, + "loss": 0.0085, + "step": 33025 + }, + { + "epoch": 18.04, + "learning_rate": 1.7627848101265823e-06, + "loss": 0.0073, + "step": 33050 + }, + { + "epoch": 18.04, + "learning_rate": 1.7564556962025318e-06, + "loss": 0.0045, + "step": 33075 + }, + { + "epoch": 18.04, + "learning_rate": 1.750126582278481e-06, + "loss": 0.0065, + "step": 33100 + }, + { + "epoch": 18.04, + "learning_rate": 1.7437974683544306e-06, + "loss": 0.0076, + "step": 33125 + }, + { + "epoch": 18.04, + "learning_rate": 1.73746835443038e-06, + "loss": 0.007, + "step": 33150 + }, + { + "epoch": 18.04, + "learning_rate": 1.7311392405063293e-06, + "loss": 0.0109, + "step": 33175 + }, + { + "epoch": 18.04, + "learning_rate": 1.7248101265822787e-06, + "loss": 0.0095, + "step": 33200 + }, + { + "epoch": 18.04, + "learning_rate": 1.718481012658228e-06, + "loss": 0.0109, + "step": 33225 + }, + { + "epoch": 18.04, + "learning_rate": 1.7121518987341773e-06, + "loss": 0.0117, + "step": 33250 + }, + { + "epoch": 19.0, + "learning_rate": 1.7058227848101266e-06, + "loss": 0.0038, + "step": 33275 + }, + { + "epoch": 19.0, + "learning_rate": 1.699493670886076e-06, + "loss": 0.013, + "step": 33300 + }, + { + "epoch": 19.0, + "learning_rate": 1.6931645569620256e-06, + "loss": 0.0104, + "step": 33325 + }, + { + "epoch": 19.0, + "learning_rate": 1.6868354430379748e-06, + "loss": 0.0064, + "step": 33350 + }, + { + "epoch": 19.0, + "learning_rate": 1.6805063291139243e-06, + "loss": 0.0137, + "step": 33375 + }, + { + "epoch": 19.0, + "learning_rate": 1.6741772151898735e-06, + "loss": 0.012, + "step": 33400 + }, + { + "epoch": 19.0, + "learning_rate": 1.667848101265823e-06, + "loss": 0.0106, + "step": 33425 + }, + { + "epoch": 19.0, + "learning_rate": 1.6615189873417723e-06, + "loss": 0.0072, + "step": 33450 + }, + { + "epoch": 19.01, + "learning_rate": 1.6551898734177216e-06, + "loss": 0.0154, + "step": 33475 + }, + { + "epoch": 19.01, + "learning_rate": 1.648860759493671e-06, + "loss": 0.0129, + "step": 33500 + }, + { + "epoch": 19.01, + "learning_rate": 1.6425316455696206e-06, + "loss": 0.008, + "step": 33525 + }, + { + "epoch": 19.01, + "learning_rate": 1.6362025316455698e-06, + "loss": 0.0071, + "step": 33550 + }, + { + "epoch": 19.01, + "learning_rate": 1.6298734177215193e-06, + "loss": 0.0088, + "step": 33575 + }, + { + "epoch": 19.01, + "learning_rate": 1.6235443037974685e-06, + "loss": 0.0102, + "step": 33600 + }, + { + "epoch": 19.01, + "learning_rate": 1.617215189873418e-06, + "loss": 0.0064, + "step": 33625 + }, + { + "epoch": 19.01, + "learning_rate": 1.6108860759493671e-06, + "loss": 0.0098, + "step": 33650 + }, + { + "epoch": 19.01, + "learning_rate": 1.6045569620253166e-06, + "loss": 0.009, + "step": 33675 + }, + { + "epoch": 19.01, + "learning_rate": 1.5982278481012658e-06, + "loss": 0.0074, + "step": 33700 + }, + { + "epoch": 19.01, + "learning_rate": 1.5918987341772152e-06, + "loss": 0.0055, + "step": 33725 + }, + { + "epoch": 19.01, + "learning_rate": 1.5855696202531648e-06, + "loss": 0.0059, + "step": 33750 + }, + { + "epoch": 19.01, + "learning_rate": 1.579240506329114e-06, + "loss": 0.0102, + "step": 33775 + }, + { + "epoch": 19.01, + "learning_rate": 1.5729113924050635e-06, + "loss": 0.0111, + "step": 33800 + }, + { + "epoch": 19.01, + "learning_rate": 1.5665822784810127e-06, + "loss": 0.0033, + "step": 33825 + }, + { + "epoch": 19.01, + "learning_rate": 1.5602531645569621e-06, + "loss": 0.0057, + "step": 33850 + }, + { + "epoch": 19.02, + "learning_rate": 1.5539240506329115e-06, + "loss": 0.0074, + "step": 33875 + }, + { + "epoch": 19.02, + "learning_rate": 1.5475949367088608e-06, + "loss": 0.0056, + "step": 33900 + }, + { + "epoch": 19.02, + "learning_rate": 1.5412658227848102e-06, + "loss": 0.0053, + "step": 33925 + }, + { + "epoch": 19.02, + "learning_rate": 1.5349367088607598e-06, + "loss": 0.0049, + "step": 33950 + }, + { + "epoch": 19.02, + "learning_rate": 1.528607594936709e-06, + "loss": 0.0065, + "step": 33975 + }, + { + "epoch": 19.02, + "learning_rate": 1.5222784810126585e-06, + "loss": 0.007, + "step": 34000 + }, + { + "epoch": 19.02, + "eval_loss": 0.2660910487174988, + "eval_runtime": 1471.5922, + "eval_samples_per_second": 7.075, + "eval_steps_per_second": 0.442, + "eval_wer": 23.14834387551017, + "step": 34000 + }, + { + "epoch": 19.02, + "learning_rate": 1.5159493670886077e-06, + "loss": 0.0027, + "step": 34025 + }, + { + "epoch": 19.02, + "learning_rate": 1.5096202531645571e-06, + "loss": 0.0081, + "step": 34050 + }, + { + "epoch": 19.02, + "learning_rate": 1.5032911392405063e-06, + "loss": 0.0044, + "step": 34075 + }, + { + "epoch": 19.02, + "learning_rate": 1.4969620253164558e-06, + "loss": 0.0093, + "step": 34100 + }, + { + "epoch": 19.02, + "learning_rate": 1.490632911392405e-06, + "loss": 0.0067, + "step": 34125 + }, + { + "epoch": 19.02, + "learning_rate": 1.4843037974683546e-06, + "loss": 0.0047, + "step": 34150 + }, + { + "epoch": 19.02, + "learning_rate": 1.477974683544304e-06, + "loss": 0.0119, + "step": 34175 + }, + { + "epoch": 19.02, + "learning_rate": 1.4716455696202533e-06, + "loss": 0.0091, + "step": 34200 + }, + { + "epoch": 19.02, + "learning_rate": 1.4653164556962027e-06, + "loss": 0.0094, + "step": 34225 + }, + { + "epoch": 19.02, + "learning_rate": 1.458987341772152e-06, + "loss": 0.0069, + "step": 34250 + }, + { + "epoch": 19.03, + "learning_rate": 1.4526582278481013e-06, + "loss": 0.0066, + "step": 34275 + }, + { + "epoch": 19.03, + "learning_rate": 1.4463291139240508e-06, + "loss": 0.0072, + "step": 34300 + }, + { + "epoch": 19.03, + "learning_rate": 1.44e-06, + "loss": 0.0067, + "step": 34325 + }, + { + "epoch": 19.03, + "learning_rate": 1.4336708860759496e-06, + "loss": 0.0076, + "step": 34350 + }, + { + "epoch": 19.03, + "learning_rate": 1.427341772151899e-06, + "loss": 0.0083, + "step": 34375 + }, + { + "epoch": 19.03, + "learning_rate": 1.4212658227848103e-06, + "loss": 0.0072, + "step": 34400 + }, + { + "epoch": 19.03, + "learning_rate": 1.4149367088607597e-06, + "loss": 0.0086, + "step": 34425 + }, + { + "epoch": 19.03, + "learning_rate": 1.408607594936709e-06, + "loss": 0.0057, + "step": 34450 + }, + { + "epoch": 19.03, + "learning_rate": 1.4022784810126584e-06, + "loss": 0.0111, + "step": 34475 + }, + { + "epoch": 19.03, + "learning_rate": 1.3959493670886076e-06, + "loss": 0.01, + "step": 34500 + }, + { + "epoch": 19.03, + "learning_rate": 1.389620253164557e-06, + "loss": 0.008, + "step": 34525 + }, + { + "epoch": 19.03, + "learning_rate": 1.3832911392405066e-06, + "loss": 0.0082, + "step": 34550 + }, + { + "epoch": 19.03, + "learning_rate": 1.3769620253164559e-06, + "loss": 0.0062, + "step": 34575 + }, + { + "epoch": 19.03, + "learning_rate": 1.3706329113924053e-06, + "loss": 0.0052, + "step": 34600 + }, + { + "epoch": 19.03, + "learning_rate": 1.3643037974683545e-06, + "loss": 0.007, + "step": 34625 + }, + { + "epoch": 19.03, + "learning_rate": 1.357974683544304e-06, + "loss": 0.0045, + "step": 34650 + }, + { + "epoch": 19.04, + "learning_rate": 1.3516455696202531e-06, + "loss": 0.0089, + "step": 34675 + }, + { + "epoch": 19.04, + "learning_rate": 1.3453164556962026e-06, + "loss": 0.0081, + "step": 34700 + }, + { + "epoch": 19.04, + "learning_rate": 1.338987341772152e-06, + "loss": 0.0101, + "step": 34725 + }, + { + "epoch": 19.04, + "learning_rate": 1.3326582278481014e-06, + "loss": 0.0072, + "step": 34750 + }, + { + "epoch": 19.04, + "learning_rate": 1.3263291139240509e-06, + "loss": 0.005, + "step": 34775 + }, + { + "epoch": 19.04, + "learning_rate": 1.32e-06, + "loss": 0.0083, + "step": 34800 + }, + { + "epoch": 19.04, + "learning_rate": 1.3136708860759495e-06, + "loss": 0.0087, + "step": 34825 + }, + { + "epoch": 19.04, + "learning_rate": 1.307341772151899e-06, + "loss": 0.0067, + "step": 34850 + }, + { + "epoch": 19.04, + "learning_rate": 1.3010126582278481e-06, + "loss": 0.0048, + "step": 34875 + }, + { + "epoch": 19.04, + "learning_rate": 1.2946835443037976e-06, + "loss": 0.0065, + "step": 34900 + }, + { + "epoch": 19.04, + "learning_rate": 1.2883544303797468e-06, + "loss": 0.0076, + "step": 34925 + }, + { + "epoch": 19.04, + "learning_rate": 1.2820253164556964e-06, + "loss": 0.007, + "step": 34950 + }, + { + "epoch": 19.04, + "learning_rate": 1.2756962025316458e-06, + "loss": 0.0044, + "step": 34975 + }, + { + "epoch": 19.04, + "learning_rate": 1.269367088607595e-06, + "loss": 0.0082, + "step": 35000 + }, + { + "epoch": 19.04, + "eval_loss": 0.26719748973846436, + "eval_runtime": 1465.7868, + "eval_samples_per_second": 7.103, + "eval_steps_per_second": 0.444, + "eval_wer": 24.169498691035628, + "step": 35000 + }, + { + "epoch": 20.0, + "learning_rate": 1.2630379746835445e-06, + "loss": 0.0072, + "step": 35025 + }, + { + "epoch": 20.0, + "learning_rate": 1.2567088607594937e-06, + "loss": 0.0078, + "step": 35050 + }, + { + "epoch": 20.0, + "learning_rate": 1.2503797468354431e-06, + "loss": 0.0069, + "step": 35075 + }, + { + "epoch": 20.0, + "learning_rate": 1.2440506329113924e-06, + "loss": 0.0063, + "step": 35100 + }, + { + "epoch": 20.0, + "learning_rate": 1.237721518987342e-06, + "loss": 0.0073, + "step": 35125 + }, + { + "epoch": 20.0, + "learning_rate": 1.2313924050632912e-06, + "loss": 0.0076, + "step": 35150 + }, + { + "epoch": 20.0, + "learning_rate": 1.2250632911392406e-06, + "loss": 0.0084, + "step": 35175 + }, + { + "epoch": 20.0, + "learning_rate": 1.2187341772151899e-06, + "loss": 0.0061, + "step": 35200 + }, + { + "epoch": 20.01, + "learning_rate": 1.2124050632911393e-06, + "loss": 0.0074, + "step": 35225 + }, + { + "epoch": 20.01, + "learning_rate": 1.2060759493670887e-06, + "loss": 0.011, + "step": 35250 + }, + { + "epoch": 20.01, + "learning_rate": 1.1997468354430381e-06, + "loss": 0.0076, + "step": 35275 + }, + { + "epoch": 20.01, + "learning_rate": 1.1934177215189874e-06, + "loss": 0.007, + "step": 35300 + }, + { + "epoch": 20.01, + "learning_rate": 1.1870886075949368e-06, + "loss": 0.0074, + "step": 35325 + }, + { + "epoch": 20.01, + "learning_rate": 1.1807594936708862e-06, + "loss": 0.0077, + "step": 35350 + }, + { + "epoch": 20.01, + "learning_rate": 1.1744303797468354e-06, + "loss": 0.0074, + "step": 35375 + }, + { + "epoch": 20.01, + "learning_rate": 1.1681012658227848e-06, + "loss": 0.007, + "step": 35400 + }, + { + "epoch": 20.01, + "learning_rate": 1.1617721518987343e-06, + "loss": 0.0068, + "step": 35425 + }, + { + "epoch": 20.01, + "learning_rate": 1.1554430379746837e-06, + "loss": 0.0049, + "step": 35450 + }, + { + "epoch": 20.01, + "learning_rate": 1.149113924050633e-06, + "loss": 0.0074, + "step": 35475 + }, + { + "epoch": 20.01, + "learning_rate": 1.1427848101265823e-06, + "loss": 0.0097, + "step": 35500 + }, + { + "epoch": 20.01, + "learning_rate": 1.1364556962025318e-06, + "loss": 0.0065, + "step": 35525 + }, + { + "epoch": 20.01, + "learning_rate": 1.1301265822784812e-06, + "loss": 0.0056, + "step": 35550 + }, + { + "epoch": 20.01, + "learning_rate": 1.1237974683544304e-06, + "loss": 0.0056, + "step": 35575 + }, + { + "epoch": 20.01, + "learning_rate": 1.1174683544303798e-06, + "loss": 0.0056, + "step": 35600 + }, + { + "epoch": 20.02, + "learning_rate": 1.1111392405063293e-06, + "loss": 0.0104, + "step": 35625 + }, + { + "epoch": 20.02, + "learning_rate": 1.1048101265822787e-06, + "loss": 0.0071, + "step": 35650 + }, + { + "epoch": 20.02, + "learning_rate": 1.098481012658228e-06, + "loss": 0.0079, + "step": 35675 + }, + { + "epoch": 20.02, + "learning_rate": 1.0921518987341773e-06, + "loss": 0.0077, + "step": 35700 + }, + { + "epoch": 20.02, + "learning_rate": 1.0858227848101268e-06, + "loss": 0.0038, + "step": 35725 + }, + { + "epoch": 20.02, + "learning_rate": 1.079493670886076e-06, + "loss": 0.0035, + "step": 35750 + }, + { + "epoch": 20.02, + "learning_rate": 1.0731645569620254e-06, + "loss": 0.0047, + "step": 35775 + }, + { + "epoch": 20.02, + "learning_rate": 1.0668354430379746e-06, + "loss": 0.0079, + "step": 35800 + }, + { + "epoch": 20.02, + "learning_rate": 1.0605063291139243e-06, + "loss": 0.006, + "step": 35825 + }, + { + "epoch": 20.02, + "learning_rate": 1.0541772151898735e-06, + "loss": 0.0084, + "step": 35850 + }, + { + "epoch": 20.02, + "learning_rate": 1.047848101265823e-06, + "loss": 0.0073, + "step": 35875 + }, + { + "epoch": 20.02, + "learning_rate": 1.0415189873417721e-06, + "loss": 0.0049, + "step": 35900 + }, + { + "epoch": 20.02, + "learning_rate": 1.0351898734177216e-06, + "loss": 0.0114, + "step": 35925 + }, + { + "epoch": 20.02, + "learning_rate": 1.028860759493671e-06, + "loss": 0.0086, + "step": 35950 + }, + { + "epoch": 20.02, + "learning_rate": 1.0225316455696204e-06, + "loss": 0.0088, + "step": 35975 + }, + { + "epoch": 20.02, + "learning_rate": 1.0162025316455696e-06, + "loss": 0.0054, + "step": 36000 + }, + { + "epoch": 20.02, + "eval_loss": 0.26699596643447876, + "eval_runtime": 1450.3878, + "eval_samples_per_second": 7.178, + "eval_steps_per_second": 0.449, + "eval_wer": 22.689799834143646, + "step": 36000 + }, + { + "epoch": 20.03, + "learning_rate": 1.009873417721519e-06, + "loss": 0.005, + "step": 36025 + }, + { + "epoch": 20.03, + "learning_rate": 1.0035443037974685e-06, + "loss": 0.0058, + "step": 36050 + }, + { + "epoch": 20.03, + "learning_rate": 9.97215189873418e-07, + "loss": 0.0068, + "step": 36075 + }, + { + "epoch": 20.03, + "learning_rate": 9.908860759493671e-07, + "loss": 0.0074, + "step": 36100 + }, + { + "epoch": 20.03, + "learning_rate": 9.845569620253166e-07, + "loss": 0.0081, + "step": 36125 + }, + { + "epoch": 20.03, + "learning_rate": 9.78227848101266e-07, + "loss": 0.0063, + "step": 36150 + }, + { + "epoch": 20.03, + "learning_rate": 9.718987341772152e-07, + "loss": 0.0085, + "step": 36175 + }, + { + "epoch": 20.03, + "learning_rate": 9.655696202531646e-07, + "loss": 0.0059, + "step": 36200 + }, + { + "epoch": 20.03, + "learning_rate": 9.592405063291138e-07, + "loss": 0.0079, + "step": 36225 + }, + { + "epoch": 20.03, + "learning_rate": 9.529113924050634e-07, + "loss": 0.0047, + "step": 36250 + }, + { + "epoch": 20.03, + "learning_rate": 9.465822784810127e-07, + "loss": 0.0058, + "step": 36275 + }, + { + "epoch": 20.03, + "learning_rate": 9.402531645569621e-07, + "loss": 0.0039, + "step": 36300 + }, + { + "epoch": 20.03, + "learning_rate": 9.339240506329115e-07, + "loss": 0.0054, + "step": 36325 + }, + { + "epoch": 20.03, + "learning_rate": 9.275949367088609e-07, + "loss": 0.0074, + "step": 36350 + }, + { + "epoch": 20.03, + "learning_rate": 9.212658227848102e-07, + "loss": 0.0063, + "step": 36375 + }, + { + "epoch": 20.03, + "learning_rate": 9.149367088607595e-07, + "loss": 0.004, + "step": 36400 + }, + { + "epoch": 20.04, + "learning_rate": 9.086075949367088e-07, + "loss": 0.0071, + "step": 36425 + }, + { + "epoch": 20.04, + "learning_rate": 9.022784810126584e-07, + "loss": 0.0075, + "step": 36450 + }, + { + "epoch": 20.04, + "learning_rate": 8.959493670886077e-07, + "loss": 0.006, + "step": 36475 + }, + { + "epoch": 20.04, + "learning_rate": 8.89873417721519e-07, + "loss": 0.0063, + "step": 36500 + }, + { + "epoch": 20.04, + "learning_rate": 8.835443037974684e-07, + "loss": 0.0042, + "step": 36525 + }, + { + "epoch": 20.04, + "learning_rate": 8.772151898734178e-07, + "loss": 0.0038, + "step": 36550 + }, + { + "epoch": 20.04, + "learning_rate": 8.708860759493671e-07, + "loss": 0.0058, + "step": 36575 + }, + { + "epoch": 20.04, + "learning_rate": 8.645569620253165e-07, + "loss": 0.0061, + "step": 36600 + }, + { + "epoch": 20.04, + "learning_rate": 8.582278481012659e-07, + "loss": 0.0046, + "step": 36625 + }, + { + "epoch": 20.04, + "learning_rate": 8.518987341772153e-07, + "loss": 0.006, + "step": 36650 + }, + { + "epoch": 20.04, + "learning_rate": 8.455696202531646e-07, + "loss": 0.0082, + "step": 36675 + }, + { + "epoch": 20.04, + "learning_rate": 8.392405063291139e-07, + "loss": 0.0074, + "step": 36700 + }, + { + "epoch": 20.04, + "learning_rate": 8.329113924050633e-07, + "loss": 0.0107, + "step": 36725 + }, + { + "epoch": 20.04, + "learning_rate": 8.265822784810128e-07, + "loss": 0.0062, + "step": 36750 + }, + { + "epoch": 21.0, + "learning_rate": 8.202531645569621e-07, + "loss": 0.0127, + "step": 36775 + }, + { + "epoch": 21.0, + "learning_rate": 8.139240506329114e-07, + "loss": 0.0061, + "step": 36800 + }, + { + "epoch": 21.0, + "learning_rate": 8.075949367088608e-07, + "loss": 0.005, + "step": 36825 + }, + { + "epoch": 21.0, + "learning_rate": 8.012658227848103e-07, + "loss": 0.0071, + "step": 36850 + }, + { + "epoch": 21.0, + "learning_rate": 7.949367088607596e-07, + "loss": 0.008, + "step": 36875 + }, + { + "epoch": 21.0, + "learning_rate": 7.886075949367089e-07, + "loss": 0.0091, + "step": 36900 + }, + { + "epoch": 21.0, + "learning_rate": 7.822784810126583e-07, + "loss": 0.008, + "step": 36925 + }, + { + "epoch": 21.0, + "learning_rate": 7.759493670886077e-07, + "loss": 0.006, + "step": 36950 + }, + { + "epoch": 21.01, + "learning_rate": 7.69873417721519e-07, + "loss": 0.0083, + "step": 36975 + }, + { + "epoch": 21.01, + "learning_rate": 7.635443037974683e-07, + "loss": 0.0078, + "step": 37000 + }, + { + "epoch": 21.01, + "eval_loss": 0.2638327479362488, + "eval_runtime": 1492.7276, + "eval_samples_per_second": 6.974, + "eval_steps_per_second": 0.436, + "eval_wer": 23.06053756971658, + "step": 37000 + }, + { + "epoch": 21.01, + "learning_rate": 7.572151898734177e-07, + "loss": 0.0122, + "step": 37025 + }, + { + "epoch": 21.01, + "learning_rate": 7.508860759493672e-07, + "loss": 0.0093, + "step": 37050 + }, + { + "epoch": 21.01, + "learning_rate": 7.445569620253165e-07, + "loss": 0.0061, + "step": 37075 + }, + { + "epoch": 21.01, + "learning_rate": 7.382278481012658e-07, + "loss": 0.0063, + "step": 37100 + }, + { + "epoch": 21.01, + "learning_rate": 7.318987341772152e-07, + "loss": 0.011, + "step": 37125 + }, + { + "epoch": 21.01, + "learning_rate": 7.255696202531647e-07, + "loss": 0.0075, + "step": 37150 + }, + { + "epoch": 21.01, + "learning_rate": 7.19240506329114e-07, + "loss": 0.0056, + "step": 37175 + }, + { + "epoch": 21.01, + "learning_rate": 7.129113924050633e-07, + "loss": 0.0044, + "step": 37200 + }, + { + "epoch": 21.01, + "learning_rate": 7.065822784810127e-07, + "loss": 0.0068, + "step": 37225 + }, + { + "epoch": 21.01, + "learning_rate": 7.002531645569621e-07, + "loss": 0.0114, + "step": 37250 + }, + { + "epoch": 21.01, + "learning_rate": 6.939240506329114e-07, + "loss": 0.0104, + "step": 37275 + }, + { + "epoch": 21.01, + "learning_rate": 6.875949367088608e-07, + "loss": 0.0098, + "step": 37300 + }, + { + "epoch": 21.01, + "learning_rate": 6.812658227848102e-07, + "loss": 0.0078, + "step": 37325 + }, + { + "epoch": 21.01, + "learning_rate": 6.749367088607596e-07, + "loss": 0.0075, + "step": 37350 + }, + { + "epoch": 21.02, + "learning_rate": 6.686075949367089e-07, + "loss": 0.0081, + "step": 37375 + }, + { + "epoch": 21.02, + "learning_rate": 6.622784810126582e-07, + "loss": 0.0039, + "step": 37400 + }, + { + "epoch": 21.02, + "learning_rate": 6.559493670886076e-07, + "loss": 0.0081, + "step": 37425 + }, + { + "epoch": 21.02, + "learning_rate": 6.496202531645571e-07, + "loss": 0.0066, + "step": 37450 + }, + { + "epoch": 21.02, + "learning_rate": 6.432911392405064e-07, + "loss": 0.0076, + "step": 37475 + }, + { + "epoch": 21.02, + "learning_rate": 6.369620253164557e-07, + "loss": 0.0067, + "step": 37500 + }, + { + "epoch": 21.02, + "learning_rate": 6.306329113924051e-07, + "loss": 0.0081, + "step": 37525 + }, + { + "epoch": 21.02, + "learning_rate": 6.243037974683545e-07, + "loss": 0.0059, + "step": 37550 + }, + { + "epoch": 21.02, + "learning_rate": 6.179746835443039e-07, + "loss": 0.0072, + "step": 37575 + }, + { + "epoch": 21.02, + "learning_rate": 6.116455696202532e-07, + "loss": 0.0095, + "step": 37600 + }, + { + "epoch": 21.02, + "learning_rate": 6.053164556962026e-07, + "loss": 0.0078, + "step": 37625 + }, + { + "epoch": 21.02, + "learning_rate": 5.98987341772152e-07, + "loss": 0.0072, + "step": 37650 + }, + { + "epoch": 21.02, + "learning_rate": 5.926582278481013e-07, + "loss": 0.0057, + "step": 37675 + }, + { + "epoch": 21.02, + "learning_rate": 5.863291139240506e-07, + "loss": 0.0064, + "step": 37700 + }, + { + "epoch": 21.02, + "learning_rate": 5.800000000000001e-07, + "loss": 0.0069, + "step": 37725 + }, + { + "epoch": 21.02, + "learning_rate": 5.736708860759494e-07, + "loss": 0.0063, + "step": 37750 + }, + { + "epoch": 21.03, + "learning_rate": 5.673417721518988e-07, + "loss": 0.0073, + "step": 37775 + }, + { + "epoch": 21.03, + "learning_rate": 5.610126582278481e-07, + "loss": 0.0058, + "step": 37800 + }, + { + "epoch": 21.03, + "learning_rate": 5.546835443037976e-07, + "loss": 0.0056, + "step": 37825 + }, + { + "epoch": 21.03, + "learning_rate": 5.483544303797469e-07, + "loss": 0.0061, + "step": 37850 + }, + { + "epoch": 21.03, + "learning_rate": 5.420253164556962e-07, + "loss": 0.0039, + "step": 37875 + }, + { + "epoch": 21.03, + "learning_rate": 5.356962025316456e-07, + "loss": 0.0082, + "step": 37900 + }, + { + "epoch": 21.03, + "learning_rate": 5.29367088607595e-07, + "loss": 0.0067, + "step": 37925 + }, + { + "epoch": 21.03, + "learning_rate": 5.230379746835444e-07, + "loss": 0.0096, + "step": 37950 + }, + { + "epoch": 21.03, + "learning_rate": 5.167088607594937e-07, + "loss": 0.004, + "step": 37975 + }, + { + "epoch": 21.03, + "learning_rate": 5.103797468354431e-07, + "loss": 0.0055, + "step": 38000 + }, + { + "epoch": 21.03, + "eval_loss": 0.2642187774181366, + "eval_runtime": 1455.8371, + "eval_samples_per_second": 7.151, + "eval_steps_per_second": 0.447, + "eval_wer": 22.437763215662045, + "step": 38000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.753993204604928e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-38000/training_args.bin b/checkpoint-38000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-38000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-39000/config.json b/checkpoint-39000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-39000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-39000/generation_config.json b/checkpoint-39000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-39000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-39000/optimizer.pt b/checkpoint-39000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb1f4dfd79d75603eb5ddd8d01dbb1d38a4863a3 --- /dev/null +++ b/checkpoint-39000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce473fbee11060a4848aa0bda951d9322f6fc5dfa057beab4abb91b34b038983 +size 1934161093 diff --git a/checkpoint-39000/preprocessor_config.json b/checkpoint-39000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-39000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-39000/pytorch_model.bin b/checkpoint-39000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b9f64e2b09e0787b0b61b5abd033aecd71f2d71 --- /dev/null +++ b/checkpoint-39000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b92a1f2198edcf4d6313ae1a087e389c2baf2d2adc1afe5ad1db350f8d91475a +size 967102601 diff --git a/checkpoint-39000/rng_state.pth b/checkpoint-39000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..603d2cea8a654597be40628ccb8d742e0c0b2ba9 --- /dev/null +++ b/checkpoint-39000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ae006fe40747a817312454632309da54463221068d8ce1d8ffd7e362baaa5b6 +size 14575 diff --git a/checkpoint-39000/scaler.pt b/checkpoint-39000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8950d3e34e383c81b533709c67ae695cd17c1b3 --- /dev/null +++ b/checkpoint-39000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a8a7369e856114f7bcc0569f9dbdb364d872104983e7e59b9734956939b8ebd +size 557 diff --git a/checkpoint-39000/scheduler.pt b/checkpoint-39000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7abeed68cc96c107ca846be517adb24ccc09c18 --- /dev/null +++ b/checkpoint-39000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92569053834c8dcd9e7a40f1b82b7246ef4467902dabaf4beec691321c8a1afe +size 627 diff --git a/checkpoint-39000/trainer_state.json b/checkpoint-39000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c942677bd1358046567249c04b2a8bd5a694a9f3 --- /dev/null +++ b/checkpoint-39000/trainer_state.json @@ -0,0 +1,9727 @@ +{ + "best_metric": 22.437763215662045, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-38000", + "epoch": 22.01195, + "global_step": 39000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + }, + { + "epoch": 13.03, + "learning_rate": 4.046835443037975e-06, + "loss": 0.0128, + "step": 24025 + }, + { + "epoch": 13.03, + "learning_rate": 4.040506329113924e-06, + "loss": 0.012, + "step": 24050 + }, + { + "epoch": 13.03, + "learning_rate": 4.034177215189874e-06, + "loss": 0.0098, + "step": 24075 + }, + { + "epoch": 13.03, + "learning_rate": 4.027848101265823e-06, + "loss": 0.0091, + "step": 24100 + }, + { + "epoch": 13.03, + "learning_rate": 4.021518987341773e-06, + "loss": 0.0077, + "step": 24125 + }, + { + "epoch": 13.03, + "learning_rate": 4.015189873417722e-06, + "loss": 0.0135, + "step": 24150 + }, + { + "epoch": 13.04, + "learning_rate": 4.0088607594936715e-06, + "loss": 0.0124, + "step": 24175 + }, + { + "epoch": 13.04, + "learning_rate": 4.002531645569621e-06, + "loss": 0.0152, + "step": 24200 + }, + { + "epoch": 13.04, + "learning_rate": 3.99620253164557e-06, + "loss": 0.0127, + "step": 24225 + }, + { + "epoch": 13.04, + "learning_rate": 3.989873417721519e-06, + "loss": 0.0066, + "step": 24250 + }, + { + "epoch": 13.04, + "learning_rate": 3.983544303797468e-06, + "loss": 0.0163, + "step": 24275 + }, + { + "epoch": 13.04, + "learning_rate": 3.977215189873418e-06, + "loss": 0.0095, + "step": 24300 + }, + { + "epoch": 13.04, + "learning_rate": 3.970886075949367e-06, + "loss": 0.0103, + "step": 24325 + }, + { + "epoch": 13.04, + "learning_rate": 3.964556962025317e-06, + "loss": 0.0112, + "step": 24350 + }, + { + "epoch": 13.04, + "learning_rate": 3.958227848101266e-06, + "loss": 0.01, + "step": 24375 + }, + { + "epoch": 13.04, + "learning_rate": 3.9518987341772155e-06, + "loss": 0.0131, + "step": 24400 + }, + { + "epoch": 13.04, + "learning_rate": 3.945569620253165e-06, + "loss": 0.0143, + "step": 24425 + }, + { + "epoch": 13.04, + "learning_rate": 3.939240506329114e-06, + "loss": 0.0151, + "step": 24450 + }, + { + "epoch": 13.04, + "learning_rate": 3.932911392405064e-06, + "loss": 0.0129, + "step": 24475 + }, + { + "epoch": 13.04, + "learning_rate": 3.926582278481013e-06, + "loss": 0.0125, + "step": 24500 + }, + { + "epoch": 14.0, + "learning_rate": 3.920506329113925e-06, + "loss": 0.0165, + "step": 24525 + }, + { + "epoch": 14.0, + "learning_rate": 3.914177215189874e-06, + "loss": 0.0114, + "step": 24550 + }, + { + "epoch": 14.0, + "learning_rate": 3.9078481012658235e-06, + "loss": 0.0111, + "step": 24575 + }, + { + "epoch": 14.0, + "learning_rate": 3.901518987341772e-06, + "loss": 0.0134, + "step": 24600 + }, + { + "epoch": 14.0, + "learning_rate": 3.8951898734177215e-06, + "loss": 0.0138, + "step": 24625 + }, + { + "epoch": 14.0, + "learning_rate": 3.888860759493671e-06, + "loss": 0.0151, + "step": 24650 + }, + { + "epoch": 14.0, + "learning_rate": 3.88253164556962e-06, + "loss": 0.0101, + "step": 24675 + }, + { + "epoch": 14.0, + "learning_rate": 3.87620253164557e-06, + "loss": 0.0108, + "step": 24700 + }, + { + "epoch": 14.01, + "learning_rate": 3.869873417721519e-06, + "loss": 0.0151, + "step": 24725 + }, + { + "epoch": 14.01, + "learning_rate": 3.863544303797469e-06, + "loss": 0.0132, + "step": 24750 + }, + { + "epoch": 14.01, + "learning_rate": 3.857215189873418e-06, + "loss": 0.012, + "step": 24775 + }, + { + "epoch": 14.01, + "learning_rate": 3.8508860759493675e-06, + "loss": 0.017, + "step": 24800 + }, + { + "epoch": 14.01, + "learning_rate": 3.844556962025317e-06, + "loss": 0.0141, + "step": 24825 + }, + { + "epoch": 14.01, + "learning_rate": 3.838227848101266e-06, + "loss": 0.0166, + "step": 24850 + }, + { + "epoch": 14.01, + "learning_rate": 3.831898734177216e-06, + "loss": 0.0112, + "step": 24875 + }, + { + "epoch": 14.01, + "learning_rate": 3.825569620253165e-06, + "loss": 0.0097, + "step": 24900 + }, + { + "epoch": 14.01, + "learning_rate": 3.819240506329115e-06, + "loss": 0.0138, + "step": 24925 + }, + { + "epoch": 14.01, + "learning_rate": 3.8129113924050636e-06, + "loss": 0.0069, + "step": 24950 + }, + { + "epoch": 14.01, + "learning_rate": 3.806582278481013e-06, + "loss": 0.0111, + "step": 24975 + }, + { + "epoch": 14.01, + "learning_rate": 3.8002531645569625e-06, + "loss": 0.0149, + "step": 25000 + }, + { + "epoch": 14.01, + "eval_loss": 0.2640444040298462, + "eval_runtime": 1478.4977, + "eval_samples_per_second": 7.042, + "eval_steps_per_second": 0.44, + "eval_wer": 24.063805915543345, + "step": 25000 + }, + { + "epoch": 14.01, + "learning_rate": 3.7939240506329115e-06, + "loss": 0.0176, + "step": 25025 + }, + { + "epoch": 14.01, + "learning_rate": 3.787594936708861e-06, + "loss": 0.0104, + "step": 25050 + }, + { + "epoch": 14.01, + "learning_rate": 3.7812658227848103e-06, + "loss": 0.0084, + "step": 25075 + }, + { + "epoch": 14.01, + "learning_rate": 3.7749367088607598e-06, + "loss": 0.0118, + "step": 25100 + }, + { + "epoch": 14.02, + "learning_rate": 3.768607594936709e-06, + "loss": 0.0099, + "step": 25125 + }, + { + "epoch": 14.02, + "learning_rate": 3.762278481012658e-06, + "loss": 0.0089, + "step": 25150 + }, + { + "epoch": 14.02, + "learning_rate": 3.7559493670886076e-06, + "loss": 0.0119, + "step": 25175 + }, + { + "epoch": 14.02, + "learning_rate": 3.749620253164557e-06, + "loss": 0.0118, + "step": 25200 + }, + { + "epoch": 14.02, + "learning_rate": 3.743291139240507e-06, + "loss": 0.0108, + "step": 25225 + }, + { + "epoch": 14.02, + "learning_rate": 3.7369620253164563e-06, + "loss": 0.0101, + "step": 25250 + }, + { + "epoch": 14.02, + "learning_rate": 3.7306329113924058e-06, + "loss": 0.0095, + "step": 25275 + }, + { + "epoch": 14.02, + "learning_rate": 3.7243037974683548e-06, + "loss": 0.0106, + "step": 25300 + }, + { + "epoch": 14.02, + "learning_rate": 3.717974683544304e-06, + "loss": 0.0135, + "step": 25325 + }, + { + "epoch": 14.02, + "learning_rate": 3.7116455696202536e-06, + "loss": 0.0115, + "step": 25350 + }, + { + "epoch": 14.02, + "learning_rate": 3.705316455696203e-06, + "loss": 0.0114, + "step": 25375 + }, + { + "epoch": 14.02, + "learning_rate": 3.698987341772152e-06, + "loss": 0.0119, + "step": 25400 + }, + { + "epoch": 14.02, + "learning_rate": 3.6926582278481015e-06, + "loss": 0.0123, + "step": 25425 + }, + { + "epoch": 14.02, + "learning_rate": 3.686329113924051e-06, + "loss": 0.0138, + "step": 25450 + }, + { + "epoch": 14.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 0.0133, + "step": 25475 + }, + { + "epoch": 14.02, + "learning_rate": 3.6736708860759498e-06, + "loss": 0.0106, + "step": 25500 + }, + { + "epoch": 14.03, + "learning_rate": 3.6673417721518988e-06, + "loss": 0.0099, + "step": 25525 + }, + { + "epoch": 14.03, + "learning_rate": 3.661012658227848e-06, + "loss": 0.0104, + "step": 25550 + }, + { + "epoch": 14.03, + "learning_rate": 3.6546835443037976e-06, + "loss": 0.0143, + "step": 25575 + }, + { + "epoch": 14.03, + "learning_rate": 3.648354430379747e-06, + "loss": 0.0096, + "step": 25600 + }, + { + "epoch": 14.03, + "learning_rate": 3.642025316455696e-06, + "loss": 0.0094, + "step": 25625 + }, + { + "epoch": 14.03, + "learning_rate": 3.6356962025316463e-06, + "loss": 0.0162, + "step": 25650 + }, + { + "epoch": 14.03, + "learning_rate": 3.6293670886075953e-06, + "loss": 0.008, + "step": 25675 + }, + { + "epoch": 14.03, + "learning_rate": 3.6230379746835448e-06, + "loss": 0.0094, + "step": 25700 + }, + { + "epoch": 14.03, + "learning_rate": 3.616708860759494e-06, + "loss": 0.0122, + "step": 25725 + }, + { + "epoch": 14.03, + "learning_rate": 3.6103797468354436e-06, + "loss": 0.0128, + "step": 25750 + }, + { + "epoch": 14.03, + "learning_rate": 3.6040506329113926e-06, + "loss": 0.0117, + "step": 25775 + }, + { + "epoch": 14.03, + "learning_rate": 3.597721518987342e-06, + "loss": 0.0067, + "step": 25800 + }, + { + "epoch": 14.03, + "learning_rate": 3.5913924050632915e-06, + "loss": 0.0078, + "step": 25825 + }, + { + "epoch": 14.03, + "learning_rate": 3.585063291139241e-06, + "loss": 0.0071, + "step": 25850 + }, + { + "epoch": 14.03, + "learning_rate": 3.57873417721519e-06, + "loss": 0.0093, + "step": 25875 + }, + { + "epoch": 14.03, + "learning_rate": 3.5724050632911393e-06, + "loss": 0.0064, + "step": 25900 + }, + { + "epoch": 14.04, + "learning_rate": 3.5660759493670888e-06, + "loss": 0.0067, + "step": 25925 + }, + { + "epoch": 14.04, + "learning_rate": 3.559746835443038e-06, + "loss": 0.0076, + "step": 25950 + }, + { + "epoch": 14.04, + "learning_rate": 3.5534177215189876e-06, + "loss": 0.0099, + "step": 25975 + }, + { + "epoch": 14.04, + "learning_rate": 3.5470886075949366e-06, + "loss": 0.0111, + "step": 26000 + }, + { + "epoch": 14.04, + "eval_loss": 0.2734878659248352, + "eval_runtime": 1456.7487, + "eval_samples_per_second": 7.147, + "eval_steps_per_second": 0.447, + "eval_wer": 24.359745686921737, + "step": 26000 + }, + { + "epoch": 14.04, + "learning_rate": 3.540759493670886e-06, + "loss": 0.0134, + "step": 26025 + }, + { + "epoch": 14.04, + "learning_rate": 3.534430379746836e-06, + "loss": 0.008, + "step": 26050 + }, + { + "epoch": 14.04, + "learning_rate": 3.5281012658227853e-06, + "loss": 0.0138, + "step": 26075 + }, + { + "epoch": 14.04, + "learning_rate": 3.5217721518987348e-06, + "loss": 0.0075, + "step": 26100 + }, + { + "epoch": 14.04, + "learning_rate": 3.515443037974684e-06, + "loss": 0.0097, + "step": 26125 + }, + { + "epoch": 14.04, + "learning_rate": 3.509113924050633e-06, + "loss": 0.0139, + "step": 26150 + }, + { + "epoch": 14.04, + "learning_rate": 3.5027848101265826e-06, + "loss": 0.0099, + "step": 26175 + }, + { + "epoch": 14.04, + "learning_rate": 3.496455696202532e-06, + "loss": 0.0148, + "step": 26200 + }, + { + "epoch": 14.04, + "learning_rate": 3.4901265822784815e-06, + "loss": 0.0119, + "step": 26225 + }, + { + "epoch": 14.04, + "learning_rate": 3.4837974683544305e-06, + "loss": 0.0086, + "step": 26250 + }, + { + "epoch": 15.0, + "learning_rate": 3.47746835443038e-06, + "loss": 0.0091, + "step": 26275 + }, + { + "epoch": 15.0, + "learning_rate": 3.4711392405063293e-06, + "loss": 0.0105, + "step": 26300 + }, + { + "epoch": 15.0, + "learning_rate": 3.465063291139241e-06, + "loss": 0.0138, + "step": 26325 + }, + { + "epoch": 15.0, + "learning_rate": 3.45873417721519e-06, + "loss": 0.0185, + "step": 26350 + }, + { + "epoch": 15.0, + "learning_rate": 3.4524050632911392e-06, + "loss": 0.015, + "step": 26375 + }, + { + "epoch": 15.0, + "learning_rate": 3.4460759493670886e-06, + "loss": 0.0177, + "step": 26400 + }, + { + "epoch": 15.0, + "learning_rate": 3.439746835443038e-06, + "loss": 0.014, + "step": 26425 + }, + { + "epoch": 15.0, + "learning_rate": 3.433417721518988e-06, + "loss": 0.0127, + "step": 26450 + }, + { + "epoch": 15.01, + "learning_rate": 3.4270886075949374e-06, + "loss": 0.0135, + "step": 26475 + }, + { + "epoch": 15.01, + "learning_rate": 3.4207594936708864e-06, + "loss": 0.0171, + "step": 26500 + }, + { + "epoch": 15.01, + "learning_rate": 3.4144303797468358e-06, + "loss": 0.0114, + "step": 26525 + }, + { + "epoch": 15.01, + "learning_rate": 3.4081012658227852e-06, + "loss": 0.0099, + "step": 26550 + }, + { + "epoch": 15.01, + "learning_rate": 3.4017721518987346e-06, + "loss": 0.0126, + "step": 26575 + }, + { + "epoch": 15.01, + "learning_rate": 3.395443037974684e-06, + "loss": 0.0116, + "step": 26600 + }, + { + "epoch": 15.01, + "learning_rate": 3.389113924050633e-06, + "loss": 0.0087, + "step": 26625 + }, + { + "epoch": 15.01, + "learning_rate": 3.3827848101265825e-06, + "loss": 0.0073, + "step": 26650 + }, + { + "epoch": 15.01, + "learning_rate": 3.376455696202532e-06, + "loss": 0.0102, + "step": 26675 + }, + { + "epoch": 15.01, + "learning_rate": 3.3701265822784814e-06, + "loss": 0.0106, + "step": 26700 + }, + { + "epoch": 15.01, + "learning_rate": 3.3637974683544304e-06, + "loss": 0.0094, + "step": 26725 + }, + { + "epoch": 15.01, + "learning_rate": 3.35746835443038e-06, + "loss": 0.0113, + "step": 26750 + }, + { + "epoch": 15.01, + "learning_rate": 3.3511392405063292e-06, + "loss": 0.0169, + "step": 26775 + }, + { + "epoch": 15.01, + "learning_rate": 3.3448101265822786e-06, + "loss": 0.0084, + "step": 26800 + }, + { + "epoch": 15.01, + "learning_rate": 3.338481012658228e-06, + "loss": 0.0153, + "step": 26825 + }, + { + "epoch": 15.01, + "learning_rate": 3.332151898734177e-06, + "loss": 0.012, + "step": 26850 + }, + { + "epoch": 15.02, + "learning_rate": 3.325822784810127e-06, + "loss": 0.0106, + "step": 26875 + }, + { + "epoch": 15.02, + "learning_rate": 3.3194936708860764e-06, + "loss": 0.0099, + "step": 26900 + }, + { + "epoch": 15.02, + "learning_rate": 3.3131645569620258e-06, + "loss": 0.008, + "step": 26925 + }, + { + "epoch": 15.02, + "learning_rate": 3.306835443037975e-06, + "loss": 0.0076, + "step": 26950 + }, + { + "epoch": 15.02, + "learning_rate": 3.3005063291139246e-06, + "loss": 0.0088, + "step": 26975 + }, + { + "epoch": 15.02, + "learning_rate": 3.2941772151898736e-06, + "loss": 0.009, + "step": 27000 + }, + { + "epoch": 15.02, + "eval_loss": 0.2693029046058655, + "eval_runtime": 1479.1374, + "eval_samples_per_second": 7.039, + "eval_steps_per_second": 0.44, + "eval_wer": 24.28819980812696, + "step": 27000 + }, + { + "epoch": 15.02, + "learning_rate": 3.287848101265823e-06, + "loss": 0.0115, + "step": 27025 + }, + { + "epoch": 15.02, + "learning_rate": 3.2815189873417725e-06, + "loss": 0.0099, + "step": 27050 + }, + { + "epoch": 15.02, + "learning_rate": 3.275189873417722e-06, + "loss": 0.0073, + "step": 27075 + }, + { + "epoch": 15.02, + "learning_rate": 3.268860759493671e-06, + "loss": 0.0061, + "step": 27100 + }, + { + "epoch": 15.02, + "learning_rate": 3.2625316455696204e-06, + "loss": 0.007, + "step": 27125 + }, + { + "epoch": 15.02, + "learning_rate": 3.2562025316455698e-06, + "loss": 0.0109, + "step": 27150 + }, + { + "epoch": 15.02, + "learning_rate": 3.2498734177215192e-06, + "loss": 0.0123, + "step": 27175 + }, + { + "epoch": 15.02, + "learning_rate": 3.2435443037974686e-06, + "loss": 0.0121, + "step": 27200 + }, + { + "epoch": 15.02, + "learning_rate": 3.2372151898734176e-06, + "loss": 0.0129, + "step": 27225 + }, + { + "epoch": 15.02, + "learning_rate": 3.230886075949367e-06, + "loss": 0.0128, + "step": 27250 + }, + { + "epoch": 15.03, + "learning_rate": 3.224556962025317e-06, + "loss": 0.0094, + "step": 27275 + }, + { + "epoch": 15.03, + "learning_rate": 3.2182278481012664e-06, + "loss": 0.0081, + "step": 27300 + }, + { + "epoch": 15.03, + "learning_rate": 3.2118987341772158e-06, + "loss": 0.0087, + "step": 27325 + }, + { + "epoch": 15.03, + "learning_rate": 3.2055696202531648e-06, + "loss": 0.0131, + "step": 27350 + }, + { + "epoch": 15.03, + "learning_rate": 3.199240506329114e-06, + "loss": 0.0073, + "step": 27375 + }, + { + "epoch": 15.03, + "learning_rate": 3.1929113924050636e-06, + "loss": 0.0086, + "step": 27400 + }, + { + "epoch": 15.03, + "learning_rate": 3.186582278481013e-06, + "loss": 0.0108, + "step": 27425 + }, + { + "epoch": 15.03, + "learning_rate": 3.1802531645569625e-06, + "loss": 0.01, + "step": 27450 + }, + { + "epoch": 15.03, + "learning_rate": 3.1739240506329115e-06, + "loss": 0.0123, + "step": 27475 + }, + { + "epoch": 15.03, + "learning_rate": 3.167594936708861e-06, + "loss": 0.0081, + "step": 27500 + }, + { + "epoch": 15.03, + "learning_rate": 3.1612658227848104e-06, + "loss": 0.0123, + "step": 27525 + }, + { + "epoch": 15.03, + "learning_rate": 3.1549367088607598e-06, + "loss": 0.0096, + "step": 27550 + }, + { + "epoch": 15.03, + "learning_rate": 3.1486075949367088e-06, + "loss": 0.0054, + "step": 27575 + }, + { + "epoch": 15.03, + "learning_rate": 3.1422784810126582e-06, + "loss": 0.006, + "step": 27600 + }, + { + "epoch": 15.03, + "learning_rate": 3.1359493670886076e-06, + "loss": 0.0089, + "step": 27625 + }, + { + "epoch": 15.03, + "learning_rate": 3.129620253164557e-06, + "loss": 0.0109, + "step": 27650 + }, + { + "epoch": 15.04, + "learning_rate": 3.1232911392405065e-06, + "loss": 0.0089, + "step": 27675 + }, + { + "epoch": 15.04, + "learning_rate": 3.1169620253164563e-06, + "loss": 0.0094, + "step": 27700 + }, + { + "epoch": 15.04, + "learning_rate": 3.1106329113924054e-06, + "loss": 0.0102, + "step": 27725 + }, + { + "epoch": 15.04, + "learning_rate": 3.1043037974683548e-06, + "loss": 0.0097, + "step": 27750 + }, + { + "epoch": 15.04, + "learning_rate": 3.097974683544304e-06, + "loss": 0.0078, + "step": 27775 + }, + { + "epoch": 15.04, + "learning_rate": 3.0916455696202536e-06, + "loss": 0.0073, + "step": 27800 + }, + { + "epoch": 15.04, + "learning_rate": 3.085316455696203e-06, + "loss": 0.0105, + "step": 27825 + }, + { + "epoch": 15.04, + "learning_rate": 3.078987341772152e-06, + "loss": 0.0083, + "step": 27850 + }, + { + "epoch": 15.04, + "learning_rate": 3.0726582278481015e-06, + "loss": 0.0138, + "step": 27875 + }, + { + "epoch": 15.04, + "learning_rate": 3.066329113924051e-06, + "loss": 0.0049, + "step": 27900 + }, + { + "epoch": 15.04, + "learning_rate": 3.0600000000000003e-06, + "loss": 0.0107, + "step": 27925 + }, + { + "epoch": 15.04, + "learning_rate": 3.0536708860759494e-06, + "loss": 0.0105, + "step": 27950 + }, + { + "epoch": 15.04, + "learning_rate": 3.0473417721518988e-06, + "loss": 0.0134, + "step": 27975 + }, + { + "epoch": 15.04, + "learning_rate": 3.041012658227848e-06, + "loss": 0.0137, + "step": 28000 + }, + { + "epoch": 15.04, + "eval_loss": 0.2780563235282898, + "eval_runtime": 1465.9542, + "eval_samples_per_second": 7.102, + "eval_steps_per_second": 0.444, + "eval_wer": 25.010162766874256, + "step": 28000 + }, + { + "epoch": 16.0, + "learning_rate": 3.0346835443037976e-06, + "loss": 0.009, + "step": 28025 + }, + { + "epoch": 16.0, + "learning_rate": 3.028354430379747e-06, + "loss": 0.0081, + "step": 28050 + }, + { + "epoch": 16.0, + "learning_rate": 3.022025316455696e-06, + "loss": 0.0109, + "step": 28075 + }, + { + "epoch": 16.0, + "learning_rate": 3.015696202531646e-06, + "loss": 0.0132, + "step": 28100 + }, + { + "epoch": 16.0, + "learning_rate": 3.0093670886075953e-06, + "loss": 0.0142, + "step": 28125 + }, + { + "epoch": 16.0, + "learning_rate": 3.0030379746835448e-06, + "loss": 0.0152, + "step": 28150 + }, + { + "epoch": 16.0, + "learning_rate": 2.996708860759494e-06, + "loss": 0.0074, + "step": 28175 + }, + { + "epoch": 16.0, + "learning_rate": 2.990379746835443e-06, + "loss": 0.0129, + "step": 28200 + }, + { + "epoch": 16.01, + "learning_rate": 2.9840506329113926e-06, + "loss": 0.0118, + "step": 28225 + }, + { + "epoch": 16.01, + "learning_rate": 2.977721518987342e-06, + "loss": 0.0153, + "step": 28250 + }, + { + "epoch": 16.01, + "learning_rate": 2.9713924050632915e-06, + "loss": 0.0121, + "step": 28275 + }, + { + "epoch": 16.01, + "learning_rate": 2.965063291139241e-06, + "loss": 0.0097, + "step": 28300 + }, + { + "epoch": 16.01, + "learning_rate": 2.95873417721519e-06, + "loss": 0.0083, + "step": 28325 + }, + { + "epoch": 16.01, + "learning_rate": 2.9524050632911393e-06, + "loss": 0.01, + "step": 28350 + }, + { + "epoch": 16.01, + "learning_rate": 2.9460759493670888e-06, + "loss": 0.0084, + "step": 28375 + }, + { + "epoch": 16.01, + "learning_rate": 2.939746835443038e-06, + "loss": 0.0124, + "step": 28400 + }, + { + "epoch": 16.01, + "learning_rate": 2.933417721518987e-06, + "loss": 0.0071, + "step": 28425 + }, + { + "epoch": 16.01, + "learning_rate": 2.9270886075949366e-06, + "loss": 0.0113, + "step": 28450 + }, + { + "epoch": 16.01, + "learning_rate": 2.920759493670886e-06, + "loss": 0.0085, + "step": 28475 + }, + { + "epoch": 16.01, + "learning_rate": 2.914430379746836e-06, + "loss": 0.0133, + "step": 28500 + }, + { + "epoch": 16.01, + "learning_rate": 2.9081012658227853e-06, + "loss": 0.014, + "step": 28525 + }, + { + "epoch": 16.01, + "learning_rate": 2.9017721518987348e-06, + "loss": 0.0105, + "step": 28550 + }, + { + "epoch": 16.01, + "learning_rate": 2.8954430379746838e-06, + "loss": 0.01, + "step": 28575 + }, + { + "epoch": 16.01, + "learning_rate": 2.889113924050633e-06, + "loss": 0.0135, + "step": 28600 + }, + { + "epoch": 16.02, + "learning_rate": 2.8827848101265826e-06, + "loss": 0.0118, + "step": 28625 + }, + { + "epoch": 16.02, + "learning_rate": 2.876455696202532e-06, + "loss": 0.0111, + "step": 28650 + }, + { + "epoch": 16.02, + "learning_rate": 2.8701265822784815e-06, + "loss": 0.008, + "step": 28675 + }, + { + "epoch": 16.02, + "learning_rate": 2.8637974683544305e-06, + "loss": 0.0103, + "step": 28700 + }, + { + "epoch": 16.02, + "learning_rate": 2.85746835443038e-06, + "loss": 0.0082, + "step": 28725 + }, + { + "epoch": 16.02, + "learning_rate": 2.8511392405063293e-06, + "loss": 0.0057, + "step": 28750 + }, + { + "epoch": 16.02, + "learning_rate": 2.8448101265822788e-06, + "loss": 0.0062, + "step": 28775 + }, + { + "epoch": 16.02, + "learning_rate": 2.8384810126582278e-06, + "loss": 0.0092, + "step": 28800 + }, + { + "epoch": 16.02, + "learning_rate": 2.832151898734177e-06, + "loss": 0.0084, + "step": 28825 + }, + { + "epoch": 16.02, + "learning_rate": 2.8258227848101266e-06, + "loss": 0.0099, + "step": 28850 + }, + { + "epoch": 16.02, + "learning_rate": 2.819493670886076e-06, + "loss": 0.0117, + "step": 28875 + }, + { + "epoch": 16.02, + "learning_rate": 2.8131645569620255e-06, + "loss": 0.0117, + "step": 28900 + }, + { + "epoch": 16.02, + "learning_rate": 2.8068354430379753e-06, + "loss": 0.0127, + "step": 28925 + }, + { + "epoch": 16.02, + "learning_rate": 2.8005063291139243e-06, + "loss": 0.0123, + "step": 28950 + }, + { + "epoch": 16.02, + "learning_rate": 2.7941772151898738e-06, + "loss": 0.0128, + "step": 28975 + }, + { + "epoch": 16.02, + "learning_rate": 2.787848101265823e-06, + "loss": 0.0073, + "step": 29000 + }, + { + "epoch": 16.02, + "eval_loss": 0.2685891091823578, + "eval_runtime": 1456.562, + "eval_samples_per_second": 7.148, + "eval_steps_per_second": 0.447, + "eval_wer": 23.278427291500677, + "step": 29000 + }, + { + "epoch": 16.03, + "learning_rate": 2.7815189873417726e-06, + "loss": 0.0068, + "step": 29025 + }, + { + "epoch": 16.03, + "learning_rate": 2.7751898734177216e-06, + "loss": 0.0087, + "step": 29050 + }, + { + "epoch": 16.03, + "learning_rate": 2.768860759493671e-06, + "loss": 0.0084, + "step": 29075 + }, + { + "epoch": 16.03, + "learning_rate": 2.7625316455696205e-06, + "loss": 0.0039, + "step": 29100 + }, + { + "epoch": 16.03, + "learning_rate": 2.75620253164557e-06, + "loss": 0.0066, + "step": 29125 + }, + { + "epoch": 16.03, + "learning_rate": 2.7498734177215193e-06, + "loss": 0.0108, + "step": 29150 + }, + { + "epoch": 16.03, + "learning_rate": 2.7435443037974683e-06, + "loss": 0.0052, + "step": 29175 + }, + { + "epoch": 16.03, + "learning_rate": 2.7372151898734178e-06, + "loss": 0.0061, + "step": 29200 + }, + { + "epoch": 16.03, + "learning_rate": 2.730886075949367e-06, + "loss": 0.0083, + "step": 29225 + }, + { + "epoch": 16.03, + "learning_rate": 2.7245569620253166e-06, + "loss": 0.0062, + "step": 29250 + }, + { + "epoch": 16.03, + "learning_rate": 2.7182278481012656e-06, + "loss": 0.0078, + "step": 29275 + }, + { + "epoch": 16.03, + "learning_rate": 2.711898734177215e-06, + "loss": 0.0127, + "step": 29300 + }, + { + "epoch": 16.03, + "learning_rate": 2.705569620253165e-06, + "loss": 0.0092, + "step": 29325 + }, + { + "epoch": 16.03, + "learning_rate": 2.6992405063291143e-06, + "loss": 0.0087, + "step": 29350 + }, + { + "epoch": 16.03, + "learning_rate": 2.6929113924050638e-06, + "loss": 0.0062, + "step": 29375 + }, + { + "epoch": 16.03, + "learning_rate": 2.686582278481013e-06, + "loss": 0.0065, + "step": 29400 + }, + { + "epoch": 16.04, + "learning_rate": 2.680253164556962e-06, + "loss": 0.0057, + "step": 29425 + }, + { + "epoch": 16.04, + "learning_rate": 2.6739240506329116e-06, + "loss": 0.0071, + "step": 29450 + }, + { + "epoch": 16.04, + "learning_rate": 2.667594936708861e-06, + "loss": 0.01, + "step": 29475 + }, + { + "epoch": 16.04, + "learning_rate": 2.6612658227848105e-06, + "loss": 0.009, + "step": 29500 + }, + { + "epoch": 16.04, + "learning_rate": 2.65493670886076e-06, + "loss": 0.0091, + "step": 29525 + }, + { + "epoch": 16.04, + "learning_rate": 2.648607594936709e-06, + "loss": 0.0104, + "step": 29550 + }, + { + "epoch": 16.04, + "learning_rate": 2.6422784810126583e-06, + "loss": 0.0071, + "step": 29575 + }, + { + "epoch": 16.04, + "learning_rate": 2.6359493670886078e-06, + "loss": 0.0081, + "step": 29600 + }, + { + "epoch": 16.04, + "learning_rate": 2.629620253164557e-06, + "loss": 0.008, + "step": 29625 + }, + { + "epoch": 16.04, + "learning_rate": 2.623291139240506e-06, + "loss": 0.0067, + "step": 29650 + }, + { + "epoch": 16.04, + "learning_rate": 2.6169620253164556e-06, + "loss": 0.0066, + "step": 29675 + }, + { + "epoch": 16.04, + "learning_rate": 2.610632911392405e-06, + "loss": 0.0121, + "step": 29700 + }, + { + "epoch": 16.04, + "learning_rate": 2.6043037974683545e-06, + "loss": 0.0075, + "step": 29725 + }, + { + "epoch": 16.04, + "learning_rate": 2.5979746835443043e-06, + "loss": 0.0111, + "step": 29750 + }, + { + "epoch": 17.0, + "learning_rate": 2.5916455696202538e-06, + "loss": 0.0095, + "step": 29775 + }, + { + "epoch": 17.0, + "learning_rate": 2.5853164556962028e-06, + "loss": 0.0077, + "step": 29800 + }, + { + "epoch": 17.0, + "learning_rate": 2.578987341772152e-06, + "loss": 0.0047, + "step": 29825 + }, + { + "epoch": 17.0, + "learning_rate": 2.5726582278481016e-06, + "loss": 0.0092, + "step": 29850 + }, + { + "epoch": 17.0, + "learning_rate": 2.566329113924051e-06, + "loss": 0.0191, + "step": 29875 + }, + { + "epoch": 17.0, + "learning_rate": 2.56e-06, + "loss": 0.0102, + "step": 29900 + }, + { + "epoch": 17.0, + "learning_rate": 2.5536708860759495e-06, + "loss": 0.0146, + "step": 29925 + }, + { + "epoch": 17.0, + "learning_rate": 2.547341772151899e-06, + "loss": 0.0096, + "step": 29950 + }, + { + "epoch": 17.01, + "learning_rate": 2.5410126582278483e-06, + "loss": 0.0159, + "step": 29975 + }, + { + "epoch": 17.01, + "learning_rate": 2.5346835443037978e-06, + "loss": 0.0094, + "step": 30000 + }, + { + "epoch": 17.01, + "eval_loss": 0.2636851966381073, + "eval_runtime": 1508.7196, + "eval_samples_per_second": 6.901, + "eval_steps_per_second": 0.431, + "eval_wer": 23.029642758418838, + "step": 30000 + }, + { + "epoch": 17.01, + "learning_rate": 2.5283544303797468e-06, + "loss": 0.0078, + "step": 30025 + }, + { + "epoch": 17.01, + "learning_rate": 2.522025316455696e-06, + "loss": 0.01, + "step": 30050 + }, + { + "epoch": 17.01, + "learning_rate": 2.5156962025316456e-06, + "loss": 0.0058, + "step": 30075 + }, + { + "epoch": 17.01, + "learning_rate": 2.509367088607595e-06, + "loss": 0.0092, + "step": 30100 + }, + { + "epoch": 17.01, + "learning_rate": 2.503037974683544e-06, + "loss": 0.0084, + "step": 30125 + }, + { + "epoch": 17.01, + "learning_rate": 2.496708860759494e-06, + "loss": 0.0069, + "step": 30150 + }, + { + "epoch": 17.01, + "learning_rate": 2.4903797468354433e-06, + "loss": 0.0059, + "step": 30175 + }, + { + "epoch": 17.01, + "learning_rate": 2.4840506329113923e-06, + "loss": 0.0097, + "step": 30200 + }, + { + "epoch": 17.01, + "learning_rate": 2.4777215189873418e-06, + "loss": 0.0126, + "step": 30225 + }, + { + "epoch": 17.01, + "learning_rate": 2.4713924050632916e-06, + "loss": 0.0096, + "step": 30250 + }, + { + "epoch": 17.01, + "learning_rate": 2.4650632911392406e-06, + "loss": 0.0094, + "step": 30275 + }, + { + "epoch": 17.01, + "learning_rate": 2.45873417721519e-06, + "loss": 0.0111, + "step": 30300 + }, + { + "epoch": 17.01, + "learning_rate": 2.4524050632911395e-06, + "loss": 0.0062, + "step": 30325 + }, + { + "epoch": 17.01, + "learning_rate": 2.446075949367089e-06, + "loss": 0.0078, + "step": 30350 + }, + { + "epoch": 17.02, + "learning_rate": 2.4397468354430383e-06, + "loss": 0.0043, + "step": 30375 + }, + { + "epoch": 17.02, + "learning_rate": 2.4336708860759494e-06, + "loss": 0.0102, + "step": 30400 + }, + { + "epoch": 17.02, + "learning_rate": 2.4273417721518988e-06, + "loss": 0.0101, + "step": 30425 + }, + { + "epoch": 17.02, + "learning_rate": 2.4210126582278482e-06, + "loss": 0.0066, + "step": 30450 + }, + { + "epoch": 17.02, + "learning_rate": 2.4146835443037976e-06, + "loss": 0.0078, + "step": 30475 + }, + { + "epoch": 17.02, + "learning_rate": 2.408354430379747e-06, + "loss": 0.0074, + "step": 30500 + }, + { + "epoch": 17.02, + "learning_rate": 2.4020253164556965e-06, + "loss": 0.0072, + "step": 30525 + }, + { + "epoch": 17.02, + "learning_rate": 2.395696202531646e-06, + "loss": 0.0109, + "step": 30550 + }, + { + "epoch": 17.02, + "learning_rate": 2.389367088607595e-06, + "loss": 0.0113, + "step": 30575 + }, + { + "epoch": 17.02, + "learning_rate": 2.3830379746835444e-06, + "loss": 0.0109, + "step": 30600 + }, + { + "epoch": 17.02, + "learning_rate": 2.3767088607594938e-06, + "loss": 0.0089, + "step": 30625 + }, + { + "epoch": 17.02, + "learning_rate": 2.3703797468354432e-06, + "loss": 0.0062, + "step": 30650 + }, + { + "epoch": 17.02, + "learning_rate": 2.3640506329113926e-06, + "loss": 0.0101, + "step": 30675 + }, + { + "epoch": 17.02, + "learning_rate": 2.357721518987342e-06, + "loss": 0.0096, + "step": 30700 + }, + { + "epoch": 17.02, + "learning_rate": 2.3513924050632915e-06, + "loss": 0.0093, + "step": 30725 + }, + { + "epoch": 17.02, + "learning_rate": 2.3450632911392405e-06, + "loss": 0.0113, + "step": 30750 + }, + { + "epoch": 17.03, + "learning_rate": 2.33873417721519e-06, + "loss": 0.0094, + "step": 30775 + }, + { + "epoch": 17.03, + "learning_rate": 2.3324050632911394e-06, + "loss": 0.0086, + "step": 30800 + }, + { + "epoch": 17.03, + "learning_rate": 2.3260759493670888e-06, + "loss": 0.0096, + "step": 30825 + }, + { + "epoch": 17.03, + "learning_rate": 2.319746835443038e-06, + "loss": 0.0086, + "step": 30850 + }, + { + "epoch": 17.03, + "learning_rate": 2.3134177215189876e-06, + "loss": 0.0064, + "step": 30875 + }, + { + "epoch": 17.03, + "learning_rate": 2.307088607594937e-06, + "loss": 0.0107, + "step": 30900 + }, + { + "epoch": 17.03, + "learning_rate": 2.3007594936708865e-06, + "loss": 0.0073, + "step": 30925 + }, + { + "epoch": 17.03, + "learning_rate": 2.2944303797468355e-06, + "loss": 0.005, + "step": 30950 + }, + { + "epoch": 17.03, + "learning_rate": 2.288101265822785e-06, + "loss": 0.0055, + "step": 30975 + }, + { + "epoch": 17.03, + "learning_rate": 2.2817721518987344e-06, + "loss": 0.006, + "step": 31000 + }, + { + "epoch": 17.03, + "eval_loss": 0.2709895372390747, + "eval_runtime": 1461.5544, + "eval_samples_per_second": 7.123, + "eval_steps_per_second": 0.445, + "eval_wer": 23.29468771849949, + "step": 31000 + }, + { + "epoch": 17.03, + "learning_rate": 2.2754430379746838e-06, + "loss": 0.0094, + "step": 31025 + }, + { + "epoch": 17.03, + "learning_rate": 2.2691139240506328e-06, + "loss": 0.0088, + "step": 31050 + }, + { + "epoch": 17.03, + "learning_rate": 2.2627848101265826e-06, + "loss": 0.0057, + "step": 31075 + }, + { + "epoch": 17.03, + "learning_rate": 2.256455696202532e-06, + "loss": 0.0066, + "step": 31100 + }, + { + "epoch": 17.03, + "learning_rate": 2.250126582278481e-06, + "loss": 0.008, + "step": 31125 + }, + { + "epoch": 17.03, + "learning_rate": 2.2437974683544305e-06, + "loss": 0.0048, + "step": 31150 + }, + { + "epoch": 17.04, + "learning_rate": 2.23746835443038e-06, + "loss": 0.0074, + "step": 31175 + }, + { + "epoch": 17.04, + "learning_rate": 2.2311392405063294e-06, + "loss": 0.0061, + "step": 31200 + }, + { + "epoch": 17.04, + "learning_rate": 2.2248101265822788e-06, + "loss": 0.0088, + "step": 31225 + }, + { + "epoch": 17.04, + "learning_rate": 2.2184810126582278e-06, + "loss": 0.0087, + "step": 31250 + }, + { + "epoch": 17.04, + "learning_rate": 2.2121518987341776e-06, + "loss": 0.0089, + "step": 31275 + }, + { + "epoch": 17.04, + "learning_rate": 2.2058227848101266e-06, + "loss": 0.0061, + "step": 31300 + }, + { + "epoch": 17.04, + "learning_rate": 2.199493670886076e-06, + "loss": 0.0097, + "step": 31325 + }, + { + "epoch": 17.04, + "learning_rate": 2.1931645569620255e-06, + "loss": 0.0063, + "step": 31350 + }, + { + "epoch": 17.04, + "learning_rate": 2.186835443037975e-06, + "loss": 0.0084, + "step": 31375 + }, + { + "epoch": 17.04, + "learning_rate": 2.1805063291139243e-06, + "loss": 0.0089, + "step": 31400 + }, + { + "epoch": 17.04, + "learning_rate": 2.1741772151898734e-06, + "loss": 0.0077, + "step": 31425 + }, + { + "epoch": 17.04, + "learning_rate": 2.1678481012658228e-06, + "loss": 0.0153, + "step": 31450 + }, + { + "epoch": 17.04, + "learning_rate": 2.1615189873417726e-06, + "loss": 0.0088, + "step": 31475 + }, + { + "epoch": 17.04, + "learning_rate": 2.1551898734177216e-06, + "loss": 0.0095, + "step": 31500 + }, + { + "epoch": 18.0, + "learning_rate": 2.148860759493671e-06, + "loss": 0.0084, + "step": 31525 + }, + { + "epoch": 18.0, + "learning_rate": 2.1425316455696205e-06, + "loss": 0.0054, + "step": 31550 + }, + { + "epoch": 18.0, + "learning_rate": 2.13620253164557e-06, + "loss": 0.013, + "step": 31575 + }, + { + "epoch": 18.0, + "learning_rate": 2.129873417721519e-06, + "loss": 0.0074, + "step": 31600 + }, + { + "epoch": 18.0, + "learning_rate": 2.1235443037974684e-06, + "loss": 0.0083, + "step": 31625 + }, + { + "epoch": 18.0, + "learning_rate": 2.1172151898734178e-06, + "loss": 0.0118, + "step": 31650 + }, + { + "epoch": 18.0, + "learning_rate": 2.110886075949367e-06, + "loss": 0.0095, + "step": 31675 + }, + { + "epoch": 18.0, + "learning_rate": 2.1045569620253166e-06, + "loss": 0.0075, + "step": 31700 + }, + { + "epoch": 18.01, + "learning_rate": 2.098227848101266e-06, + "loss": 0.0149, + "step": 31725 + }, + { + "epoch": 18.01, + "learning_rate": 2.0918987341772155e-06, + "loss": 0.0095, + "step": 31750 + }, + { + "epoch": 18.01, + "learning_rate": 2.085569620253165e-06, + "loss": 0.0098, + "step": 31775 + }, + { + "epoch": 18.01, + "learning_rate": 2.079240506329114e-06, + "loss": 0.0058, + "step": 31800 + }, + { + "epoch": 18.01, + "learning_rate": 2.0729113924050633e-06, + "loss": 0.0072, + "step": 31825 + }, + { + "epoch": 18.01, + "learning_rate": 2.0665822784810128e-06, + "loss": 0.0103, + "step": 31850 + }, + { + "epoch": 18.01, + "learning_rate": 2.060253164556962e-06, + "loss": 0.0085, + "step": 31875 + }, + { + "epoch": 18.01, + "learning_rate": 2.0539240506329116e-06, + "loss": 0.0064, + "step": 31900 + }, + { + "epoch": 18.01, + "learning_rate": 2.047594936708861e-06, + "loss": 0.0077, + "step": 31925 + }, + { + "epoch": 18.01, + "learning_rate": 2.0412658227848105e-06, + "loss": 0.0086, + "step": 31950 + }, + { + "epoch": 18.01, + "learning_rate": 2.0349367088607595e-06, + "loss": 0.0136, + "step": 31975 + }, + { + "epoch": 18.01, + "learning_rate": 2.028607594936709e-06, + "loss": 0.0085, + "step": 32000 + }, + { + "epoch": 18.01, + "eval_loss": 0.26488059759140015, + "eval_runtime": 1466.953, + "eval_samples_per_second": 7.097, + "eval_steps_per_second": 0.444, + "eval_wer": 23.089806338314446, + "step": 32000 + }, + { + "epoch": 18.01, + "learning_rate": 2.0222784810126583e-06, + "loss": 0.011, + "step": 32025 + }, + { + "epoch": 18.01, + "learning_rate": 2.0159493670886078e-06, + "loss": 0.0086, + "step": 32050 + }, + { + "epoch": 18.01, + "learning_rate": 2.009620253164557e-06, + "loss": 0.008, + "step": 32075 + }, + { + "epoch": 18.01, + "learning_rate": 2.0032911392405066e-06, + "loss": 0.0083, + "step": 32100 + }, + { + "epoch": 18.02, + "learning_rate": 1.996962025316456e-06, + "loss": 0.0073, + "step": 32125 + }, + { + "epoch": 18.02, + "learning_rate": 1.9906329113924055e-06, + "loss": 0.0036, + "step": 32150 + }, + { + "epoch": 18.02, + "learning_rate": 1.9843037974683545e-06, + "loss": 0.0096, + "step": 32175 + }, + { + "epoch": 18.02, + "learning_rate": 1.977974683544304e-06, + "loss": 0.0071, + "step": 32200 + }, + { + "epoch": 18.02, + "learning_rate": 1.9716455696202533e-06, + "loss": 0.009, + "step": 32225 + }, + { + "epoch": 18.02, + "learning_rate": 1.9653164556962028e-06, + "loss": 0.0093, + "step": 32250 + }, + { + "epoch": 18.02, + "learning_rate": 1.9589873417721518e-06, + "loss": 0.0071, + "step": 32275 + }, + { + "epoch": 18.02, + "learning_rate": 1.9526582278481016e-06, + "loss": 0.0037, + "step": 32300 + }, + { + "epoch": 18.02, + "learning_rate": 1.946329113924051e-06, + "loss": 0.007, + "step": 32325 + }, + { + "epoch": 18.02, + "learning_rate": 1.94e-06, + "loss": 0.0104, + "step": 32350 + }, + { + "epoch": 18.02, + "learning_rate": 1.9336708860759495e-06, + "loss": 0.0082, + "step": 32375 + }, + { + "epoch": 18.02, + "learning_rate": 1.927341772151899e-06, + "loss": 0.0095, + "step": 32400 + }, + { + "epoch": 18.02, + "learning_rate": 1.9210126582278483e-06, + "loss": 0.0101, + "step": 32425 + }, + { + "epoch": 18.02, + "learning_rate": 1.9146835443037973e-06, + "loss": 0.0122, + "step": 32450 + }, + { + "epoch": 18.02, + "learning_rate": 1.9083544303797468e-06, + "loss": 0.0082, + "step": 32475 + }, + { + "epoch": 18.02, + "learning_rate": 1.9020253164556964e-06, + "loss": 0.0093, + "step": 32500 + }, + { + "epoch": 18.03, + "learning_rate": 1.8956962025316458e-06, + "loss": 0.0073, + "step": 32525 + }, + { + "epoch": 18.03, + "learning_rate": 1.889367088607595e-06, + "loss": 0.0061, + "step": 32550 + }, + { + "epoch": 18.03, + "learning_rate": 1.8830379746835445e-06, + "loss": 0.0072, + "step": 32575 + }, + { + "epoch": 18.03, + "learning_rate": 1.876708860759494e-06, + "loss": 0.0064, + "step": 32600 + }, + { + "epoch": 18.03, + "learning_rate": 1.8703797468354431e-06, + "loss": 0.0084, + "step": 32625 + }, + { + "epoch": 18.03, + "learning_rate": 1.8640506329113926e-06, + "loss": 0.0067, + "step": 32650 + }, + { + "epoch": 18.03, + "learning_rate": 1.8577215189873418e-06, + "loss": 0.0052, + "step": 32675 + }, + { + "epoch": 18.03, + "learning_rate": 1.8513924050632912e-06, + "loss": 0.0085, + "step": 32700 + }, + { + "epoch": 18.03, + "learning_rate": 1.8450632911392408e-06, + "loss": 0.0077, + "step": 32725 + }, + { + "epoch": 18.03, + "learning_rate": 1.83873417721519e-06, + "loss": 0.0068, + "step": 32750 + }, + { + "epoch": 18.03, + "learning_rate": 1.8324050632911395e-06, + "loss": 0.0079, + "step": 32775 + }, + { + "epoch": 18.03, + "learning_rate": 1.8260759493670887e-06, + "loss": 0.008, + "step": 32800 + }, + { + "epoch": 18.03, + "learning_rate": 1.8197468354430381e-06, + "loss": 0.0071, + "step": 32825 + }, + { + "epoch": 18.03, + "learning_rate": 1.8134177215189873e-06, + "loss": 0.0091, + "step": 32850 + }, + { + "epoch": 18.03, + "learning_rate": 1.8070886075949368e-06, + "loss": 0.0059, + "step": 32875 + }, + { + "epoch": 18.03, + "learning_rate": 1.8007594936708862e-06, + "loss": 0.0079, + "step": 32900 + }, + { + "epoch": 18.04, + "learning_rate": 1.7944303797468356e-06, + "loss": 0.0055, + "step": 32925 + }, + { + "epoch": 18.04, + "learning_rate": 1.788101265822785e-06, + "loss": 0.0095, + "step": 32950 + }, + { + "epoch": 18.04, + "learning_rate": 1.7817721518987343e-06, + "loss": 0.0086, + "step": 32975 + }, + { + "epoch": 18.04, + "learning_rate": 1.7754430379746837e-06, + "loss": 0.0057, + "step": 33000 + }, + { + "epoch": 18.04, + "eval_loss": 0.2678743898868561, + "eval_runtime": 1484.126, + "eval_samples_per_second": 7.015, + "eval_steps_per_second": 0.439, + "eval_wer": 23.405258622091416, + "step": 33000 + }, + { + "epoch": 18.04, + "learning_rate": 1.7691139240506331e-06, + "loss": 0.0085, + "step": 33025 + }, + { + "epoch": 18.04, + "learning_rate": 1.7627848101265823e-06, + "loss": 0.0073, + "step": 33050 + }, + { + "epoch": 18.04, + "learning_rate": 1.7564556962025318e-06, + "loss": 0.0045, + "step": 33075 + }, + { + "epoch": 18.04, + "learning_rate": 1.750126582278481e-06, + "loss": 0.0065, + "step": 33100 + }, + { + "epoch": 18.04, + "learning_rate": 1.7437974683544306e-06, + "loss": 0.0076, + "step": 33125 + }, + { + "epoch": 18.04, + "learning_rate": 1.73746835443038e-06, + "loss": 0.007, + "step": 33150 + }, + { + "epoch": 18.04, + "learning_rate": 1.7311392405063293e-06, + "loss": 0.0109, + "step": 33175 + }, + { + "epoch": 18.04, + "learning_rate": 1.7248101265822787e-06, + "loss": 0.0095, + "step": 33200 + }, + { + "epoch": 18.04, + "learning_rate": 1.718481012658228e-06, + "loss": 0.0109, + "step": 33225 + }, + { + "epoch": 18.04, + "learning_rate": 1.7121518987341773e-06, + "loss": 0.0117, + "step": 33250 + }, + { + "epoch": 19.0, + "learning_rate": 1.7058227848101266e-06, + "loss": 0.0038, + "step": 33275 + }, + { + "epoch": 19.0, + "learning_rate": 1.699493670886076e-06, + "loss": 0.013, + "step": 33300 + }, + { + "epoch": 19.0, + "learning_rate": 1.6931645569620256e-06, + "loss": 0.0104, + "step": 33325 + }, + { + "epoch": 19.0, + "learning_rate": 1.6868354430379748e-06, + "loss": 0.0064, + "step": 33350 + }, + { + "epoch": 19.0, + "learning_rate": 1.6805063291139243e-06, + "loss": 0.0137, + "step": 33375 + }, + { + "epoch": 19.0, + "learning_rate": 1.6741772151898735e-06, + "loss": 0.012, + "step": 33400 + }, + { + "epoch": 19.0, + "learning_rate": 1.667848101265823e-06, + "loss": 0.0106, + "step": 33425 + }, + { + "epoch": 19.0, + "learning_rate": 1.6615189873417723e-06, + "loss": 0.0072, + "step": 33450 + }, + { + "epoch": 19.01, + "learning_rate": 1.6551898734177216e-06, + "loss": 0.0154, + "step": 33475 + }, + { + "epoch": 19.01, + "learning_rate": 1.648860759493671e-06, + "loss": 0.0129, + "step": 33500 + }, + { + "epoch": 19.01, + "learning_rate": 1.6425316455696206e-06, + "loss": 0.008, + "step": 33525 + }, + { + "epoch": 19.01, + "learning_rate": 1.6362025316455698e-06, + "loss": 0.0071, + "step": 33550 + }, + { + "epoch": 19.01, + "learning_rate": 1.6298734177215193e-06, + "loss": 0.0088, + "step": 33575 + }, + { + "epoch": 19.01, + "learning_rate": 1.6235443037974685e-06, + "loss": 0.0102, + "step": 33600 + }, + { + "epoch": 19.01, + "learning_rate": 1.617215189873418e-06, + "loss": 0.0064, + "step": 33625 + }, + { + "epoch": 19.01, + "learning_rate": 1.6108860759493671e-06, + "loss": 0.0098, + "step": 33650 + }, + { + "epoch": 19.01, + "learning_rate": 1.6045569620253166e-06, + "loss": 0.009, + "step": 33675 + }, + { + "epoch": 19.01, + "learning_rate": 1.5982278481012658e-06, + "loss": 0.0074, + "step": 33700 + }, + { + "epoch": 19.01, + "learning_rate": 1.5918987341772152e-06, + "loss": 0.0055, + "step": 33725 + }, + { + "epoch": 19.01, + "learning_rate": 1.5855696202531648e-06, + "loss": 0.0059, + "step": 33750 + }, + { + "epoch": 19.01, + "learning_rate": 1.579240506329114e-06, + "loss": 0.0102, + "step": 33775 + }, + { + "epoch": 19.01, + "learning_rate": 1.5729113924050635e-06, + "loss": 0.0111, + "step": 33800 + }, + { + "epoch": 19.01, + "learning_rate": 1.5665822784810127e-06, + "loss": 0.0033, + "step": 33825 + }, + { + "epoch": 19.01, + "learning_rate": 1.5602531645569621e-06, + "loss": 0.0057, + "step": 33850 + }, + { + "epoch": 19.02, + "learning_rate": 1.5539240506329115e-06, + "loss": 0.0074, + "step": 33875 + }, + { + "epoch": 19.02, + "learning_rate": 1.5475949367088608e-06, + "loss": 0.0056, + "step": 33900 + }, + { + "epoch": 19.02, + "learning_rate": 1.5412658227848102e-06, + "loss": 0.0053, + "step": 33925 + }, + { + "epoch": 19.02, + "learning_rate": 1.5349367088607598e-06, + "loss": 0.0049, + "step": 33950 + }, + { + "epoch": 19.02, + "learning_rate": 1.528607594936709e-06, + "loss": 0.0065, + "step": 33975 + }, + { + "epoch": 19.02, + "learning_rate": 1.5222784810126585e-06, + "loss": 0.007, + "step": 34000 + }, + { + "epoch": 19.02, + "eval_loss": 0.2660910487174988, + "eval_runtime": 1471.5922, + "eval_samples_per_second": 7.075, + "eval_steps_per_second": 0.442, + "eval_wer": 23.14834387551017, + "step": 34000 + }, + { + "epoch": 19.02, + "learning_rate": 1.5159493670886077e-06, + "loss": 0.0027, + "step": 34025 + }, + { + "epoch": 19.02, + "learning_rate": 1.5096202531645571e-06, + "loss": 0.0081, + "step": 34050 + }, + { + "epoch": 19.02, + "learning_rate": 1.5032911392405063e-06, + "loss": 0.0044, + "step": 34075 + }, + { + "epoch": 19.02, + "learning_rate": 1.4969620253164558e-06, + "loss": 0.0093, + "step": 34100 + }, + { + "epoch": 19.02, + "learning_rate": 1.490632911392405e-06, + "loss": 0.0067, + "step": 34125 + }, + { + "epoch": 19.02, + "learning_rate": 1.4843037974683546e-06, + "loss": 0.0047, + "step": 34150 + }, + { + "epoch": 19.02, + "learning_rate": 1.477974683544304e-06, + "loss": 0.0119, + "step": 34175 + }, + { + "epoch": 19.02, + "learning_rate": 1.4716455696202533e-06, + "loss": 0.0091, + "step": 34200 + }, + { + "epoch": 19.02, + "learning_rate": 1.4653164556962027e-06, + "loss": 0.0094, + "step": 34225 + }, + { + "epoch": 19.02, + "learning_rate": 1.458987341772152e-06, + "loss": 0.0069, + "step": 34250 + }, + { + "epoch": 19.03, + "learning_rate": 1.4526582278481013e-06, + "loss": 0.0066, + "step": 34275 + }, + { + "epoch": 19.03, + "learning_rate": 1.4463291139240508e-06, + "loss": 0.0072, + "step": 34300 + }, + { + "epoch": 19.03, + "learning_rate": 1.44e-06, + "loss": 0.0067, + "step": 34325 + }, + { + "epoch": 19.03, + "learning_rate": 1.4336708860759496e-06, + "loss": 0.0076, + "step": 34350 + }, + { + "epoch": 19.03, + "learning_rate": 1.427341772151899e-06, + "loss": 0.0083, + "step": 34375 + }, + { + "epoch": 19.03, + "learning_rate": 1.4212658227848103e-06, + "loss": 0.0072, + "step": 34400 + }, + { + "epoch": 19.03, + "learning_rate": 1.4149367088607597e-06, + "loss": 0.0086, + "step": 34425 + }, + { + "epoch": 19.03, + "learning_rate": 1.408607594936709e-06, + "loss": 0.0057, + "step": 34450 + }, + { + "epoch": 19.03, + "learning_rate": 1.4022784810126584e-06, + "loss": 0.0111, + "step": 34475 + }, + { + "epoch": 19.03, + "learning_rate": 1.3959493670886076e-06, + "loss": 0.01, + "step": 34500 + }, + { + "epoch": 19.03, + "learning_rate": 1.389620253164557e-06, + "loss": 0.008, + "step": 34525 + }, + { + "epoch": 19.03, + "learning_rate": 1.3832911392405066e-06, + "loss": 0.0082, + "step": 34550 + }, + { + "epoch": 19.03, + "learning_rate": 1.3769620253164559e-06, + "loss": 0.0062, + "step": 34575 + }, + { + "epoch": 19.03, + "learning_rate": 1.3706329113924053e-06, + "loss": 0.0052, + "step": 34600 + }, + { + "epoch": 19.03, + "learning_rate": 1.3643037974683545e-06, + "loss": 0.007, + "step": 34625 + }, + { + "epoch": 19.03, + "learning_rate": 1.357974683544304e-06, + "loss": 0.0045, + "step": 34650 + }, + { + "epoch": 19.04, + "learning_rate": 1.3516455696202531e-06, + "loss": 0.0089, + "step": 34675 + }, + { + "epoch": 19.04, + "learning_rate": 1.3453164556962026e-06, + "loss": 0.0081, + "step": 34700 + }, + { + "epoch": 19.04, + "learning_rate": 1.338987341772152e-06, + "loss": 0.0101, + "step": 34725 + }, + { + "epoch": 19.04, + "learning_rate": 1.3326582278481014e-06, + "loss": 0.0072, + "step": 34750 + }, + { + "epoch": 19.04, + "learning_rate": 1.3263291139240509e-06, + "loss": 0.005, + "step": 34775 + }, + { + "epoch": 19.04, + "learning_rate": 1.32e-06, + "loss": 0.0083, + "step": 34800 + }, + { + "epoch": 19.04, + "learning_rate": 1.3136708860759495e-06, + "loss": 0.0087, + "step": 34825 + }, + { + "epoch": 19.04, + "learning_rate": 1.307341772151899e-06, + "loss": 0.0067, + "step": 34850 + }, + { + "epoch": 19.04, + "learning_rate": 1.3010126582278481e-06, + "loss": 0.0048, + "step": 34875 + }, + { + "epoch": 19.04, + "learning_rate": 1.2946835443037976e-06, + "loss": 0.0065, + "step": 34900 + }, + { + "epoch": 19.04, + "learning_rate": 1.2883544303797468e-06, + "loss": 0.0076, + "step": 34925 + }, + { + "epoch": 19.04, + "learning_rate": 1.2820253164556964e-06, + "loss": 0.007, + "step": 34950 + }, + { + "epoch": 19.04, + "learning_rate": 1.2756962025316458e-06, + "loss": 0.0044, + "step": 34975 + }, + { + "epoch": 19.04, + "learning_rate": 1.269367088607595e-06, + "loss": 0.0082, + "step": 35000 + }, + { + "epoch": 19.04, + "eval_loss": 0.26719748973846436, + "eval_runtime": 1465.7868, + "eval_samples_per_second": 7.103, + "eval_steps_per_second": 0.444, + "eval_wer": 24.169498691035628, + "step": 35000 + }, + { + "epoch": 20.0, + "learning_rate": 1.2630379746835445e-06, + "loss": 0.0072, + "step": 35025 + }, + { + "epoch": 20.0, + "learning_rate": 1.2567088607594937e-06, + "loss": 0.0078, + "step": 35050 + }, + { + "epoch": 20.0, + "learning_rate": 1.2503797468354431e-06, + "loss": 0.0069, + "step": 35075 + }, + { + "epoch": 20.0, + "learning_rate": 1.2440506329113924e-06, + "loss": 0.0063, + "step": 35100 + }, + { + "epoch": 20.0, + "learning_rate": 1.237721518987342e-06, + "loss": 0.0073, + "step": 35125 + }, + { + "epoch": 20.0, + "learning_rate": 1.2313924050632912e-06, + "loss": 0.0076, + "step": 35150 + }, + { + "epoch": 20.0, + "learning_rate": 1.2250632911392406e-06, + "loss": 0.0084, + "step": 35175 + }, + { + "epoch": 20.0, + "learning_rate": 1.2187341772151899e-06, + "loss": 0.0061, + "step": 35200 + }, + { + "epoch": 20.01, + "learning_rate": 1.2124050632911393e-06, + "loss": 0.0074, + "step": 35225 + }, + { + "epoch": 20.01, + "learning_rate": 1.2060759493670887e-06, + "loss": 0.011, + "step": 35250 + }, + { + "epoch": 20.01, + "learning_rate": 1.1997468354430381e-06, + "loss": 0.0076, + "step": 35275 + }, + { + "epoch": 20.01, + "learning_rate": 1.1934177215189874e-06, + "loss": 0.007, + "step": 35300 + }, + { + "epoch": 20.01, + "learning_rate": 1.1870886075949368e-06, + "loss": 0.0074, + "step": 35325 + }, + { + "epoch": 20.01, + "learning_rate": 1.1807594936708862e-06, + "loss": 0.0077, + "step": 35350 + }, + { + "epoch": 20.01, + "learning_rate": 1.1744303797468354e-06, + "loss": 0.0074, + "step": 35375 + }, + { + "epoch": 20.01, + "learning_rate": 1.1681012658227848e-06, + "loss": 0.007, + "step": 35400 + }, + { + "epoch": 20.01, + "learning_rate": 1.1617721518987343e-06, + "loss": 0.0068, + "step": 35425 + }, + { + "epoch": 20.01, + "learning_rate": 1.1554430379746837e-06, + "loss": 0.0049, + "step": 35450 + }, + { + "epoch": 20.01, + "learning_rate": 1.149113924050633e-06, + "loss": 0.0074, + "step": 35475 + }, + { + "epoch": 20.01, + "learning_rate": 1.1427848101265823e-06, + "loss": 0.0097, + "step": 35500 + }, + { + "epoch": 20.01, + "learning_rate": 1.1364556962025318e-06, + "loss": 0.0065, + "step": 35525 + }, + { + "epoch": 20.01, + "learning_rate": 1.1301265822784812e-06, + "loss": 0.0056, + "step": 35550 + }, + { + "epoch": 20.01, + "learning_rate": 1.1237974683544304e-06, + "loss": 0.0056, + "step": 35575 + }, + { + "epoch": 20.01, + "learning_rate": 1.1174683544303798e-06, + "loss": 0.0056, + "step": 35600 + }, + { + "epoch": 20.02, + "learning_rate": 1.1111392405063293e-06, + "loss": 0.0104, + "step": 35625 + }, + { + "epoch": 20.02, + "learning_rate": 1.1048101265822787e-06, + "loss": 0.0071, + "step": 35650 + }, + { + "epoch": 20.02, + "learning_rate": 1.098481012658228e-06, + "loss": 0.0079, + "step": 35675 + }, + { + "epoch": 20.02, + "learning_rate": 1.0921518987341773e-06, + "loss": 0.0077, + "step": 35700 + }, + { + "epoch": 20.02, + "learning_rate": 1.0858227848101268e-06, + "loss": 0.0038, + "step": 35725 + }, + { + "epoch": 20.02, + "learning_rate": 1.079493670886076e-06, + "loss": 0.0035, + "step": 35750 + }, + { + "epoch": 20.02, + "learning_rate": 1.0731645569620254e-06, + "loss": 0.0047, + "step": 35775 + }, + { + "epoch": 20.02, + "learning_rate": 1.0668354430379746e-06, + "loss": 0.0079, + "step": 35800 + }, + { + "epoch": 20.02, + "learning_rate": 1.0605063291139243e-06, + "loss": 0.006, + "step": 35825 + }, + { + "epoch": 20.02, + "learning_rate": 1.0541772151898735e-06, + "loss": 0.0084, + "step": 35850 + }, + { + "epoch": 20.02, + "learning_rate": 1.047848101265823e-06, + "loss": 0.0073, + "step": 35875 + }, + { + "epoch": 20.02, + "learning_rate": 1.0415189873417721e-06, + "loss": 0.0049, + "step": 35900 + }, + { + "epoch": 20.02, + "learning_rate": 1.0351898734177216e-06, + "loss": 0.0114, + "step": 35925 + }, + { + "epoch": 20.02, + "learning_rate": 1.028860759493671e-06, + "loss": 0.0086, + "step": 35950 + }, + { + "epoch": 20.02, + "learning_rate": 1.0225316455696204e-06, + "loss": 0.0088, + "step": 35975 + }, + { + "epoch": 20.02, + "learning_rate": 1.0162025316455696e-06, + "loss": 0.0054, + "step": 36000 + }, + { + "epoch": 20.02, + "eval_loss": 0.26699596643447876, + "eval_runtime": 1450.3878, + "eval_samples_per_second": 7.178, + "eval_steps_per_second": 0.449, + "eval_wer": 22.689799834143646, + "step": 36000 + }, + { + "epoch": 20.03, + "learning_rate": 1.009873417721519e-06, + "loss": 0.005, + "step": 36025 + }, + { + "epoch": 20.03, + "learning_rate": 1.0035443037974685e-06, + "loss": 0.0058, + "step": 36050 + }, + { + "epoch": 20.03, + "learning_rate": 9.97215189873418e-07, + "loss": 0.0068, + "step": 36075 + }, + { + "epoch": 20.03, + "learning_rate": 9.908860759493671e-07, + "loss": 0.0074, + "step": 36100 + }, + { + "epoch": 20.03, + "learning_rate": 9.845569620253166e-07, + "loss": 0.0081, + "step": 36125 + }, + { + "epoch": 20.03, + "learning_rate": 9.78227848101266e-07, + "loss": 0.0063, + "step": 36150 + }, + { + "epoch": 20.03, + "learning_rate": 9.718987341772152e-07, + "loss": 0.0085, + "step": 36175 + }, + { + "epoch": 20.03, + "learning_rate": 9.655696202531646e-07, + "loss": 0.0059, + "step": 36200 + }, + { + "epoch": 20.03, + "learning_rate": 9.592405063291138e-07, + "loss": 0.0079, + "step": 36225 + }, + { + "epoch": 20.03, + "learning_rate": 9.529113924050634e-07, + "loss": 0.0047, + "step": 36250 + }, + { + "epoch": 20.03, + "learning_rate": 9.465822784810127e-07, + "loss": 0.0058, + "step": 36275 + }, + { + "epoch": 20.03, + "learning_rate": 9.402531645569621e-07, + "loss": 0.0039, + "step": 36300 + }, + { + "epoch": 20.03, + "learning_rate": 9.339240506329115e-07, + "loss": 0.0054, + "step": 36325 + }, + { + "epoch": 20.03, + "learning_rate": 9.275949367088609e-07, + "loss": 0.0074, + "step": 36350 + }, + { + "epoch": 20.03, + "learning_rate": 9.212658227848102e-07, + "loss": 0.0063, + "step": 36375 + }, + { + "epoch": 20.03, + "learning_rate": 9.149367088607595e-07, + "loss": 0.004, + "step": 36400 + }, + { + "epoch": 20.04, + "learning_rate": 9.086075949367088e-07, + "loss": 0.0071, + "step": 36425 + }, + { + "epoch": 20.04, + "learning_rate": 9.022784810126584e-07, + "loss": 0.0075, + "step": 36450 + }, + { + "epoch": 20.04, + "learning_rate": 8.959493670886077e-07, + "loss": 0.006, + "step": 36475 + }, + { + "epoch": 20.04, + "learning_rate": 8.89873417721519e-07, + "loss": 0.0063, + "step": 36500 + }, + { + "epoch": 20.04, + "learning_rate": 8.835443037974684e-07, + "loss": 0.0042, + "step": 36525 + }, + { + "epoch": 20.04, + "learning_rate": 8.772151898734178e-07, + "loss": 0.0038, + "step": 36550 + }, + { + "epoch": 20.04, + "learning_rate": 8.708860759493671e-07, + "loss": 0.0058, + "step": 36575 + }, + { + "epoch": 20.04, + "learning_rate": 8.645569620253165e-07, + "loss": 0.0061, + "step": 36600 + }, + { + "epoch": 20.04, + "learning_rate": 8.582278481012659e-07, + "loss": 0.0046, + "step": 36625 + }, + { + "epoch": 20.04, + "learning_rate": 8.518987341772153e-07, + "loss": 0.006, + "step": 36650 + }, + { + "epoch": 20.04, + "learning_rate": 8.455696202531646e-07, + "loss": 0.0082, + "step": 36675 + }, + { + "epoch": 20.04, + "learning_rate": 8.392405063291139e-07, + "loss": 0.0074, + "step": 36700 + }, + { + "epoch": 20.04, + "learning_rate": 8.329113924050633e-07, + "loss": 0.0107, + "step": 36725 + }, + { + "epoch": 20.04, + "learning_rate": 8.265822784810128e-07, + "loss": 0.0062, + "step": 36750 + }, + { + "epoch": 21.0, + "learning_rate": 8.202531645569621e-07, + "loss": 0.0127, + "step": 36775 + }, + { + "epoch": 21.0, + "learning_rate": 8.139240506329114e-07, + "loss": 0.0061, + "step": 36800 + }, + { + "epoch": 21.0, + "learning_rate": 8.075949367088608e-07, + "loss": 0.005, + "step": 36825 + }, + { + "epoch": 21.0, + "learning_rate": 8.012658227848103e-07, + "loss": 0.0071, + "step": 36850 + }, + { + "epoch": 21.0, + "learning_rate": 7.949367088607596e-07, + "loss": 0.008, + "step": 36875 + }, + { + "epoch": 21.0, + "learning_rate": 7.886075949367089e-07, + "loss": 0.0091, + "step": 36900 + }, + { + "epoch": 21.0, + "learning_rate": 7.822784810126583e-07, + "loss": 0.008, + "step": 36925 + }, + { + "epoch": 21.0, + "learning_rate": 7.759493670886077e-07, + "loss": 0.006, + "step": 36950 + }, + { + "epoch": 21.01, + "learning_rate": 7.69873417721519e-07, + "loss": 0.0083, + "step": 36975 + }, + { + "epoch": 21.01, + "learning_rate": 7.635443037974683e-07, + "loss": 0.0078, + "step": 37000 + }, + { + "epoch": 21.01, + "eval_loss": 0.2638327479362488, + "eval_runtime": 1492.7276, + "eval_samples_per_second": 6.974, + "eval_steps_per_second": 0.436, + "eval_wer": 23.06053756971658, + "step": 37000 + }, + { + "epoch": 21.01, + "learning_rate": 7.572151898734177e-07, + "loss": 0.0122, + "step": 37025 + }, + { + "epoch": 21.01, + "learning_rate": 7.508860759493672e-07, + "loss": 0.0093, + "step": 37050 + }, + { + "epoch": 21.01, + "learning_rate": 7.445569620253165e-07, + "loss": 0.0061, + "step": 37075 + }, + { + "epoch": 21.01, + "learning_rate": 7.382278481012658e-07, + "loss": 0.0063, + "step": 37100 + }, + { + "epoch": 21.01, + "learning_rate": 7.318987341772152e-07, + "loss": 0.011, + "step": 37125 + }, + { + "epoch": 21.01, + "learning_rate": 7.255696202531647e-07, + "loss": 0.0075, + "step": 37150 + }, + { + "epoch": 21.01, + "learning_rate": 7.19240506329114e-07, + "loss": 0.0056, + "step": 37175 + }, + { + "epoch": 21.01, + "learning_rate": 7.129113924050633e-07, + "loss": 0.0044, + "step": 37200 + }, + { + "epoch": 21.01, + "learning_rate": 7.065822784810127e-07, + "loss": 0.0068, + "step": 37225 + }, + { + "epoch": 21.01, + "learning_rate": 7.002531645569621e-07, + "loss": 0.0114, + "step": 37250 + }, + { + "epoch": 21.01, + "learning_rate": 6.939240506329114e-07, + "loss": 0.0104, + "step": 37275 + }, + { + "epoch": 21.01, + "learning_rate": 6.875949367088608e-07, + "loss": 0.0098, + "step": 37300 + }, + { + "epoch": 21.01, + "learning_rate": 6.812658227848102e-07, + "loss": 0.0078, + "step": 37325 + }, + { + "epoch": 21.01, + "learning_rate": 6.749367088607596e-07, + "loss": 0.0075, + "step": 37350 + }, + { + "epoch": 21.02, + "learning_rate": 6.686075949367089e-07, + "loss": 0.0081, + "step": 37375 + }, + { + "epoch": 21.02, + "learning_rate": 6.622784810126582e-07, + "loss": 0.0039, + "step": 37400 + }, + { + "epoch": 21.02, + "learning_rate": 6.559493670886076e-07, + "loss": 0.0081, + "step": 37425 + }, + { + "epoch": 21.02, + "learning_rate": 6.496202531645571e-07, + "loss": 0.0066, + "step": 37450 + }, + { + "epoch": 21.02, + "learning_rate": 6.432911392405064e-07, + "loss": 0.0076, + "step": 37475 + }, + { + "epoch": 21.02, + "learning_rate": 6.369620253164557e-07, + "loss": 0.0067, + "step": 37500 + }, + { + "epoch": 21.02, + "learning_rate": 6.306329113924051e-07, + "loss": 0.0081, + "step": 37525 + }, + { + "epoch": 21.02, + "learning_rate": 6.243037974683545e-07, + "loss": 0.0059, + "step": 37550 + }, + { + "epoch": 21.02, + "learning_rate": 6.179746835443039e-07, + "loss": 0.0072, + "step": 37575 + }, + { + "epoch": 21.02, + "learning_rate": 6.116455696202532e-07, + "loss": 0.0095, + "step": 37600 + }, + { + "epoch": 21.02, + "learning_rate": 6.053164556962026e-07, + "loss": 0.0078, + "step": 37625 + }, + { + "epoch": 21.02, + "learning_rate": 5.98987341772152e-07, + "loss": 0.0072, + "step": 37650 + }, + { + "epoch": 21.02, + "learning_rate": 5.926582278481013e-07, + "loss": 0.0057, + "step": 37675 + }, + { + "epoch": 21.02, + "learning_rate": 5.863291139240506e-07, + "loss": 0.0064, + "step": 37700 + }, + { + "epoch": 21.02, + "learning_rate": 5.800000000000001e-07, + "loss": 0.0069, + "step": 37725 + }, + { + "epoch": 21.02, + "learning_rate": 5.736708860759494e-07, + "loss": 0.0063, + "step": 37750 + }, + { + "epoch": 21.03, + "learning_rate": 5.673417721518988e-07, + "loss": 0.0073, + "step": 37775 + }, + { + "epoch": 21.03, + "learning_rate": 5.610126582278481e-07, + "loss": 0.0058, + "step": 37800 + }, + { + "epoch": 21.03, + "learning_rate": 5.546835443037976e-07, + "loss": 0.0056, + "step": 37825 + }, + { + "epoch": 21.03, + "learning_rate": 5.483544303797469e-07, + "loss": 0.0061, + "step": 37850 + }, + { + "epoch": 21.03, + "learning_rate": 5.420253164556962e-07, + "loss": 0.0039, + "step": 37875 + }, + { + "epoch": 21.03, + "learning_rate": 5.356962025316456e-07, + "loss": 0.0082, + "step": 37900 + }, + { + "epoch": 21.03, + "learning_rate": 5.29367088607595e-07, + "loss": 0.0067, + "step": 37925 + }, + { + "epoch": 21.03, + "learning_rate": 5.230379746835444e-07, + "loss": 0.0096, + "step": 37950 + }, + { + "epoch": 21.03, + "learning_rate": 5.167088607594937e-07, + "loss": 0.004, + "step": 37975 + }, + { + "epoch": 21.03, + "learning_rate": 5.103797468354431e-07, + "loss": 0.0055, + "step": 38000 + }, + { + "epoch": 21.03, + "eval_loss": 0.2642187774181366, + "eval_runtime": 1455.8371, + "eval_samples_per_second": 7.151, + "eval_steps_per_second": 0.447, + "eval_wer": 22.437763215662045, + "step": 38000 + }, + { + "epoch": 21.03, + "learning_rate": 5.040506329113924e-07, + "loss": 0.0067, + "step": 38025 + }, + { + "epoch": 21.03, + "learning_rate": 4.977215189873418e-07, + "loss": 0.0053, + "step": 38050 + }, + { + "epoch": 21.03, + "learning_rate": 4.913924050632912e-07, + "loss": 0.0048, + "step": 38075 + }, + { + "epoch": 21.03, + "learning_rate": 4.850632911392405e-07, + "loss": 0.0091, + "step": 38100 + }, + { + "epoch": 21.03, + "learning_rate": 4.787341772151898e-07, + "loss": 0.0055, + "step": 38125 + }, + { + "epoch": 21.03, + "learning_rate": 4.7240506329113927e-07, + "loss": 0.0052, + "step": 38150 + }, + { + "epoch": 21.04, + "learning_rate": 4.6607594936708865e-07, + "loss": 0.011, + "step": 38175 + }, + { + "epoch": 21.04, + "learning_rate": 4.59746835443038e-07, + "loss": 0.0054, + "step": 38200 + }, + { + "epoch": 21.04, + "learning_rate": 4.5341772151898734e-07, + "loss": 0.0072, + "step": 38225 + }, + { + "epoch": 21.04, + "learning_rate": 4.4708860759493677e-07, + "loss": 0.0044, + "step": 38250 + }, + { + "epoch": 21.04, + "learning_rate": 4.407594936708861e-07, + "loss": 0.0075, + "step": 38275 + }, + { + "epoch": 21.04, + "learning_rate": 4.344303797468355e-07, + "loss": 0.0056, + "step": 38300 + }, + { + "epoch": 21.04, + "learning_rate": 4.2810126582278484e-07, + "loss": 0.0063, + "step": 38325 + }, + { + "epoch": 21.04, + "learning_rate": 4.217721518987342e-07, + "loss": 0.0068, + "step": 38350 + }, + { + "epoch": 21.04, + "learning_rate": 4.1544303797468354e-07, + "loss": 0.0066, + "step": 38375 + }, + { + "epoch": 21.04, + "learning_rate": 4.0911392405063296e-07, + "loss": 0.0035, + "step": 38400 + }, + { + "epoch": 21.04, + "learning_rate": 4.027848101265823e-07, + "loss": 0.0046, + "step": 38425 + }, + { + "epoch": 21.04, + "learning_rate": 3.964556962025317e-07, + "loss": 0.0098, + "step": 38450 + }, + { + "epoch": 21.04, + "learning_rate": 3.9012658227848104e-07, + "loss": 0.0058, + "step": 38475 + }, + { + "epoch": 21.04, + "learning_rate": 3.837974683544304e-07, + "loss": 0.0091, + "step": 38500 + }, + { + "epoch": 22.0, + "learning_rate": 3.774683544303798e-07, + "loss": 0.0105, + "step": 38525 + }, + { + "epoch": 22.0, + "learning_rate": 3.7113924050632916e-07, + "loss": 0.0044, + "step": 38550 + }, + { + "epoch": 22.0, + "learning_rate": 3.648101265822785e-07, + "loss": 0.0068, + "step": 38575 + }, + { + "epoch": 22.0, + "learning_rate": 3.584810126582279e-07, + "loss": 0.0049, + "step": 38600 + }, + { + "epoch": 22.0, + "learning_rate": 3.5215189873417723e-07, + "loss": 0.0063, + "step": 38625 + }, + { + "epoch": 22.0, + "learning_rate": 3.4582278481012666e-07, + "loss": 0.0069, + "step": 38650 + }, + { + "epoch": 22.0, + "learning_rate": 3.39493670886076e-07, + "loss": 0.0075, + "step": 38675 + }, + { + "epoch": 22.0, + "learning_rate": 3.3316455696202536e-07, + "loss": 0.0048, + "step": 38700 + }, + { + "epoch": 22.01, + "learning_rate": 3.2683544303797473e-07, + "loss": 0.0118, + "step": 38725 + }, + { + "epoch": 22.01, + "learning_rate": 3.205063291139241e-07, + "loss": 0.0073, + "step": 38750 + }, + { + "epoch": 22.01, + "learning_rate": 3.1417721518987343e-07, + "loss": 0.0066, + "step": 38775 + }, + { + "epoch": 22.01, + "learning_rate": 3.078481012658228e-07, + "loss": 0.0044, + "step": 38800 + }, + { + "epoch": 22.01, + "learning_rate": 3.015189873417722e-07, + "loss": 0.0079, + "step": 38825 + }, + { + "epoch": 22.01, + "learning_rate": 2.9518987341772155e-07, + "loss": 0.0093, + "step": 38850 + }, + { + "epoch": 22.01, + "learning_rate": 2.888607594936709e-07, + "loss": 0.0059, + "step": 38875 + }, + { + "epoch": 22.01, + "learning_rate": 2.825316455696203e-07, + "loss": 0.0098, + "step": 38900 + }, + { + "epoch": 22.01, + "learning_rate": 2.762025316455697e-07, + "loss": 0.0097, + "step": 38925 + }, + { + "epoch": 22.01, + "learning_rate": 2.69873417721519e-07, + "loss": 0.0069, + "step": 38950 + }, + { + "epoch": 22.01, + "learning_rate": 2.6354430379746837e-07, + "loss": 0.006, + "step": 38975 + }, + { + "epoch": 22.01, + "learning_rate": 2.5721518987341775e-07, + "loss": 0.0085, + "step": 39000 + }, + { + "epoch": 22.01, + "eval_loss": 0.26025164127349854, + "eval_runtime": 1463.2032, + "eval_samples_per_second": 7.115, + "eval_steps_per_second": 0.445, + "eval_wer": 22.616627912648987, + "step": 39000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.800138010116096e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-39000/training_args.bin b/checkpoint-39000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-39000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-4000/config.json b/checkpoint-4000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-4000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-4000/generation_config.json b/checkpoint-4000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-4000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-4000/optimizer.pt b/checkpoint-4000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cde0648b78513251a5ea62a0ce39a866a595d9b9 --- /dev/null +++ b/checkpoint-4000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e5ccf5902ca36237e0033d3dfb60f7a3cf349de6f5603c7f68b5a1e404cafa9 +size 1934161093 diff --git a/checkpoint-4000/preprocessor_config.json b/checkpoint-4000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-4000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-4000/pytorch_model.bin b/checkpoint-4000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..014da7fdc79e58b348b57aee2ed9b3db73462bb1 --- /dev/null +++ b/checkpoint-4000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d2b1cc94969de9ecb84270f80dc6f018c01185fb0346c6568f9900334b809ce +size 967102601 diff --git a/checkpoint-4000/rng_state.pth b/checkpoint-4000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ca91ffe8e49a6022b7c8fdbc8bd8b6729e7b1762 --- /dev/null +++ b/checkpoint-4000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c75071767f7713dc59f081b716982baac519bed5dde3c9cd1eabcc511049367 +size 14575 diff --git a/checkpoint-4000/scaler.pt b/checkpoint-4000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9760a75af9d48e4e0ce8532ef0f83554028e9068 --- /dev/null +++ b/checkpoint-4000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e09928762acee67462589dd2d6a548f17d2906112215d94333d0fdace4f354b +size 557 diff --git a/checkpoint-4000/scheduler.pt b/checkpoint-4000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e00829f40ae91147b0fa891b8bdecde59ba83130 --- /dev/null +++ b/checkpoint-4000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d86403abbddfc44f8798046fc14f97bee510d5c9e51d2165158638a7e9944fd +size 627 diff --git a/checkpoint-4000/trainer_state.json b/checkpoint-4000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c7ab368e81b4d75fd74fd82e213d24787a10a98b --- /dev/null +++ b/checkpoint-4000/trainer_state.json @@ -0,0 +1,1012 @@ +{ + "best_metric": 35.12252231743606, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-2000", + "epoch": 2.01245, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.84636939124736e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4000/training_args.bin b/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-40000/config.json b/checkpoint-40000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-40000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-40000/generation_config.json b/checkpoint-40000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-40000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-40000/optimizer.pt b/checkpoint-40000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a6901c95870ffa7d3befb261b145e7c69b6b32bf --- /dev/null +++ b/checkpoint-40000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ee70889aed1a51c850713a0da7c0459608b4f1e5c8d9e49ae29f5905b91a215 +size 1934161093 diff --git a/checkpoint-40000/preprocessor_config.json b/checkpoint-40000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-40000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-40000/pytorch_model.bin b/checkpoint-40000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..41e3f14a47de4c906bb25eeac1bd81e762747934 --- /dev/null +++ b/checkpoint-40000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6862f96bea60779d33b9b9f9df9766f54bec04b84ff9a2c92949972f42f5213 +size 967102601 diff --git a/checkpoint-40000/rng_state.pth b/checkpoint-40000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e3e615309cc3f9b193d29af4da33c0968e66151b --- /dev/null +++ b/checkpoint-40000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78bf932619ab80dc2eca63fdfcd797700550425a6ca7fa4749f1901f3ebb5bcd +size 14575 diff --git a/checkpoint-40000/scaler.pt b/checkpoint-40000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd5d78e72f3ee5a8c26918b0811ad64a4673a4e5 --- /dev/null +++ b/checkpoint-40000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a05b5d3c08e7fe261ea90b1c68a9460b173141e0be62efd347f93903acabd4b +size 557 diff --git a/checkpoint-40000/scheduler.pt b/checkpoint-40000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b0dfa6c2e7b083186da6b50bea69f578b5fcb3b --- /dev/null +++ b/checkpoint-40000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1857941e35ab1d38161b9abce3e3dc3f5667974d986aacd44125ab865718f96 +size 627 diff --git a/checkpoint-40000/trainer_state.json b/checkpoint-40000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..90f13be5e6ec8007a79104303d42dc83d4ab753a --- /dev/null +++ b/checkpoint-40000/trainer_state.json @@ -0,0 +1,9976 @@ +{ + "best_metric": 22.437763215662045, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-38000", + "epoch": 22.03695, + "global_step": 40000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + }, + { + "epoch": 5.01, + "learning_rate": 7.843037974683545e-06, + "loss": 0.0434, + "step": 9025 + }, + { + "epoch": 5.01, + "learning_rate": 7.836708860759495e-06, + "loss": 0.033, + "step": 9050 + }, + { + "epoch": 5.01, + "learning_rate": 7.830379746835444e-06, + "loss": 0.0494, + "step": 9075 + }, + { + "epoch": 5.01, + "learning_rate": 7.824050632911394e-06, + "loss": 0.0522, + "step": 9100 + }, + { + "epoch": 5.01, + "learning_rate": 7.817721518987343e-06, + "loss": 0.0338, + "step": 9125 + }, + { + "epoch": 5.01, + "learning_rate": 7.811392405063291e-06, + "loss": 0.0395, + "step": 9150 + }, + { + "epoch": 5.01, + "learning_rate": 7.80506329113924e-06, + "loss": 0.0332, + "step": 9175 + }, + { + "epoch": 5.01, + "learning_rate": 7.79873417721519e-06, + "loss": 0.0381, + "step": 9200 + }, + { + "epoch": 5.01, + "learning_rate": 7.79240506329114e-06, + "loss": 0.0409, + "step": 9225 + }, + { + "epoch": 5.01, + "learning_rate": 7.786075949367089e-06, + "loss": 0.0529, + "step": 9250 + }, + { + "epoch": 5.01, + "learning_rate": 7.779746835443038e-06, + "loss": 0.0498, + "step": 9275 + }, + { + "epoch": 5.01, + "learning_rate": 7.773417721518987e-06, + "loss": 0.0379, + "step": 9300 + }, + { + "epoch": 5.01, + "learning_rate": 7.767088607594937e-06, + "loss": 0.0316, + "step": 9325 + }, + { + "epoch": 5.01, + "learning_rate": 7.760759493670886e-06, + "loss": 0.0398, + "step": 9350 + }, + { + "epoch": 5.02, + "learning_rate": 7.754430379746836e-06, + "loss": 0.0427, + "step": 9375 + }, + { + "epoch": 5.02, + "learning_rate": 7.748101265822785e-06, + "loss": 0.0363, + "step": 9400 + }, + { + "epoch": 5.02, + "learning_rate": 7.741772151898735e-06, + "loss": 0.0393, + "step": 9425 + }, + { + "epoch": 5.02, + "learning_rate": 7.735443037974684e-06, + "loss": 0.0377, + "step": 9450 + }, + { + "epoch": 5.02, + "learning_rate": 7.729113924050633e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 5.02, + "learning_rate": 7.722784810126583e-06, + "loss": 0.0359, + "step": 9500 + }, + { + "epoch": 5.02, + "learning_rate": 7.716455696202532e-06, + "loss": 0.0289, + "step": 9525 + }, + { + "epoch": 5.02, + "learning_rate": 7.710126582278482e-06, + "loss": 0.0354, + "step": 9550 + }, + { + "epoch": 5.02, + "learning_rate": 7.703797468354431e-06, + "loss": 0.0309, + "step": 9575 + }, + { + "epoch": 5.02, + "learning_rate": 7.69746835443038e-06, + "loss": 0.0321, + "step": 9600 + }, + { + "epoch": 5.02, + "learning_rate": 7.69113924050633e-06, + "loss": 0.0392, + "step": 9625 + }, + { + "epoch": 5.02, + "learning_rate": 7.68481012658228e-06, + "loss": 0.0362, + "step": 9650 + }, + { + "epoch": 5.02, + "learning_rate": 7.678481012658229e-06, + "loss": 0.0358, + "step": 9675 + }, + { + "epoch": 5.02, + "learning_rate": 7.672151898734178e-06, + "loss": 0.045, + "step": 9700 + }, + { + "epoch": 5.02, + "learning_rate": 7.665822784810128e-06, + "loss": 0.0401, + "step": 9725 + }, + { + "epoch": 5.02, + "learning_rate": 7.659493670886077e-06, + "loss": 0.0357, + "step": 9750 + }, + { + "epoch": 5.03, + "learning_rate": 7.653164556962027e-06, + "loss": 0.0256, + "step": 9775 + }, + { + "epoch": 5.03, + "learning_rate": 7.646835443037976e-06, + "loss": 0.0358, + "step": 9800 + }, + { + "epoch": 5.03, + "learning_rate": 7.640506329113925e-06, + "loss": 0.0361, + "step": 9825 + }, + { + "epoch": 5.03, + "learning_rate": 7.634177215189875e-06, + "loss": 0.0365, + "step": 9850 + }, + { + "epoch": 5.03, + "learning_rate": 7.6278481012658234e-06, + "loss": 0.0344, + "step": 9875 + }, + { + "epoch": 5.03, + "learning_rate": 7.621518987341773e-06, + "loss": 0.0349, + "step": 9900 + }, + { + "epoch": 5.03, + "learning_rate": 7.615189873417722e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 5.03, + "learning_rate": 7.608860759493672e-06, + "loss": 0.0259, + "step": 9950 + }, + { + "epoch": 5.03, + "learning_rate": 7.602531645569621e-06, + "loss": 0.0289, + "step": 9975 + }, + { + "epoch": 5.03, + "learning_rate": 7.59620253164557e-06, + "loss": 0.0269, + "step": 10000 + }, + { + "epoch": 5.03, + "eval_loss": 0.2615405321121216, + "eval_runtime": 1420.7197, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "eval_wer": 36.23310948145498, + "step": 10000 + }, + { + "epoch": 5.03, + "learning_rate": 7.589873417721519e-06, + "loss": 0.0261, + "step": 10025 + }, + { + "epoch": 5.03, + "learning_rate": 7.583544303797469e-06, + "loss": 0.0278, + "step": 10050 + }, + { + "epoch": 5.03, + "learning_rate": 7.577215189873418e-06, + "loss": 0.0248, + "step": 10075 + }, + { + "epoch": 5.03, + "learning_rate": 7.5708860759493674e-06, + "loss": 0.0263, + "step": 10100 + }, + { + "epoch": 5.03, + "learning_rate": 7.564556962025317e-06, + "loss": 0.0253, + "step": 10125 + }, + { + "epoch": 5.03, + "learning_rate": 7.558227848101266e-06, + "loss": 0.0248, + "step": 10150 + }, + { + "epoch": 5.04, + "learning_rate": 7.551898734177216e-06, + "loss": 0.0281, + "step": 10175 + }, + { + "epoch": 5.04, + "learning_rate": 7.545569620253165e-06, + "loss": 0.0276, + "step": 10200 + }, + { + "epoch": 5.04, + "learning_rate": 7.539240506329114e-06, + "loss": 0.0285, + "step": 10225 + }, + { + "epoch": 5.04, + "learning_rate": 7.532911392405063e-06, + "loss": 0.0265, + "step": 10250 + }, + { + "epoch": 5.04, + "learning_rate": 7.526582278481013e-06, + "loss": 0.0288, + "step": 10275 + }, + { + "epoch": 5.04, + "learning_rate": 7.520253164556963e-06, + "loss": 0.0247, + "step": 10300 + }, + { + "epoch": 5.04, + "learning_rate": 7.513924050632912e-06, + "loss": 0.0281, + "step": 10325 + }, + { + "epoch": 5.04, + "learning_rate": 7.507594936708862e-06, + "loss": 0.0262, + "step": 10350 + }, + { + "epoch": 5.04, + "learning_rate": 7.501265822784811e-06, + "loss": 0.0243, + "step": 10375 + }, + { + "epoch": 5.04, + "learning_rate": 7.494936708860761e-06, + "loss": 0.0331, + "step": 10400 + }, + { + "epoch": 5.04, + "learning_rate": 7.48860759493671e-06, + "loss": 0.0346, + "step": 10425 + }, + { + "epoch": 5.04, + "learning_rate": 7.4822784810126594e-06, + "loss": 0.0348, + "step": 10450 + }, + { + "epoch": 5.04, + "learning_rate": 7.475949367088609e-06, + "loss": 0.0272, + "step": 10475 + }, + { + "epoch": 5.04, + "learning_rate": 7.4696202531645574e-06, + "loss": 0.0326, + "step": 10500 + }, + { + "epoch": 6.0, + "learning_rate": 7.463291139240507e-06, + "loss": 0.033, + "step": 10525 + }, + { + "epoch": 6.0, + "learning_rate": 7.456962025316456e-06, + "loss": 0.0333, + "step": 10550 + }, + { + "epoch": 6.0, + "learning_rate": 7.450632911392406e-06, + "loss": 0.0371, + "step": 10575 + }, + { + "epoch": 6.0, + "learning_rate": 7.444303797468355e-06, + "loss": 0.0359, + "step": 10600 + }, + { + "epoch": 6.0, + "learning_rate": 7.437974683544305e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 6.0, + "learning_rate": 7.431645569620254e-06, + "loss": 0.0382, + "step": 10650 + }, + { + "epoch": 6.0, + "learning_rate": 7.4253164556962034e-06, + "loss": 0.0337, + "step": 10675 + }, + { + "epoch": 6.0, + "learning_rate": 7.418987341772153e-06, + "loss": 0.0422, + "step": 10700 + }, + { + "epoch": 6.01, + "learning_rate": 7.4126582278481014e-06, + "loss": 0.0467, + "step": 10725 + }, + { + "epoch": 6.01, + "learning_rate": 7.406329113924051e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 6.01, + "learning_rate": 7.4e-06, + "loss": 0.0328, + "step": 10775 + }, + { + "epoch": 6.01, + "learning_rate": 7.39367088607595e-06, + "loss": 0.0295, + "step": 10800 + }, + { + "epoch": 6.01, + "learning_rate": 7.387341772151899e-06, + "loss": 0.0431, + "step": 10825 + }, + { + "epoch": 6.01, + "learning_rate": 7.381012658227849e-06, + "loss": 0.0456, + "step": 10850 + }, + { + "epoch": 6.01, + "learning_rate": 7.374683544303798e-06, + "loss": 0.0267, + "step": 10875 + }, + { + "epoch": 6.01, + "learning_rate": 7.3683544303797474e-06, + "loss": 0.0334, + "step": 10900 + }, + { + "epoch": 6.01, + "learning_rate": 7.362025316455697e-06, + "loss": 0.0296, + "step": 10925 + }, + { + "epoch": 6.01, + "learning_rate": 7.3556962025316454e-06, + "loss": 0.0261, + "step": 10950 + }, + { + "epoch": 6.01, + "learning_rate": 7.349367088607595e-06, + "loss": 0.0299, + "step": 10975 + }, + { + "epoch": 6.01, + "learning_rate": 7.343037974683544e-06, + "loss": 0.0379, + "step": 11000 + }, + { + "epoch": 6.01, + "eval_loss": 0.23971551656723022, + "eval_runtime": 1435.6772, + "eval_samples_per_second": 7.252, + "eval_steps_per_second": 0.453, + "eval_wer": 31.7712483129807, + "step": 11000 + }, + { + "epoch": 6.01, + "learning_rate": 7.336708860759494e-06, + "loss": 0.0457, + "step": 11025 + }, + { + "epoch": 6.01, + "learning_rate": 7.330379746835443e-06, + "loss": 0.0275, + "step": 11050 + }, + { + "epoch": 6.01, + "learning_rate": 7.324050632911393e-06, + "loss": 0.0254, + "step": 11075 + }, + { + "epoch": 6.01, + "learning_rate": 7.317721518987342e-06, + "loss": 0.035, + "step": 11100 + }, + { + "epoch": 6.02, + "learning_rate": 7.311392405063292e-06, + "loss": 0.0273, + "step": 11125 + }, + { + "epoch": 6.02, + "learning_rate": 7.305063291139242e-06, + "loss": 0.0227, + "step": 11150 + }, + { + "epoch": 6.02, + "learning_rate": 7.298734177215191e-06, + "loss": 0.0263, + "step": 11175 + }, + { + "epoch": 6.02, + "learning_rate": 7.2924050632911406e-06, + "loss": 0.0294, + "step": 11200 + }, + { + "epoch": 6.02, + "learning_rate": 7.28607594936709e-06, + "loss": 0.026, + "step": 11225 + }, + { + "epoch": 6.02, + "learning_rate": 7.279746835443039e-06, + "loss": 0.0251, + "step": 11250 + }, + { + "epoch": 6.02, + "learning_rate": 7.273417721518988e-06, + "loss": 0.024, + "step": 11275 + }, + { + "epoch": 6.02, + "learning_rate": 7.2670886075949374e-06, + "loss": 0.0344, + "step": 11300 + }, + { + "epoch": 6.02, + "learning_rate": 7.260759493670887e-06, + "loss": 0.0338, + "step": 11325 + }, + { + "epoch": 6.02, + "learning_rate": 7.254430379746836e-06, + "loss": 0.0266, + "step": 11350 + }, + { + "epoch": 6.02, + "learning_rate": 7.248101265822786e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 6.02, + "learning_rate": 7.241772151898735e-06, + "loss": 0.0338, + "step": 11400 + }, + { + "epoch": 6.02, + "learning_rate": 7.2354430379746846e-06, + "loss": 0.0321, + "step": 11425 + }, + { + "epoch": 6.02, + "learning_rate": 7.229113924050634e-06, + "loss": 0.0385, + "step": 11450 + }, + { + "epoch": 6.02, + "learning_rate": 7.222784810126583e-06, + "loss": 0.0284, + "step": 11475 + }, + { + "epoch": 6.02, + "learning_rate": 7.216455696202532e-06, + "loss": 0.0268, + "step": 11500 + }, + { + "epoch": 6.03, + "learning_rate": 7.2101265822784814e-06, + "loss": 0.0191, + "step": 11525 + }, + { + "epoch": 6.03, + "learning_rate": 7.203797468354431e-06, + "loss": 0.0243, + "step": 11550 + }, + { + "epoch": 6.03, + "learning_rate": 7.19746835443038e-06, + "loss": 0.0228, + "step": 11575 + }, + { + "epoch": 6.03, + "learning_rate": 7.19113924050633e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 6.03, + "learning_rate": 7.184810126582279e-06, + "loss": 0.0255, + "step": 11625 + }, + { + "epoch": 6.03, + "learning_rate": 7.1784810126582286e-06, + "loss": 0.0218, + "step": 11650 + }, + { + "epoch": 6.03, + "learning_rate": 7.172151898734178e-06, + "loss": 0.0249, + "step": 11675 + }, + { + "epoch": 6.03, + "learning_rate": 7.165822784810127e-06, + "loss": 0.0254, + "step": 11700 + }, + { + "epoch": 6.03, + "learning_rate": 7.159493670886076e-06, + "loss": 0.0323, + "step": 11725 + }, + { + "epoch": 6.03, + "learning_rate": 7.1531645569620254e-06, + "loss": 0.021, + "step": 11750 + }, + { + "epoch": 6.03, + "learning_rate": 7.146835443037975e-06, + "loss": 0.0242, + "step": 11775 + }, + { + "epoch": 6.03, + "learning_rate": 7.140506329113924e-06, + "loss": 0.0233, + "step": 11800 + }, + { + "epoch": 6.03, + "learning_rate": 7.134177215189874e-06, + "loss": 0.0237, + "step": 11825 + }, + { + "epoch": 6.03, + "learning_rate": 7.127848101265823e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.03, + "learning_rate": 7.121518987341773e-06, + "loss": 0.0256, + "step": 11875 + }, + { + "epoch": 6.03, + "learning_rate": 7.115189873417722e-06, + "loss": 0.0264, + "step": 11900 + }, + { + "epoch": 6.04, + "learning_rate": 7.108860759493671e-06, + "loss": 0.0254, + "step": 11925 + }, + { + "epoch": 6.04, + "learning_rate": 7.102531645569622e-06, + "loss": 0.025, + "step": 11950 + }, + { + "epoch": 6.04, + "learning_rate": 7.096202531645571e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.04, + "learning_rate": 7.08987341772152e-06, + "loss": 0.0241, + "step": 12000 + }, + { + "epoch": 6.04, + "eval_loss": 0.26222535967826843, + "eval_runtime": 1460.2321, + "eval_samples_per_second": 7.13, + "eval_steps_per_second": 0.446, + "eval_wer": 32.87370526350022, + "step": 12000 + }, + { + "epoch": 6.04, + "learning_rate": 7.083544303797469e-06, + "loss": 0.0337, + "step": 12025 + }, + { + "epoch": 6.04, + "learning_rate": 7.0772151898734186e-06, + "loss": 0.0179, + "step": 12050 + }, + { + "epoch": 6.04, + "learning_rate": 7.070886075949368e-06, + "loss": 0.0174, + "step": 12075 + }, + { + "epoch": 6.04, + "learning_rate": 7.0645569620253174e-06, + "loss": 0.0211, + "step": 12100 + }, + { + "epoch": 6.04, + "learning_rate": 7.058227848101267e-06, + "loss": 0.0224, + "step": 12125 + }, + { + "epoch": 6.04, + "learning_rate": 7.051898734177216e-06, + "loss": 0.0226, + "step": 12150 + }, + { + "epoch": 6.04, + "learning_rate": 7.045569620253166e-06, + "loss": 0.0271, + "step": 12175 + }, + { + "epoch": 6.04, + "learning_rate": 7.039240506329114e-06, + "loss": 0.0264, + "step": 12200 + }, + { + "epoch": 6.04, + "learning_rate": 7.032911392405064e-06, + "loss": 0.031, + "step": 12225 + }, + { + "epoch": 6.04, + "learning_rate": 7.026582278481013e-06, + "loss": 0.022, + "step": 12250 + }, + { + "epoch": 7.0, + "learning_rate": 7.0202531645569626e-06, + "loss": 0.0266, + "step": 12275 + }, + { + "epoch": 7.0, + "learning_rate": 7.013924050632912e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 7.0, + "learning_rate": 7.0075949367088614e-06, + "loss": 0.028, + "step": 12325 + }, + { + "epoch": 7.0, + "learning_rate": 7.001265822784811e-06, + "loss": 0.0361, + "step": 12350 + }, + { + "epoch": 7.0, + "learning_rate": 6.99493670886076e-06, + "loss": 0.0383, + "step": 12375 + }, + { + "epoch": 7.0, + "learning_rate": 6.98860759493671e-06, + "loss": 0.0312, + "step": 12400 + }, + { + "epoch": 7.0, + "learning_rate": 6.982278481012658e-06, + "loss": 0.0275, + "step": 12425 + }, + { + "epoch": 7.0, + "learning_rate": 6.975949367088608e-06, + "loss": 0.0407, + "step": 12450 + }, + { + "epoch": 7.01, + "learning_rate": 6.969620253164557e-06, + "loss": 0.0436, + "step": 12475 + }, + { + "epoch": 7.01, + "learning_rate": 6.9632911392405066e-06, + "loss": 0.0359, + "step": 12500 + }, + { + "epoch": 7.01, + "learning_rate": 6.956962025316456e-06, + "loss": 0.0281, + "step": 12525 + }, + { + "epoch": 7.01, + "learning_rate": 6.9506329113924054e-06, + "loss": 0.0283, + "step": 12550 + }, + { + "epoch": 7.01, + "learning_rate": 6.944303797468355e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.01, + "learning_rate": 6.937974683544304e-06, + "loss": 0.0295, + "step": 12600 + }, + { + "epoch": 7.01, + "learning_rate": 6.931645569620254e-06, + "loss": 0.0287, + "step": 12625 + }, + { + "epoch": 7.01, + "learning_rate": 6.925316455696202e-06, + "loss": 0.0261, + "step": 12650 + }, + { + "epoch": 7.01, + "learning_rate": 6.918987341772152e-06, + "loss": 0.0259, + "step": 12675 + }, + { + "epoch": 7.01, + "learning_rate": 6.912658227848101e-06, + "loss": 0.0265, + "step": 12700 + }, + { + "epoch": 7.01, + "learning_rate": 6.906329113924051e-06, + "loss": 0.0239, + "step": 12725 + }, + { + "epoch": 7.01, + "learning_rate": 6.9e-06, + "loss": 0.0342, + "step": 12750 + }, + { + "epoch": 7.01, + "learning_rate": 6.89367088607595e-06, + "loss": 0.0253, + "step": 12775 + }, + { + "epoch": 7.01, + "learning_rate": 6.8873417721519e-06, + "loss": 0.0317, + "step": 12800 + }, + { + "epoch": 7.01, + "learning_rate": 6.881012658227849e-06, + "loss": 0.0233, + "step": 12825 + }, + { + "epoch": 7.01, + "learning_rate": 6.8746835443037986e-06, + "loss": 0.0198, + "step": 12850 + }, + { + "epoch": 7.02, + "learning_rate": 6.868354430379748e-06, + "loss": 0.018, + "step": 12875 + }, + { + "epoch": 7.02, + "learning_rate": 6.862025316455697e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.02, + "learning_rate": 6.855949367088608e-06, + "loss": 0.0279, + "step": 12925 + }, + { + "epoch": 7.02, + "learning_rate": 6.8496202531645574e-06, + "loss": 0.0201, + "step": 12950 + }, + { + "epoch": 7.02, + "learning_rate": 6.843291139240507e-06, + "loss": 0.0222, + "step": 12975 + }, + { + "epoch": 7.02, + "learning_rate": 6.836962025316456e-06, + "loss": 0.0236, + "step": 13000 + }, + { + "epoch": 7.02, + "eval_loss": 0.2616036534309387, + "eval_runtime": 1403.0214, + "eval_samples_per_second": 7.42, + "eval_steps_per_second": 0.464, + "eval_wer": 27.36142051090262, + "step": 13000 + }, + { + "epoch": 7.02, + "learning_rate": 6.830632911392405e-06, + "loss": 0.0159, + "step": 13025 + }, + { + "epoch": 7.02, + "learning_rate": 6.824303797468354e-06, + "loss": 0.0186, + "step": 13050 + }, + { + "epoch": 7.02, + "learning_rate": 6.817974683544304e-06, + "loss": 0.0246, + "step": 13075 + }, + { + "epoch": 7.02, + "learning_rate": 6.811645569620253e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 7.02, + "learning_rate": 6.805316455696203e-06, + "loss": 0.0287, + "step": 13125 + }, + { + "epoch": 7.02, + "learning_rate": 6.798987341772152e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 7.02, + "learning_rate": 6.792658227848102e-06, + "loss": 0.035, + "step": 13175 + }, + { + "epoch": 7.02, + "learning_rate": 6.786329113924052e-06, + "loss": 0.0358, + "step": 13200 + }, + { + "epoch": 7.02, + "learning_rate": 6.780000000000001e-06, + "loss": 0.0283, + "step": 13225 + }, + { + "epoch": 7.02, + "learning_rate": 6.773670886075951e-06, + "loss": 0.0203, + "step": 13250 + }, + { + "epoch": 7.03, + "learning_rate": 6.7673417721519e-06, + "loss": 0.0193, + "step": 13275 + }, + { + "epoch": 7.03, + "learning_rate": 6.7610126582278494e-06, + "loss": 0.0174, + "step": 13300 + }, + { + "epoch": 7.03, + "learning_rate": 6.754683544303798e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 7.03, + "learning_rate": 6.7483544303797474e-06, + "loss": 0.023, + "step": 13350 + }, + { + "epoch": 7.03, + "learning_rate": 6.742025316455697e-06, + "loss": 0.0266, + "step": 13375 + }, + { + "epoch": 7.03, + "learning_rate": 6.735696202531646e-06, + "loss": 0.024, + "step": 13400 + }, + { + "epoch": 7.03, + "learning_rate": 6.729367088607596e-06, + "loss": 0.0164, + "step": 13425 + }, + { + "epoch": 7.03, + "learning_rate": 6.723037974683545e-06, + "loss": 0.019, + "step": 13450 + }, + { + "epoch": 7.03, + "learning_rate": 6.716708860759495e-06, + "loss": 0.0209, + "step": 13475 + }, + { + "epoch": 7.03, + "learning_rate": 6.710379746835444e-06, + "loss": 0.0241, + "step": 13500 + }, + { + "epoch": 7.03, + "learning_rate": 6.7040506329113934e-06, + "loss": 0.0233, + "step": 13525 + }, + { + "epoch": 7.03, + "learning_rate": 6.697721518987342e-06, + "loss": 0.0204, + "step": 13550 + }, + { + "epoch": 7.03, + "learning_rate": 6.6913924050632914e-06, + "loss": 0.0131, + "step": 13575 + }, + { + "epoch": 7.03, + "learning_rate": 6.685063291139241e-06, + "loss": 0.0268, + "step": 13600 + }, + { + "epoch": 7.03, + "learning_rate": 6.67873417721519e-06, + "loss": 0.0156, + "step": 13625 + }, + { + "epoch": 7.03, + "learning_rate": 6.67240506329114e-06, + "loss": 0.0205, + "step": 13650 + }, + { + "epoch": 7.04, + "learning_rate": 6.666075949367089e-06, + "loss": 0.0217, + "step": 13675 + }, + { + "epoch": 7.04, + "learning_rate": 6.659746835443039e-06, + "loss": 0.0258, + "step": 13700 + }, + { + "epoch": 7.04, + "learning_rate": 6.653417721518988e-06, + "loss": 0.0211, + "step": 13725 + }, + { + "epoch": 7.04, + "learning_rate": 6.647088607594937e-06, + "loss": 0.0188, + "step": 13750 + }, + { + "epoch": 7.04, + "learning_rate": 6.640759493670886e-06, + "loss": 0.0234, + "step": 13775 + }, + { + "epoch": 7.04, + "learning_rate": 6.6344303797468355e-06, + "loss": 0.0202, + "step": 13800 + }, + { + "epoch": 7.04, + "learning_rate": 6.628101265822785e-06, + "loss": 0.0251, + "step": 13825 + }, + { + "epoch": 7.04, + "learning_rate": 6.621772151898734e-06, + "loss": 0.017, + "step": 13850 + }, + { + "epoch": 7.04, + "learning_rate": 6.615443037974684e-06, + "loss": 0.0254, + "step": 13875 + }, + { + "epoch": 7.04, + "learning_rate": 6.609113924050633e-06, + "loss": 0.0289, + "step": 13900 + }, + { + "epoch": 7.04, + "learning_rate": 6.602784810126583e-06, + "loss": 0.0238, + "step": 13925 + }, + { + "epoch": 7.04, + "learning_rate": 6.596455696202532e-06, + "loss": 0.0218, + "step": 13950 + }, + { + "epoch": 7.04, + "learning_rate": 6.590126582278481e-06, + "loss": 0.0289, + "step": 13975 + }, + { + "epoch": 7.04, + "learning_rate": 6.583797468354432e-06, + "loss": 0.0271, + "step": 14000 + }, + { + "epoch": 7.04, + "eval_loss": 0.26684999465942383, + "eval_runtime": 1448.0228, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "eval_wer": 42.464105107400115, + "step": 14000 + }, + { + "epoch": 8.0, + "learning_rate": 6.577468354430381e-06, + "loss": 0.0292, + "step": 14025 + }, + { + "epoch": 8.0, + "learning_rate": 6.57113924050633e-06, + "loss": 0.0192, + "step": 14050 + }, + { + "epoch": 8.0, + "learning_rate": 6.564810126582279e-06, + "loss": 0.027, + "step": 14075 + }, + { + "epoch": 8.0, + "learning_rate": 6.558481012658229e-06, + "loss": 0.03, + "step": 14100 + }, + { + "epoch": 8.0, + "learning_rate": 6.552151898734178e-06, + "loss": 0.0375, + "step": 14125 + }, + { + "epoch": 8.0, + "learning_rate": 6.5458227848101274e-06, + "loss": 0.0338, + "step": 14150 + }, + { + "epoch": 8.0, + "learning_rate": 6.539493670886077e-06, + "loss": 0.0305, + "step": 14175 + }, + { + "epoch": 8.0, + "learning_rate": 6.533164556962026e-06, + "loss": 0.0266, + "step": 14200 + }, + { + "epoch": 8.01, + "learning_rate": 6.526835443037976e-06, + "loss": 0.0378, + "step": 14225 + }, + { + "epoch": 8.01, + "learning_rate": 6.520506329113925e-06, + "loss": 0.0256, + "step": 14250 + }, + { + "epoch": 8.01, + "learning_rate": 6.514177215189874e-06, + "loss": 0.0203, + "step": 14275 + }, + { + "epoch": 8.01, + "learning_rate": 6.507848101265823e-06, + "loss": 0.0212, + "step": 14300 + }, + { + "epoch": 8.01, + "learning_rate": 6.501518987341773e-06, + "loss": 0.0204, + "step": 14325 + }, + { + "epoch": 8.01, + "learning_rate": 6.495189873417722e-06, + "loss": 0.0259, + "step": 14350 + }, + { + "epoch": 8.01, + "learning_rate": 6.4888607594936714e-06, + "loss": 0.0214, + "step": 14375 + }, + { + "epoch": 8.01, + "learning_rate": 6.482531645569621e-06, + "loss": 0.021, + "step": 14400 + }, + { + "epoch": 8.01, + "learning_rate": 6.47620253164557e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 8.01, + "learning_rate": 6.46987341772152e-06, + "loss": 0.0216, + "step": 14450 + }, + { + "epoch": 8.01, + "learning_rate": 6.463544303797469e-06, + "loss": 0.0209, + "step": 14475 + }, + { + "epoch": 8.01, + "learning_rate": 6.457215189873418e-06, + "loss": 0.0292, + "step": 14500 + }, + { + "epoch": 8.01, + "learning_rate": 6.450886075949367e-06, + "loss": 0.0286, + "step": 14525 + }, + { + "epoch": 8.01, + "learning_rate": 6.444556962025317e-06, + "loss": 0.0218, + "step": 14550 + }, + { + "epoch": 8.01, + "learning_rate": 6.438227848101266e-06, + "loss": 0.0209, + "step": 14575 + }, + { + "epoch": 8.01, + "learning_rate": 6.4318987341772154e-06, + "loss": 0.0244, + "step": 14600 + }, + { + "epoch": 8.02, + "learning_rate": 6.425569620253165e-06, + "loss": 0.0212, + "step": 14625 + }, + { + "epoch": 8.02, + "learning_rate": 6.419240506329114e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 8.02, + "learning_rate": 6.412911392405064e-06, + "loss": 0.0209, + "step": 14675 + }, + { + "epoch": 8.02, + "learning_rate": 6.406582278481013e-06, + "loss": 0.0203, + "step": 14700 + }, + { + "epoch": 8.02, + "learning_rate": 6.400253164556962e-06, + "loss": 0.0192, + "step": 14725 + }, + { + "epoch": 8.02, + "learning_rate": 6.393924050632911e-06, + "loss": 0.0216, + "step": 14750 + }, + { + "epoch": 8.02, + "learning_rate": 6.387594936708861e-06, + "loss": 0.012, + "step": 14775 + }, + { + "epoch": 8.02, + "learning_rate": 6.38126582278481e-06, + "loss": 0.0241, + "step": 14800 + }, + { + "epoch": 8.02, + "learning_rate": 6.37493670886076e-06, + "loss": 0.0192, + "step": 14825 + }, + { + "epoch": 8.02, + "learning_rate": 6.36860759493671e-06, + "loss": 0.0202, + "step": 14850 + }, + { + "epoch": 8.02, + "learning_rate": 6.362278481012659e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 8.02, + "learning_rate": 6.3559493670886086e-06, + "loss": 0.0218, + "step": 14900 + }, + { + "epoch": 8.02, + "learning_rate": 6.349620253164558e-06, + "loss": 0.0232, + "step": 14925 + }, + { + "epoch": 8.02, + "learning_rate": 6.3432911392405074e-06, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 8.02, + "learning_rate": 6.337215189873418e-06, + "loss": 0.0252, + "step": 14975 + }, + { + "epoch": 8.02, + "learning_rate": 6.3308860759493675e-06, + "loss": 0.0176, + "step": 15000 + }, + { + "epoch": 8.02, + "eval_loss": 0.2596448063850403, + "eval_runtime": 1417.6481, + "eval_samples_per_second": 7.344, + "eval_steps_per_second": 0.459, + "eval_wer": 28.52241499861786, + "step": 15000 + }, + { + "epoch": 8.03, + "learning_rate": 6.324556962025317e-06, + "loss": 0.0166, + "step": 15025 + }, + { + "epoch": 8.03, + "learning_rate": 6.318227848101266e-06, + "loss": 0.017, + "step": 15050 + }, + { + "epoch": 8.03, + "learning_rate": 6.311898734177216e-06, + "loss": 0.0291, + "step": 15075 + }, + { + "epoch": 8.03, + "learning_rate": 6.305569620253164e-06, + "loss": 0.0149, + "step": 15100 + }, + { + "epoch": 8.03, + "learning_rate": 6.299240506329114e-06, + "loss": 0.0223, + "step": 15125 + }, + { + "epoch": 8.03, + "learning_rate": 6.292911392405063e-06, + "loss": 0.0212, + "step": 15150 + }, + { + "epoch": 8.03, + "learning_rate": 6.286582278481013e-06, + "loss": 0.0125, + "step": 15175 + }, + { + "epoch": 8.03, + "learning_rate": 6.280253164556962e-06, + "loss": 0.0169, + "step": 15200 + }, + { + "epoch": 8.03, + "learning_rate": 6.273924050632912e-06, + "loss": 0.0244, + "step": 15225 + }, + { + "epoch": 8.03, + "learning_rate": 6.267594936708862e-06, + "loss": 0.015, + "step": 15250 + }, + { + "epoch": 8.03, + "learning_rate": 6.261265822784811e-06, + "loss": 0.0171, + "step": 15275 + }, + { + "epoch": 8.03, + "learning_rate": 6.254936708860761e-06, + "loss": 0.017, + "step": 15300 + }, + { + "epoch": 8.03, + "learning_rate": 6.24860759493671e-06, + "loss": 0.0162, + "step": 15325 + }, + { + "epoch": 8.03, + "learning_rate": 6.2422784810126594e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.03, + "learning_rate": 6.235949367088608e-06, + "loss": 0.0139, + "step": 15375 + }, + { + "epoch": 8.03, + "learning_rate": 6.2296202531645575e-06, + "loss": 0.0177, + "step": 15400 + }, + { + "epoch": 8.04, + "learning_rate": 6.223291139240507e-06, + "loss": 0.0207, + "step": 15425 + }, + { + "epoch": 8.04, + "learning_rate": 6.216962025316456e-06, + "loss": 0.0192, + "step": 15450 + }, + { + "epoch": 8.04, + "learning_rate": 6.210632911392406e-06, + "loss": 0.0176, + "step": 15475 + }, + { + "epoch": 8.04, + "learning_rate": 6.204303797468355e-06, + "loss": 0.0133, + "step": 15500 + }, + { + "epoch": 8.04, + "learning_rate": 6.197974683544305e-06, + "loss": 0.0199, + "step": 15525 + }, + { + "epoch": 8.04, + "learning_rate": 6.191645569620254e-06, + "loss": 0.0135, + "step": 15550 + }, + { + "epoch": 8.04, + "learning_rate": 6.1853164556962035e-06, + "loss": 0.0121, + "step": 15575 + }, + { + "epoch": 8.04, + "learning_rate": 6.178987341772152e-06, + "loss": 0.0152, + "step": 15600 + }, + { + "epoch": 8.04, + "learning_rate": 6.1726582278481015e-06, + "loss": 0.0201, + "step": 15625 + }, + { + "epoch": 8.04, + "learning_rate": 6.166329113924051e-06, + "loss": 0.0226, + "step": 15650 + }, + { + "epoch": 8.04, + "learning_rate": 6.16e-06, + "loss": 0.0222, + "step": 15675 + }, + { + "epoch": 8.04, + "learning_rate": 6.15367088607595e-06, + "loss": 0.0184, + "step": 15700 + }, + { + "epoch": 8.04, + "learning_rate": 6.147341772151899e-06, + "loss": 0.0219, + "step": 15725 + }, + { + "epoch": 8.04, + "learning_rate": 6.141012658227849e-06, + "loss": 0.0219, + "step": 15750 + }, + { + "epoch": 9.0, + "learning_rate": 6.134683544303798e-06, + "loss": 0.0208, + "step": 15775 + }, + { + "epoch": 9.0, + "learning_rate": 6.1283544303797475e-06, + "loss": 0.0143, + "step": 15800 + }, + { + "epoch": 9.0, + "learning_rate": 6.122025316455696e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 9.0, + "learning_rate": 6.1156962025316455e-06, + "loss": 0.0287, + "step": 15850 + }, + { + "epoch": 9.0, + "learning_rate": 6.109367088607595e-06, + "loss": 0.0252, + "step": 15875 + }, + { + "epoch": 9.0, + "learning_rate": 6.103037974683544e-06, + "loss": 0.0275, + "step": 15900 + }, + { + "epoch": 9.0, + "learning_rate": 6.096708860759494e-06, + "loss": 0.0193, + "step": 15925 + }, + { + "epoch": 9.0, + "learning_rate": 6.090379746835443e-06, + "loss": 0.0255, + "step": 15950 + }, + { + "epoch": 9.01, + "learning_rate": 6.084050632911393e-06, + "loss": 0.0248, + "step": 15975 + }, + { + "epoch": 9.01, + "learning_rate": 6.077721518987342e-06, + "loss": 0.0328, + "step": 16000 + }, + { + "epoch": 9.01, + "eval_loss": 0.2512108087539673, + "eval_runtime": 1474.2883, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.442, + "eval_wer": 35.66236849379665, + "step": 16000 + }, + { + "epoch": 9.01, + "learning_rate": 6.0713924050632915e-06, + "loss": 0.0181, + "step": 16025 + }, + { + "epoch": 9.01, + "learning_rate": 6.065063291139242e-06, + "loss": 0.0134, + "step": 16050 + }, + { + "epoch": 9.01, + "learning_rate": 6.058734177215191e-06, + "loss": 0.0203, + "step": 16075 + }, + { + "epoch": 9.01, + "learning_rate": 6.052405063291141e-06, + "loss": 0.0224, + "step": 16100 + }, + { + "epoch": 9.01, + "learning_rate": 6.046075949367089e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 9.01, + "learning_rate": 6.039746835443039e-06, + "loss": 0.019, + "step": 16150 + }, + { + "epoch": 9.01, + "learning_rate": 6.033417721518988e-06, + "loss": 0.0152, + "step": 16175 + }, + { + "epoch": 9.01, + "learning_rate": 6.0270886075949374e-06, + "loss": 0.0197, + "step": 16200 + }, + { + "epoch": 9.01, + "learning_rate": 6.020759493670887e-06, + "loss": 0.0188, + "step": 16225 + }, + { + "epoch": 9.01, + "learning_rate": 6.014430379746836e-06, + "loss": 0.0245, + "step": 16250 + }, + { + "epoch": 9.01, + "learning_rate": 6.008101265822786e-06, + "loss": 0.0202, + "step": 16275 + }, + { + "epoch": 9.01, + "learning_rate": 6.001772151898735e-06, + "loss": 0.0184, + "step": 16300 + }, + { + "epoch": 9.01, + "learning_rate": 5.995443037974685e-06, + "loss": 0.0158, + "step": 16325 + }, + { + "epoch": 9.01, + "learning_rate": 5.989113924050633e-06, + "loss": 0.0175, + "step": 16350 + }, + { + "epoch": 9.02, + "learning_rate": 5.982784810126583e-06, + "loss": 0.014, + "step": 16375 + }, + { + "epoch": 9.02, + "learning_rate": 5.976455696202532e-06, + "loss": 0.0149, + "step": 16400 + }, + { + "epoch": 9.02, + "learning_rate": 5.9701265822784815e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 9.02, + "learning_rate": 5.963797468354431e-06, + "loss": 0.0215, + "step": 16450 + }, + { + "epoch": 9.02, + "learning_rate": 5.95746835443038e-06, + "loss": 0.017, + "step": 16475 + }, + { + "epoch": 9.02, + "learning_rate": 5.95113924050633e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.02, + "learning_rate": 5.944810126582279e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 9.02, + "learning_rate": 5.938481012658229e-06, + "loss": 0.0198, + "step": 16550 + }, + { + "epoch": 9.02, + "learning_rate": 5.932151898734177e-06, + "loss": 0.0252, + "step": 16575 + }, + { + "epoch": 9.02, + "learning_rate": 5.925822784810127e-06, + "loss": 0.0226, + "step": 16600 + }, + { + "epoch": 9.02, + "learning_rate": 5.919493670886076e-06, + "loss": 0.0167, + "step": 16625 + }, + { + "epoch": 9.02, + "learning_rate": 5.9131645569620255e-06, + "loss": 0.0259, + "step": 16650 + }, + { + "epoch": 9.02, + "learning_rate": 5.906835443037975e-06, + "loss": 0.0229, + "step": 16675 + }, + { + "epoch": 9.02, + "learning_rate": 5.900506329113924e-06, + "loss": 0.0238, + "step": 16700 + }, + { + "epoch": 9.02, + "learning_rate": 5.894177215189874e-06, + "loss": 0.02, + "step": 16725 + }, + { + "epoch": 9.02, + "learning_rate": 5.887848101265823e-06, + "loss": 0.0182, + "step": 16750 + }, + { + "epoch": 9.03, + "learning_rate": 5.881518987341773e-06, + "loss": 0.0157, + "step": 16775 + }, + { + "epoch": 9.03, + "learning_rate": 5.875189873417721e-06, + "loss": 0.0145, + "step": 16800 + }, + { + "epoch": 9.03, + "learning_rate": 5.868860759493671e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 9.03, + "learning_rate": 5.86253164556962e-06, + "loss": 0.0108, + "step": 16850 + }, + { + "epoch": 9.03, + "learning_rate": 5.85620253164557e-06, + "loss": 0.014, + "step": 16875 + }, + { + "epoch": 9.03, + "learning_rate": 5.84987341772152e-06, + "loss": 0.0167, + "step": 16900 + }, + { + "epoch": 9.03, + "learning_rate": 5.843544303797469e-06, + "loss": 0.0121, + "step": 16925 + }, + { + "epoch": 9.03, + "learning_rate": 5.837215189873419e-06, + "loss": 0.0149, + "step": 16950 + }, + { + "epoch": 9.03, + "learning_rate": 5.831139240506329e-06, + "loss": 0.0143, + "step": 16975 + }, + { + "epoch": 9.03, + "learning_rate": 5.824810126582279e-06, + "loss": 0.0139, + "step": 17000 + }, + { + "epoch": 9.03, + "eval_loss": 0.2688957154750824, + "eval_runtime": 1423.1196, + "eval_samples_per_second": 7.316, + "eval_steps_per_second": 0.457, + "eval_wer": 25.920746678807788, + "step": 17000 + }, + { + "epoch": 9.03, + "learning_rate": 5.818481012658228e-06, + "loss": 0.0169, + "step": 17025 + }, + { + "epoch": 9.03, + "learning_rate": 5.8121518987341775e-06, + "loss": 0.013, + "step": 17050 + }, + { + "epoch": 9.03, + "learning_rate": 5.805822784810127e-06, + "loss": 0.0123, + "step": 17075 + }, + { + "epoch": 9.03, + "learning_rate": 5.799493670886076e-06, + "loss": 0.0161, + "step": 17100 + }, + { + "epoch": 9.03, + "learning_rate": 5.793164556962026e-06, + "loss": 0.0114, + "step": 17125 + }, + { + "epoch": 9.03, + "learning_rate": 5.786835443037974e-06, + "loss": 0.0194, + "step": 17150 + }, + { + "epoch": 9.04, + "learning_rate": 5.780506329113924e-06, + "loss": 0.0179, + "step": 17175 + }, + { + "epoch": 9.04, + "learning_rate": 5.774177215189873e-06, + "loss": 0.0165, + "step": 17200 + }, + { + "epoch": 9.04, + "learning_rate": 5.767848101265823e-06, + "loss": 0.0165, + "step": 17225 + }, + { + "epoch": 9.04, + "learning_rate": 5.761518987341773e-06, + "loss": 0.0125, + "step": 17250 + }, + { + "epoch": 9.04, + "learning_rate": 5.755189873417722e-06, + "loss": 0.0172, + "step": 17275 + }, + { + "epoch": 9.04, + "learning_rate": 5.748860759493672e-06, + "loss": 0.0158, + "step": 17300 + }, + { + "epoch": 9.04, + "learning_rate": 5.742531645569621e-06, + "loss": 0.0174, + "step": 17325 + }, + { + "epoch": 9.04, + "learning_rate": 5.736202531645571e-06, + "loss": 0.0162, + "step": 17350 + }, + { + "epoch": 9.04, + "learning_rate": 5.72987341772152e-06, + "loss": 0.0203, + "step": 17375 + }, + { + "epoch": 9.04, + "learning_rate": 5.7235443037974695e-06, + "loss": 0.0157, + "step": 17400 + }, + { + "epoch": 9.04, + "learning_rate": 5.717215189873419e-06, + "loss": 0.018, + "step": 17425 + }, + { + "epoch": 9.04, + "learning_rate": 5.7108860759493675e-06, + "loss": 0.0199, + "step": 17450 + }, + { + "epoch": 9.04, + "learning_rate": 5.704556962025317e-06, + "loss": 0.0153, + "step": 17475 + }, + { + "epoch": 9.04, + "learning_rate": 5.698227848101266e-06, + "loss": 0.0234, + "step": 17500 + }, + { + "epoch": 10.0, + "learning_rate": 5.691898734177216e-06, + "loss": 0.0168, + "step": 17525 + }, + { + "epoch": 10.0, + "learning_rate": 5.685569620253165e-06, + "loss": 0.0214, + "step": 17550 + }, + { + "epoch": 10.0, + "learning_rate": 5.679240506329115e-06, + "loss": 0.0185, + "step": 17575 + }, + { + "epoch": 10.0, + "learning_rate": 5.672911392405064e-06, + "loss": 0.0228, + "step": 17600 + }, + { + "epoch": 10.0, + "learning_rate": 5.6665822784810135e-06, + "loss": 0.024, + "step": 17625 + }, + { + "epoch": 10.0, + "learning_rate": 5.660253164556963e-06, + "loss": 0.0235, + "step": 17650 + }, + { + "epoch": 10.0, + "learning_rate": 5.6539240506329115e-06, + "loss": 0.0262, + "step": 17675 + }, + { + "epoch": 10.0, + "learning_rate": 5.647594936708861e-06, + "loss": 0.0214, + "step": 17700 + }, + { + "epoch": 10.01, + "learning_rate": 5.64126582278481e-06, + "loss": 0.0279, + "step": 17725 + }, + { + "epoch": 10.01, + "learning_rate": 5.63493670886076e-06, + "loss": 0.0248, + "step": 17750 + }, + { + "epoch": 10.01, + "learning_rate": 5.628607594936709e-06, + "loss": 0.0155, + "step": 17775 + }, + { + "epoch": 10.01, + "learning_rate": 5.622278481012659e-06, + "loss": 0.017, + "step": 17800 + }, + { + "epoch": 10.01, + "learning_rate": 5.615949367088608e-06, + "loss": 0.0205, + "step": 17825 + }, + { + "epoch": 10.01, + "learning_rate": 5.6096202531645575e-06, + "loss": 0.021, + "step": 17850 + }, + { + "epoch": 10.01, + "learning_rate": 5.603291139240507e-06, + "loss": 0.0178, + "step": 17875 + }, + { + "epoch": 10.01, + "learning_rate": 5.5969620253164555e-06, + "loss": 0.0169, + "step": 17900 + }, + { + "epoch": 10.01, + "learning_rate": 5.590632911392405e-06, + "loss": 0.0158, + "step": 17925 + }, + { + "epoch": 10.01, + "learning_rate": 5.584303797468354e-06, + "loss": 0.0195, + "step": 17950 + }, + { + "epoch": 10.01, + "learning_rate": 5.577974683544304e-06, + "loss": 0.0195, + "step": 17975 + }, + { + "epoch": 10.01, + "learning_rate": 5.571645569620253e-06, + "loss": 0.0187, + "step": 18000 + }, + { + "epoch": 10.01, + "eval_loss": 0.2605174481868744, + "eval_runtime": 1553.7854, + "eval_samples_per_second": 6.7, + "eval_steps_per_second": 0.419, + "eval_wer": 25.330493178750874, + "step": 18000 + }, + { + "epoch": 10.01, + "learning_rate": 5.565316455696203e-06, + "loss": 0.0214, + "step": 18025 + }, + { + "epoch": 10.01, + "learning_rate": 5.558987341772152e-06, + "loss": 0.0193, + "step": 18050 + }, + { + "epoch": 10.01, + "learning_rate": 5.552658227848102e-06, + "loss": 0.0168, + "step": 18075 + }, + { + "epoch": 10.01, + "learning_rate": 5.546329113924052e-06, + "loss": 0.0147, + "step": 18100 + }, + { + "epoch": 10.02, + "learning_rate": 5.540000000000001e-06, + "loss": 0.0192, + "step": 18125 + }, + { + "epoch": 10.02, + "learning_rate": 5.533670886075951e-06, + "loss": 0.014, + "step": 18150 + }, + { + "epoch": 10.02, + "learning_rate": 5.527341772151899e-06, + "loss": 0.0163, + "step": 18175 + }, + { + "epoch": 10.02, + "learning_rate": 5.521012658227849e-06, + "loss": 0.0139, + "step": 18200 + }, + { + "epoch": 10.02, + "learning_rate": 5.514683544303798e-06, + "loss": 0.0168, + "step": 18225 + }, + { + "epoch": 10.02, + "learning_rate": 5.5083544303797475e-06, + "loss": 0.0118, + "step": 18250 + }, + { + "epoch": 10.02, + "learning_rate": 5.502025316455697e-06, + "loss": 0.0119, + "step": 18275 + }, + { + "epoch": 10.02, + "learning_rate": 5.495696202531646e-06, + "loss": 0.0136, + "step": 18300 + }, + { + "epoch": 10.02, + "learning_rate": 5.489367088607596e-06, + "loss": 0.0132, + "step": 18325 + }, + { + "epoch": 10.02, + "learning_rate": 5.483037974683545e-06, + "loss": 0.0148, + "step": 18350 + }, + { + "epoch": 10.02, + "learning_rate": 5.476708860759495e-06, + "loss": 0.015, + "step": 18375 + }, + { + "epoch": 10.02, + "learning_rate": 5.470379746835443e-06, + "loss": 0.0154, + "step": 18400 + }, + { + "epoch": 10.02, + "learning_rate": 5.464050632911393e-06, + "loss": 0.0139, + "step": 18425 + }, + { + "epoch": 10.02, + "learning_rate": 5.457721518987342e-06, + "loss": 0.0239, + "step": 18450 + }, + { + "epoch": 10.02, + "learning_rate": 5.4513924050632915e-06, + "loss": 0.0203, + "step": 18475 + }, + { + "epoch": 10.02, + "learning_rate": 5.445063291139241e-06, + "loss": 0.0199, + "step": 18500 + }, + { + "epoch": 10.03, + "learning_rate": 5.43873417721519e-06, + "loss": 0.013, + "step": 18525 + }, + { + "epoch": 10.03, + "learning_rate": 5.43240506329114e-06, + "loss": 0.0128, + "step": 18550 + }, + { + "epoch": 10.03, + "learning_rate": 5.426075949367089e-06, + "loss": 0.0161, + "step": 18575 + }, + { + "epoch": 10.03, + "learning_rate": 5.419746835443039e-06, + "loss": 0.0129, + "step": 18600 + }, + { + "epoch": 10.03, + "learning_rate": 5.413417721518987e-06, + "loss": 0.0124, + "step": 18625 + }, + { + "epoch": 10.03, + "learning_rate": 5.407088607594937e-06, + "loss": 0.0161, + "step": 18650 + }, + { + "epoch": 10.03, + "learning_rate": 5.400759493670886e-06, + "loss": 0.0104, + "step": 18675 + }, + { + "epoch": 10.03, + "learning_rate": 5.3944303797468355e-06, + "loss": 0.0109, + "step": 18700 + }, + { + "epoch": 10.03, + "learning_rate": 5.388101265822785e-06, + "loss": 0.0128, + "step": 18725 + }, + { + "epoch": 10.03, + "learning_rate": 5.381772151898734e-06, + "loss": 0.014, + "step": 18750 + }, + { + "epoch": 10.03, + "learning_rate": 5.375443037974684e-06, + "loss": 0.0159, + "step": 18775 + }, + { + "epoch": 10.03, + "learning_rate": 5.369113924050633e-06, + "loss": 0.017, + "step": 18800 + }, + { + "epoch": 10.03, + "learning_rate": 5.362784810126583e-06, + "loss": 0.012, + "step": 18825 + }, + { + "epoch": 10.03, + "learning_rate": 5.356455696202531e-06, + "loss": 0.0165, + "step": 18850 + }, + { + "epoch": 10.03, + "learning_rate": 5.350126582278481e-06, + "loss": 0.0153, + "step": 18875 + }, + { + "epoch": 10.03, + "learning_rate": 5.343797468354432e-06, + "loss": 0.0182, + "step": 18900 + }, + { + "epoch": 10.04, + "learning_rate": 5.33746835443038e-06, + "loss": 0.0194, + "step": 18925 + }, + { + "epoch": 10.04, + "learning_rate": 5.33113924050633e-06, + "loss": 0.0144, + "step": 18950 + }, + { + "epoch": 10.04, + "learning_rate": 5.324810126582279e-06, + "loss": 0.0157, + "step": 18975 + }, + { + "epoch": 10.04, + "learning_rate": 5.318481012658229e-06, + "loss": 0.0111, + "step": 19000 + }, + { + "epoch": 10.04, + "eval_loss": 0.2696678638458252, + "eval_runtime": 1463.5986, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.445, + "eval_wer": 24.79877721588969, + "step": 19000 + }, + { + "epoch": 10.04, + "learning_rate": 5.312151898734178e-06, + "loss": 0.0176, + "step": 19025 + }, + { + "epoch": 10.04, + "learning_rate": 5.3058227848101275e-06, + "loss": 0.0144, + "step": 19050 + }, + { + "epoch": 10.04, + "learning_rate": 5.299493670886077e-06, + "loss": 0.0129, + "step": 19075 + }, + { + "epoch": 10.04, + "learning_rate": 5.293164556962026e-06, + "loss": 0.0162, + "step": 19100 + }, + { + "epoch": 10.04, + "learning_rate": 5.286835443037976e-06, + "loss": 0.0128, + "step": 19125 + }, + { + "epoch": 10.04, + "learning_rate": 5.280506329113924e-06, + "loss": 0.0152, + "step": 19150 + }, + { + "epoch": 10.04, + "learning_rate": 5.274177215189874e-06, + "loss": 0.0183, + "step": 19175 + }, + { + "epoch": 10.04, + "learning_rate": 5.267848101265823e-06, + "loss": 0.0198, + "step": 19200 + }, + { + "epoch": 10.04, + "learning_rate": 5.261518987341773e-06, + "loss": 0.0179, + "step": 19225 + }, + { + "epoch": 10.04, + "learning_rate": 5.255443037974683e-06, + "loss": 0.019, + "step": 19250 + }, + { + "epoch": 11.0, + "learning_rate": 5.249113924050633e-06, + "loss": 0.0159, + "step": 19275 + }, + { + "epoch": 11.0, + "learning_rate": 5.242784810126583e-06, + "loss": 0.0156, + "step": 19300 + }, + { + "epoch": 11.0, + "learning_rate": 5.236455696202532e-06, + "loss": 0.015, + "step": 19325 + }, + { + "epoch": 11.0, + "learning_rate": 5.230126582278482e-06, + "loss": 0.0162, + "step": 19350 + }, + { + "epoch": 11.0, + "learning_rate": 5.223797468354431e-06, + "loss": 0.0262, + "step": 19375 + }, + { + "epoch": 11.0, + "learning_rate": 5.217468354430381e-06, + "loss": 0.0207, + "step": 19400 + }, + { + "epoch": 11.0, + "learning_rate": 5.21113924050633e-06, + "loss": 0.0222, + "step": 19425 + }, + { + "epoch": 11.0, + "learning_rate": 5.2048101265822795e-06, + "loss": 0.016, + "step": 19450 + }, + { + "epoch": 11.01, + "learning_rate": 5.198481012658229e-06, + "loss": 0.0176, + "step": 19475 + }, + { + "epoch": 11.01, + "learning_rate": 5.192151898734178e-06, + "loss": 0.0209, + "step": 19500 + }, + { + "epoch": 11.01, + "learning_rate": 5.185822784810127e-06, + "loss": 0.0212, + "step": 19525 + }, + { + "epoch": 11.01, + "learning_rate": 5.179493670886076e-06, + "loss": 0.0143, + "step": 19550 + }, + { + "epoch": 11.01, + "learning_rate": 5.173164556962026e-06, + "loss": 0.0224, + "step": 19575 + }, + { + "epoch": 11.01, + "learning_rate": 5.166835443037975e-06, + "loss": 0.0171, + "step": 19600 + }, + { + "epoch": 11.01, + "learning_rate": 5.160506329113925e-06, + "loss": 0.0166, + "step": 19625 + }, + { + "epoch": 11.01, + "learning_rate": 5.154177215189874e-06, + "loss": 0.0135, + "step": 19650 + }, + { + "epoch": 11.01, + "learning_rate": 5.1478481012658235e-06, + "loss": 0.0164, + "step": 19675 + }, + { + "epoch": 11.01, + "learning_rate": 5.141518987341773e-06, + "loss": 0.0154, + "step": 19700 + }, + { + "epoch": 11.01, + "learning_rate": 5.135189873417722e-06, + "loss": 0.0183, + "step": 19725 + }, + { + "epoch": 11.01, + "learning_rate": 5.128860759493671e-06, + "loss": 0.0149, + "step": 19750 + }, + { + "epoch": 11.01, + "learning_rate": 5.12253164556962e-06, + "loss": 0.0176, + "step": 19775 + }, + { + "epoch": 11.01, + "learning_rate": 5.11620253164557e-06, + "loss": 0.0135, + "step": 19800 + }, + { + "epoch": 11.01, + "learning_rate": 5.109873417721519e-06, + "loss": 0.0121, + "step": 19825 + }, + { + "epoch": 11.01, + "learning_rate": 5.103544303797469e-06, + "loss": 0.0158, + "step": 19850 + }, + { + "epoch": 11.02, + "learning_rate": 5.097215189873418e-06, + "loss": 0.0191, + "step": 19875 + }, + { + "epoch": 11.02, + "learning_rate": 5.0908860759493675e-06, + "loss": 0.0127, + "step": 19900 + }, + { + "epoch": 11.02, + "learning_rate": 5.084556962025317e-06, + "loss": 0.0149, + "step": 19925 + }, + { + "epoch": 11.02, + "learning_rate": 5.0782278481012655e-06, + "loss": 0.0152, + "step": 19950 + }, + { + "epoch": 11.02, + "learning_rate": 5.071898734177215e-06, + "loss": 0.0139, + "step": 19975 + }, + { + "epoch": 11.02, + "learning_rate": 5.065569620253164e-06, + "loss": 0.015, + "step": 20000 + }, + { + "epoch": 11.02, + "eval_loss": 0.2623814344406128, + "eval_runtime": 1462.1621, + "eval_samples_per_second": 7.12, + "eval_steps_per_second": 0.445, + "eval_wer": 24.392266540919362, + "step": 20000 + }, + { + "epoch": 11.02, + "learning_rate": 5.059240506329114e-06, + "loss": 0.0094, + "step": 20025 + }, + { + "epoch": 11.02, + "learning_rate": 5.052911392405063e-06, + "loss": 0.0108, + "step": 20050 + }, + { + "epoch": 11.02, + "learning_rate": 5.046582278481013e-06, + "loss": 0.0155, + "step": 20075 + }, + { + "epoch": 11.02, + "learning_rate": 5.040253164556962e-06, + "loss": 0.0103, + "step": 20100 + }, + { + "epoch": 11.02, + "learning_rate": 5.033924050632912e-06, + "loss": 0.0109, + "step": 20125 + }, + { + "epoch": 11.02, + "learning_rate": 5.027594936708862e-06, + "loss": 0.0188, + "step": 20150 + }, + { + "epoch": 11.02, + "learning_rate": 5.021265822784811e-06, + "loss": 0.0178, + "step": 20175 + }, + { + "epoch": 11.02, + "learning_rate": 5.014936708860761e-06, + "loss": 0.0262, + "step": 20200 + }, + { + "epoch": 11.02, + "learning_rate": 5.00860759493671e-06, + "loss": 0.0194, + "step": 20225 + }, + { + "epoch": 11.02, + "learning_rate": 5.002278481012659e-06, + "loss": 0.0107, + "step": 20250 + }, + { + "epoch": 11.03, + "learning_rate": 4.995949367088608e-06, + "loss": 0.0147, + "step": 20275 + }, + { + "epoch": 11.03, + "learning_rate": 4.9896202531645575e-06, + "loss": 0.0084, + "step": 20300 + }, + { + "epoch": 11.03, + "learning_rate": 4.983291139240507e-06, + "loss": 0.0214, + "step": 20325 + }, + { + "epoch": 11.03, + "learning_rate": 4.976962025316456e-06, + "loss": 0.014, + "step": 20350 + }, + { + "epoch": 11.03, + "learning_rate": 4.970632911392406e-06, + "loss": 0.0129, + "step": 20375 + }, + { + "epoch": 11.03, + "learning_rate": 4.964303797468355e-06, + "loss": 0.0145, + "step": 20400 + }, + { + "epoch": 11.03, + "learning_rate": 4.957974683544305e-06, + "loss": 0.0109, + "step": 20425 + }, + { + "epoch": 11.03, + "learning_rate": 4.951645569620254e-06, + "loss": 0.0117, + "step": 20450 + }, + { + "epoch": 11.03, + "learning_rate": 4.945316455696203e-06, + "loss": 0.0137, + "step": 20475 + }, + { + "epoch": 11.03, + "learning_rate": 4.938987341772152e-06, + "loss": 0.0086, + "step": 20500 + }, + { + "epoch": 11.03, + "learning_rate": 4.9326582278481015e-06, + "loss": 0.0128, + "step": 20525 + }, + { + "epoch": 11.03, + "learning_rate": 4.926329113924051e-06, + "loss": 0.0101, + "step": 20550 + }, + { + "epoch": 11.03, + "learning_rate": 4.92e-06, + "loss": 0.0132, + "step": 20575 + }, + { + "epoch": 11.03, + "learning_rate": 4.91367088607595e-06, + "loss": 0.0147, + "step": 20600 + }, + { + "epoch": 11.03, + "learning_rate": 4.907341772151899e-06, + "loss": 0.0077, + "step": 20625 + }, + { + "epoch": 11.03, + "learning_rate": 4.901012658227849e-06, + "loss": 0.0125, + "step": 20650 + }, + { + "epoch": 11.04, + "learning_rate": 4.894683544303798e-06, + "loss": 0.0115, + "step": 20675 + }, + { + "epoch": 11.04, + "learning_rate": 4.888354430379747e-06, + "loss": 0.0136, + "step": 20700 + }, + { + "epoch": 11.04, + "learning_rate": 4.882025316455696e-06, + "loss": 0.0098, + "step": 20725 + }, + { + "epoch": 11.04, + "learning_rate": 4.875696202531646e-06, + "loss": 0.0154, + "step": 20750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869367088607596e-06, + "loss": 0.0126, + "step": 20775 + }, + { + "epoch": 11.04, + "learning_rate": 4.863037974683545e-06, + "loss": 0.0116, + "step": 20800 + }, + { + "epoch": 11.04, + "learning_rate": 4.856708860759495e-06, + "loss": 0.0133, + "step": 20825 + }, + { + "epoch": 11.04, + "learning_rate": 4.850379746835443e-06, + "loss": 0.0106, + "step": 20850 + }, + { + "epoch": 11.04, + "learning_rate": 4.844050632911393e-06, + "loss": 0.0151, + "step": 20875 + }, + { + "epoch": 11.04, + "learning_rate": 4.837721518987342e-06, + "loss": 0.0148, + "step": 20900 + }, + { + "epoch": 11.04, + "learning_rate": 4.8313924050632915e-06, + "loss": 0.0173, + "step": 20925 + }, + { + "epoch": 11.04, + "learning_rate": 4.825063291139241e-06, + "loss": 0.0126, + "step": 20950 + }, + { + "epoch": 11.04, + "learning_rate": 4.81873417721519e-06, + "loss": 0.0163, + "step": 20975 + }, + { + "epoch": 11.04, + "learning_rate": 4.81240506329114e-06, + "loss": 0.0203, + "step": 21000 + }, + { + "epoch": 11.04, + "eval_loss": 0.27788785099983215, + "eval_runtime": 1546.852, + "eval_samples_per_second": 6.73, + "eval_steps_per_second": 0.421, + "eval_wer": 25.284963983154196, + "step": 21000 + }, + { + "epoch": 12.0, + "learning_rate": 4.806075949367089e-06, + "loss": 0.0176, + "step": 21025 + }, + { + "epoch": 12.0, + "learning_rate": 4.799746835443039e-06, + "loss": 0.0157, + "step": 21050 + }, + { + "epoch": 12.0, + "learning_rate": 4.793417721518987e-06, + "loss": 0.0192, + "step": 21075 + }, + { + "epoch": 12.0, + "learning_rate": 4.787088607594937e-06, + "loss": 0.0124, + "step": 21100 + }, + { + "epoch": 12.0, + "learning_rate": 4.780759493670886e-06, + "loss": 0.0194, + "step": 21125 + }, + { + "epoch": 12.0, + "learning_rate": 4.774430379746836e-06, + "loss": 0.0138, + "step": 21150 + }, + { + "epoch": 12.0, + "learning_rate": 4.768101265822786e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 12.0, + "learning_rate": 4.761772151898735e-06, + "loss": 0.0124, + "step": 21200 + }, + { + "epoch": 12.01, + "learning_rate": 4.755443037974684e-06, + "loss": 0.0175, + "step": 21225 + }, + { + "epoch": 12.01, + "learning_rate": 4.749113924050633e-06, + "loss": 0.0118, + "step": 21250 + }, + { + "epoch": 12.01, + "learning_rate": 4.742784810126583e-06, + "loss": 0.0172, + "step": 21275 + }, + { + "epoch": 12.01, + "learning_rate": 4.736455696202532e-06, + "loss": 0.0136, + "step": 21300 + }, + { + "epoch": 12.01, + "learning_rate": 4.7301265822784815e-06, + "loss": 0.018, + "step": 21325 + }, + { + "epoch": 12.01, + "learning_rate": 4.723797468354431e-06, + "loss": 0.0184, + "step": 21350 + }, + { + "epoch": 12.01, + "learning_rate": 4.71746835443038e-06, + "loss": 0.0166, + "step": 21375 + }, + { + "epoch": 12.01, + "learning_rate": 4.71113924050633e-06, + "loss": 0.0107, + "step": 21400 + }, + { + "epoch": 12.01, + "learning_rate": 4.704810126582279e-06, + "loss": 0.0125, + "step": 21425 + }, + { + "epoch": 12.01, + "learning_rate": 4.698481012658228e-06, + "loss": 0.0098, + "step": 21450 + }, + { + "epoch": 12.01, + "learning_rate": 4.692151898734177e-06, + "loss": 0.0103, + "step": 21475 + }, + { + "epoch": 12.01, + "learning_rate": 4.685822784810127e-06, + "loss": 0.0211, + "step": 21500 + }, + { + "epoch": 12.01, + "learning_rate": 4.679493670886076e-06, + "loss": 0.0125, + "step": 21525 + }, + { + "epoch": 12.01, + "learning_rate": 4.6731645569620255e-06, + "loss": 0.0083, + "step": 21550 + }, + { + "epoch": 12.01, + "learning_rate": 4.666835443037975e-06, + "loss": 0.0156, + "step": 21575 + }, + { + "epoch": 12.01, + "learning_rate": 4.660506329113924e-06, + "loss": 0.0137, + "step": 21600 + }, + { + "epoch": 12.02, + "learning_rate": 4.654177215189874e-06, + "loss": 0.0137, + "step": 21625 + }, + { + "epoch": 12.02, + "learning_rate": 4.647848101265823e-06, + "loss": 0.0092, + "step": 21650 + }, + { + "epoch": 12.02, + "learning_rate": 4.641518987341773e-06, + "loss": 0.0101, + "step": 21675 + }, + { + "epoch": 12.02, + "learning_rate": 4.635189873417722e-06, + "loss": 0.0118, + "step": 21700 + }, + { + "epoch": 12.02, + "learning_rate": 4.6288607594936715e-06, + "loss": 0.0119, + "step": 21725 + }, + { + "epoch": 12.02, + "learning_rate": 4.622531645569621e-06, + "loss": 0.009, + "step": 21750 + }, + { + "epoch": 12.02, + "learning_rate": 4.61620253164557e-06, + "loss": 0.014, + "step": 21775 + }, + { + "epoch": 12.02, + "learning_rate": 4.609873417721519e-06, + "loss": 0.0108, + "step": 21800 + }, + { + "epoch": 12.02, + "learning_rate": 4.603544303797468e-06, + "loss": 0.0117, + "step": 21825 + }, + { + "epoch": 12.02, + "learning_rate": 4.597215189873418e-06, + "loss": 0.0147, + "step": 21850 + }, + { + "epoch": 12.02, + "learning_rate": 4.590886075949367e-06, + "loss": 0.0133, + "step": 21875 + }, + { + "epoch": 12.02, + "learning_rate": 4.584556962025317e-06, + "loss": 0.0131, + "step": 21900 + }, + { + "epoch": 12.02, + "learning_rate": 4.578227848101266e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 12.02, + "learning_rate": 4.5718987341772155e-06, + "loss": 0.0211, + "step": 21950 + }, + { + "epoch": 12.02, + "learning_rate": 4.565569620253165e-06, + "loss": 0.0127, + "step": 21975 + }, + { + "epoch": 12.02, + "learning_rate": 4.559240506329114e-06, + "loss": 0.011, + "step": 22000 + }, + { + "epoch": 12.02, + "eval_loss": 0.26152506470680237, + "eval_runtime": 1497.9019, + "eval_samples_per_second": 6.95, + "eval_steps_per_second": 0.435, + "eval_wer": 26.236198962584755, + "step": 22000 + }, + { + "epoch": 12.03, + "learning_rate": 4.552911392405064e-06, + "loss": 0.0164, + "step": 22025 + }, + { + "epoch": 12.03, + "learning_rate": 4.546582278481013e-06, + "loss": 0.01, + "step": 22050 + }, + { + "epoch": 12.03, + "learning_rate": 4.540253164556963e-06, + "loss": 0.0127, + "step": 22075 + }, + { + "epoch": 12.03, + "learning_rate": 4.533924050632912e-06, + "loss": 0.0089, + "step": 22100 + }, + { + "epoch": 12.03, + "learning_rate": 4.5275949367088615e-06, + "loss": 0.0077, + "step": 22125 + }, + { + "epoch": 12.03, + "learning_rate": 4.521265822784811e-06, + "loss": 0.0098, + "step": 22150 + }, + { + "epoch": 12.03, + "learning_rate": 4.5149367088607595e-06, + "loss": 0.0084, + "step": 22175 + }, + { + "epoch": 12.03, + "learning_rate": 4.508607594936709e-06, + "loss": 0.0096, + "step": 22200 + }, + { + "epoch": 12.03, + "learning_rate": 4.502278481012658e-06, + "loss": 0.0097, + "step": 22225 + }, + { + "epoch": 12.03, + "learning_rate": 4.495949367088608e-06, + "loss": 0.0114, + "step": 22250 + }, + { + "epoch": 12.03, + "learning_rate": 4.489620253164557e-06, + "loss": 0.0109, + "step": 22275 + }, + { + "epoch": 12.03, + "learning_rate": 4.483291139240507e-06, + "loss": 0.0096, + "step": 22300 + }, + { + "epoch": 12.03, + "learning_rate": 4.476962025316456e-06, + "loss": 0.0117, + "step": 22325 + }, + { + "epoch": 12.03, + "learning_rate": 4.4706329113924055e-06, + "loss": 0.0105, + "step": 22350 + }, + { + "epoch": 12.03, + "learning_rate": 4.464303797468355e-06, + "loss": 0.0103, + "step": 22375 + }, + { + "epoch": 12.03, + "learning_rate": 4.457974683544304e-06, + "loss": 0.0085, + "step": 22400 + }, + { + "epoch": 12.04, + "learning_rate": 4.451645569620254e-06, + "loss": 0.0129, + "step": 22425 + }, + { + "epoch": 12.04, + "learning_rate": 4.445316455696203e-06, + "loss": 0.0188, + "step": 22450 + }, + { + "epoch": 12.04, + "learning_rate": 4.438987341772153e-06, + "loss": 0.013, + "step": 22475 + }, + { + "epoch": 12.04, + "learning_rate": 4.432658227848102e-06, + "loss": 0.0095, + "step": 22500 + }, + { + "epoch": 12.04, + "learning_rate": 4.4263291139240515e-06, + "loss": 0.0111, + "step": 22525 + }, + { + "epoch": 12.04, + "learning_rate": 4.42e-06, + "loss": 0.0115, + "step": 22550 + }, + { + "epoch": 12.04, + "learning_rate": 4.4136708860759495e-06, + "loss": 0.0096, + "step": 22575 + }, + { + "epoch": 12.04, + "learning_rate": 4.407341772151899e-06, + "loss": 0.0108, + "step": 22600 + }, + { + "epoch": 12.04, + "learning_rate": 4.401012658227848e-06, + "loss": 0.0118, + "step": 22625 + }, + { + "epoch": 12.04, + "learning_rate": 4.394683544303798e-06, + "loss": 0.0136, + "step": 22650 + }, + { + "epoch": 12.04, + "learning_rate": 4.388354430379747e-06, + "loss": 0.0176, + "step": 22675 + }, + { + "epoch": 12.04, + "learning_rate": 4.382025316455697e-06, + "loss": 0.011, + "step": 22700 + }, + { + "epoch": 12.04, + "learning_rate": 4.375696202531646e-06, + "loss": 0.0125, + "step": 22725 + }, + { + "epoch": 12.04, + "learning_rate": 4.3693670886075955e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 13.0, + "learning_rate": 4.363037974683544e-06, + "loss": 0.0192, + "step": 22775 + }, + { + "epoch": 13.0, + "learning_rate": 4.356708860759494e-06, + "loss": 0.0145, + "step": 22800 + }, + { + "epoch": 13.0, + "learning_rate": 4.350379746835444e-06, + "loss": 0.0174, + "step": 22825 + }, + { + "epoch": 13.0, + "learning_rate": 4.344050632911393e-06, + "loss": 0.0142, + "step": 22850 + }, + { + "epoch": 13.0, + "learning_rate": 4.337721518987343e-06, + "loss": 0.0228, + "step": 22875 + }, + { + "epoch": 13.0, + "learning_rate": 4.331392405063292e-06, + "loss": 0.0201, + "step": 22900 + }, + { + "epoch": 13.0, + "learning_rate": 4.325063291139241e-06, + "loss": 0.0188, + "step": 22925 + }, + { + "epoch": 13.0, + "learning_rate": 4.31873417721519e-06, + "loss": 0.0185, + "step": 22950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3124050632911395e-06, + "loss": 0.0186, + "step": 22975 + }, + { + "epoch": 13.01, + "learning_rate": 4.306075949367089e-06, + "loss": 0.0191, + "step": 23000 + }, + { + "epoch": 13.01, + "eval_loss": 0.25467297434806824, + "eval_runtime": 1486.1182, + "eval_samples_per_second": 7.005, + "eval_steps_per_second": 0.438, + "eval_wer": 23.982503780549276, + "step": 23000 + }, + { + "epoch": 13.01, + "learning_rate": 4.299746835443038e-06, + "loss": 0.0145, + "step": 23025 + }, + { + "epoch": 13.01, + "learning_rate": 4.293417721518988e-06, + "loss": 0.0106, + "step": 23050 + }, + { + "epoch": 13.01, + "learning_rate": 4.287088607594937e-06, + "loss": 0.0135, + "step": 23075 + }, + { + "epoch": 13.01, + "learning_rate": 4.280759493670887e-06, + "loss": 0.0173, + "step": 23100 + }, + { + "epoch": 13.01, + "learning_rate": 4.274430379746836e-06, + "loss": 0.0126, + "step": 23125 + }, + { + "epoch": 13.01, + "learning_rate": 4.268101265822785e-06, + "loss": 0.0166, + "step": 23150 + }, + { + "epoch": 13.01, + "learning_rate": 4.261772151898734e-06, + "loss": 0.0097, + "step": 23175 + }, + { + "epoch": 13.01, + "learning_rate": 4.255443037974684e-06, + "loss": 0.0094, + "step": 23200 + }, + { + "epoch": 13.01, + "learning_rate": 4.249113924050634e-06, + "loss": 0.012, + "step": 23225 + }, + { + "epoch": 13.01, + "learning_rate": 4.242784810126583e-06, + "loss": 0.0153, + "step": 23250 + }, + { + "epoch": 13.01, + "learning_rate": 4.236455696202532e-06, + "loss": 0.0169, + "step": 23275 + }, + { + "epoch": 13.01, + "learning_rate": 4.230379746835443e-06, + "loss": 0.0097, + "step": 23300 + }, + { + "epoch": 13.01, + "learning_rate": 4.224050632911393e-06, + "loss": 0.0098, + "step": 23325 + }, + { + "epoch": 13.01, + "learning_rate": 4.217721518987342e-06, + "loss": 0.0129, + "step": 23350 + }, + { + "epoch": 13.02, + "learning_rate": 4.2113924050632915e-06, + "loss": 0.0157, + "step": 23375 + }, + { + "epoch": 13.02, + "learning_rate": 4.205063291139241e-06, + "loss": 0.0088, + "step": 23400 + }, + { + "epoch": 13.02, + "learning_rate": 4.19873417721519e-06, + "loss": 0.0105, + "step": 23425 + }, + { + "epoch": 13.02, + "learning_rate": 4.19240506329114e-06, + "loss": 0.0131, + "step": 23450 + }, + { + "epoch": 13.02, + "learning_rate": 4.186075949367089e-06, + "loss": 0.0133, + "step": 23475 + }, + { + "epoch": 13.02, + "learning_rate": 4.179746835443038e-06, + "loss": 0.0054, + "step": 23500 + }, + { + "epoch": 13.02, + "learning_rate": 4.173417721518987e-06, + "loss": 0.01, + "step": 23525 + }, + { + "epoch": 13.02, + "learning_rate": 4.167088607594937e-06, + "loss": 0.013, + "step": 23550 + }, + { + "epoch": 13.02, + "learning_rate": 4.160759493670886e-06, + "loss": 0.0141, + "step": 23575 + }, + { + "epoch": 13.02, + "learning_rate": 4.154430379746836e-06, + "loss": 0.0126, + "step": 23600 + }, + { + "epoch": 13.02, + "learning_rate": 4.148101265822786e-06, + "loss": 0.015, + "step": 23625 + }, + { + "epoch": 13.02, + "learning_rate": 4.141772151898734e-06, + "loss": 0.0164, + "step": 23650 + }, + { + "epoch": 13.02, + "learning_rate": 4.135443037974684e-06, + "loss": 0.0135, + "step": 23675 + }, + { + "epoch": 13.02, + "learning_rate": 4.129113924050633e-06, + "loss": 0.0151, + "step": 23700 + }, + { + "epoch": 13.02, + "learning_rate": 4.122784810126583e-06, + "loss": 0.0157, + "step": 23725 + }, + { + "epoch": 13.02, + "learning_rate": 4.116455696202532e-06, + "loss": 0.0087, + "step": 23750 + }, + { + "epoch": 13.03, + "learning_rate": 4.1101265822784815e-06, + "loss": 0.0097, + "step": 23775 + }, + { + "epoch": 13.03, + "learning_rate": 4.103797468354431e-06, + "loss": 0.0141, + "step": 23800 + }, + { + "epoch": 13.03, + "learning_rate": 4.09746835443038e-06, + "loss": 0.0079, + "step": 23825 + }, + { + "epoch": 13.03, + "learning_rate": 4.09113924050633e-06, + "loss": 0.0116, + "step": 23850 + }, + { + "epoch": 13.03, + "learning_rate": 4.084810126582278e-06, + "loss": 0.0093, + "step": 23875 + }, + { + "epoch": 13.03, + "learning_rate": 4.078481012658228e-06, + "loss": 0.0149, + "step": 23900 + }, + { + "epoch": 13.03, + "learning_rate": 4.072151898734177e-06, + "loss": 0.0125, + "step": 23925 + }, + { + "epoch": 13.03, + "learning_rate": 4.065822784810127e-06, + "loss": 0.0106, + "step": 23950 + }, + { + "epoch": 13.03, + "learning_rate": 4.059493670886076e-06, + "loss": 0.0124, + "step": 23975 + }, + { + "epoch": 13.03, + "learning_rate": 4.0531645569620255e-06, + "loss": 0.0074, + "step": 24000 + }, + { + "epoch": 13.03, + "eval_loss": 0.27036768198013306, + "eval_runtime": 1458.9195, + "eval_samples_per_second": 7.136, + "eval_steps_per_second": 0.446, + "eval_wer": 24.153238264036816, + "step": 24000 + }, + { + "epoch": 13.03, + "learning_rate": 4.046835443037975e-06, + "loss": 0.0128, + "step": 24025 + }, + { + "epoch": 13.03, + "learning_rate": 4.040506329113924e-06, + "loss": 0.012, + "step": 24050 + }, + { + "epoch": 13.03, + "learning_rate": 4.034177215189874e-06, + "loss": 0.0098, + "step": 24075 + }, + { + "epoch": 13.03, + "learning_rate": 4.027848101265823e-06, + "loss": 0.0091, + "step": 24100 + }, + { + "epoch": 13.03, + "learning_rate": 4.021518987341773e-06, + "loss": 0.0077, + "step": 24125 + }, + { + "epoch": 13.03, + "learning_rate": 4.015189873417722e-06, + "loss": 0.0135, + "step": 24150 + }, + { + "epoch": 13.04, + "learning_rate": 4.0088607594936715e-06, + "loss": 0.0124, + "step": 24175 + }, + { + "epoch": 13.04, + "learning_rate": 4.002531645569621e-06, + "loss": 0.0152, + "step": 24200 + }, + { + "epoch": 13.04, + "learning_rate": 3.99620253164557e-06, + "loss": 0.0127, + "step": 24225 + }, + { + "epoch": 13.04, + "learning_rate": 3.989873417721519e-06, + "loss": 0.0066, + "step": 24250 + }, + { + "epoch": 13.04, + "learning_rate": 3.983544303797468e-06, + "loss": 0.0163, + "step": 24275 + }, + { + "epoch": 13.04, + "learning_rate": 3.977215189873418e-06, + "loss": 0.0095, + "step": 24300 + }, + { + "epoch": 13.04, + "learning_rate": 3.970886075949367e-06, + "loss": 0.0103, + "step": 24325 + }, + { + "epoch": 13.04, + "learning_rate": 3.964556962025317e-06, + "loss": 0.0112, + "step": 24350 + }, + { + "epoch": 13.04, + "learning_rate": 3.958227848101266e-06, + "loss": 0.01, + "step": 24375 + }, + { + "epoch": 13.04, + "learning_rate": 3.9518987341772155e-06, + "loss": 0.0131, + "step": 24400 + }, + { + "epoch": 13.04, + "learning_rate": 3.945569620253165e-06, + "loss": 0.0143, + "step": 24425 + }, + { + "epoch": 13.04, + "learning_rate": 3.939240506329114e-06, + "loss": 0.0151, + "step": 24450 + }, + { + "epoch": 13.04, + "learning_rate": 3.932911392405064e-06, + "loss": 0.0129, + "step": 24475 + }, + { + "epoch": 13.04, + "learning_rate": 3.926582278481013e-06, + "loss": 0.0125, + "step": 24500 + }, + { + "epoch": 14.0, + "learning_rate": 3.920506329113925e-06, + "loss": 0.0165, + "step": 24525 + }, + { + "epoch": 14.0, + "learning_rate": 3.914177215189874e-06, + "loss": 0.0114, + "step": 24550 + }, + { + "epoch": 14.0, + "learning_rate": 3.9078481012658235e-06, + "loss": 0.0111, + "step": 24575 + }, + { + "epoch": 14.0, + "learning_rate": 3.901518987341772e-06, + "loss": 0.0134, + "step": 24600 + }, + { + "epoch": 14.0, + "learning_rate": 3.8951898734177215e-06, + "loss": 0.0138, + "step": 24625 + }, + { + "epoch": 14.0, + "learning_rate": 3.888860759493671e-06, + "loss": 0.0151, + "step": 24650 + }, + { + "epoch": 14.0, + "learning_rate": 3.88253164556962e-06, + "loss": 0.0101, + "step": 24675 + }, + { + "epoch": 14.0, + "learning_rate": 3.87620253164557e-06, + "loss": 0.0108, + "step": 24700 + }, + { + "epoch": 14.01, + "learning_rate": 3.869873417721519e-06, + "loss": 0.0151, + "step": 24725 + }, + { + "epoch": 14.01, + "learning_rate": 3.863544303797469e-06, + "loss": 0.0132, + "step": 24750 + }, + { + "epoch": 14.01, + "learning_rate": 3.857215189873418e-06, + "loss": 0.012, + "step": 24775 + }, + { + "epoch": 14.01, + "learning_rate": 3.8508860759493675e-06, + "loss": 0.017, + "step": 24800 + }, + { + "epoch": 14.01, + "learning_rate": 3.844556962025317e-06, + "loss": 0.0141, + "step": 24825 + }, + { + "epoch": 14.01, + "learning_rate": 3.838227848101266e-06, + "loss": 0.0166, + "step": 24850 + }, + { + "epoch": 14.01, + "learning_rate": 3.831898734177216e-06, + "loss": 0.0112, + "step": 24875 + }, + { + "epoch": 14.01, + "learning_rate": 3.825569620253165e-06, + "loss": 0.0097, + "step": 24900 + }, + { + "epoch": 14.01, + "learning_rate": 3.819240506329115e-06, + "loss": 0.0138, + "step": 24925 + }, + { + "epoch": 14.01, + "learning_rate": 3.8129113924050636e-06, + "loss": 0.0069, + "step": 24950 + }, + { + "epoch": 14.01, + "learning_rate": 3.806582278481013e-06, + "loss": 0.0111, + "step": 24975 + }, + { + "epoch": 14.01, + "learning_rate": 3.8002531645569625e-06, + "loss": 0.0149, + "step": 25000 + }, + { + "epoch": 14.01, + "eval_loss": 0.2640444040298462, + "eval_runtime": 1478.4977, + "eval_samples_per_second": 7.042, + "eval_steps_per_second": 0.44, + "eval_wer": 24.063805915543345, + "step": 25000 + }, + { + "epoch": 14.01, + "learning_rate": 3.7939240506329115e-06, + "loss": 0.0176, + "step": 25025 + }, + { + "epoch": 14.01, + "learning_rate": 3.787594936708861e-06, + "loss": 0.0104, + "step": 25050 + }, + { + "epoch": 14.01, + "learning_rate": 3.7812658227848103e-06, + "loss": 0.0084, + "step": 25075 + }, + { + "epoch": 14.01, + "learning_rate": 3.7749367088607598e-06, + "loss": 0.0118, + "step": 25100 + }, + { + "epoch": 14.02, + "learning_rate": 3.768607594936709e-06, + "loss": 0.0099, + "step": 25125 + }, + { + "epoch": 14.02, + "learning_rate": 3.762278481012658e-06, + "loss": 0.0089, + "step": 25150 + }, + { + "epoch": 14.02, + "learning_rate": 3.7559493670886076e-06, + "loss": 0.0119, + "step": 25175 + }, + { + "epoch": 14.02, + "learning_rate": 3.749620253164557e-06, + "loss": 0.0118, + "step": 25200 + }, + { + "epoch": 14.02, + "learning_rate": 3.743291139240507e-06, + "loss": 0.0108, + "step": 25225 + }, + { + "epoch": 14.02, + "learning_rate": 3.7369620253164563e-06, + "loss": 0.0101, + "step": 25250 + }, + { + "epoch": 14.02, + "learning_rate": 3.7306329113924058e-06, + "loss": 0.0095, + "step": 25275 + }, + { + "epoch": 14.02, + "learning_rate": 3.7243037974683548e-06, + "loss": 0.0106, + "step": 25300 + }, + { + "epoch": 14.02, + "learning_rate": 3.717974683544304e-06, + "loss": 0.0135, + "step": 25325 + }, + { + "epoch": 14.02, + "learning_rate": 3.7116455696202536e-06, + "loss": 0.0115, + "step": 25350 + }, + { + "epoch": 14.02, + "learning_rate": 3.705316455696203e-06, + "loss": 0.0114, + "step": 25375 + }, + { + "epoch": 14.02, + "learning_rate": 3.698987341772152e-06, + "loss": 0.0119, + "step": 25400 + }, + { + "epoch": 14.02, + "learning_rate": 3.6926582278481015e-06, + "loss": 0.0123, + "step": 25425 + }, + { + "epoch": 14.02, + "learning_rate": 3.686329113924051e-06, + "loss": 0.0138, + "step": 25450 + }, + { + "epoch": 14.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 0.0133, + "step": 25475 + }, + { + "epoch": 14.02, + "learning_rate": 3.6736708860759498e-06, + "loss": 0.0106, + "step": 25500 + }, + { + "epoch": 14.03, + "learning_rate": 3.6673417721518988e-06, + "loss": 0.0099, + "step": 25525 + }, + { + "epoch": 14.03, + "learning_rate": 3.661012658227848e-06, + "loss": 0.0104, + "step": 25550 + }, + { + "epoch": 14.03, + "learning_rate": 3.6546835443037976e-06, + "loss": 0.0143, + "step": 25575 + }, + { + "epoch": 14.03, + "learning_rate": 3.648354430379747e-06, + "loss": 0.0096, + "step": 25600 + }, + { + "epoch": 14.03, + "learning_rate": 3.642025316455696e-06, + "loss": 0.0094, + "step": 25625 + }, + { + "epoch": 14.03, + "learning_rate": 3.6356962025316463e-06, + "loss": 0.0162, + "step": 25650 + }, + { + "epoch": 14.03, + "learning_rate": 3.6293670886075953e-06, + "loss": 0.008, + "step": 25675 + }, + { + "epoch": 14.03, + "learning_rate": 3.6230379746835448e-06, + "loss": 0.0094, + "step": 25700 + }, + { + "epoch": 14.03, + "learning_rate": 3.616708860759494e-06, + "loss": 0.0122, + "step": 25725 + }, + { + "epoch": 14.03, + "learning_rate": 3.6103797468354436e-06, + "loss": 0.0128, + "step": 25750 + }, + { + "epoch": 14.03, + "learning_rate": 3.6040506329113926e-06, + "loss": 0.0117, + "step": 25775 + }, + { + "epoch": 14.03, + "learning_rate": 3.597721518987342e-06, + "loss": 0.0067, + "step": 25800 + }, + { + "epoch": 14.03, + "learning_rate": 3.5913924050632915e-06, + "loss": 0.0078, + "step": 25825 + }, + { + "epoch": 14.03, + "learning_rate": 3.585063291139241e-06, + "loss": 0.0071, + "step": 25850 + }, + { + "epoch": 14.03, + "learning_rate": 3.57873417721519e-06, + "loss": 0.0093, + "step": 25875 + }, + { + "epoch": 14.03, + "learning_rate": 3.5724050632911393e-06, + "loss": 0.0064, + "step": 25900 + }, + { + "epoch": 14.04, + "learning_rate": 3.5660759493670888e-06, + "loss": 0.0067, + "step": 25925 + }, + { + "epoch": 14.04, + "learning_rate": 3.559746835443038e-06, + "loss": 0.0076, + "step": 25950 + }, + { + "epoch": 14.04, + "learning_rate": 3.5534177215189876e-06, + "loss": 0.0099, + "step": 25975 + }, + { + "epoch": 14.04, + "learning_rate": 3.5470886075949366e-06, + "loss": 0.0111, + "step": 26000 + }, + { + "epoch": 14.04, + "eval_loss": 0.2734878659248352, + "eval_runtime": 1456.7487, + "eval_samples_per_second": 7.147, + "eval_steps_per_second": 0.447, + "eval_wer": 24.359745686921737, + "step": 26000 + }, + { + "epoch": 14.04, + "learning_rate": 3.540759493670886e-06, + "loss": 0.0134, + "step": 26025 + }, + { + "epoch": 14.04, + "learning_rate": 3.534430379746836e-06, + "loss": 0.008, + "step": 26050 + }, + { + "epoch": 14.04, + "learning_rate": 3.5281012658227853e-06, + "loss": 0.0138, + "step": 26075 + }, + { + "epoch": 14.04, + "learning_rate": 3.5217721518987348e-06, + "loss": 0.0075, + "step": 26100 + }, + { + "epoch": 14.04, + "learning_rate": 3.515443037974684e-06, + "loss": 0.0097, + "step": 26125 + }, + { + "epoch": 14.04, + "learning_rate": 3.509113924050633e-06, + "loss": 0.0139, + "step": 26150 + }, + { + "epoch": 14.04, + "learning_rate": 3.5027848101265826e-06, + "loss": 0.0099, + "step": 26175 + }, + { + "epoch": 14.04, + "learning_rate": 3.496455696202532e-06, + "loss": 0.0148, + "step": 26200 + }, + { + "epoch": 14.04, + "learning_rate": 3.4901265822784815e-06, + "loss": 0.0119, + "step": 26225 + }, + { + "epoch": 14.04, + "learning_rate": 3.4837974683544305e-06, + "loss": 0.0086, + "step": 26250 + }, + { + "epoch": 15.0, + "learning_rate": 3.47746835443038e-06, + "loss": 0.0091, + "step": 26275 + }, + { + "epoch": 15.0, + "learning_rate": 3.4711392405063293e-06, + "loss": 0.0105, + "step": 26300 + }, + { + "epoch": 15.0, + "learning_rate": 3.465063291139241e-06, + "loss": 0.0138, + "step": 26325 + }, + { + "epoch": 15.0, + "learning_rate": 3.45873417721519e-06, + "loss": 0.0185, + "step": 26350 + }, + { + "epoch": 15.0, + "learning_rate": 3.4524050632911392e-06, + "loss": 0.015, + "step": 26375 + }, + { + "epoch": 15.0, + "learning_rate": 3.4460759493670886e-06, + "loss": 0.0177, + "step": 26400 + }, + { + "epoch": 15.0, + "learning_rate": 3.439746835443038e-06, + "loss": 0.014, + "step": 26425 + }, + { + "epoch": 15.0, + "learning_rate": 3.433417721518988e-06, + "loss": 0.0127, + "step": 26450 + }, + { + "epoch": 15.01, + "learning_rate": 3.4270886075949374e-06, + "loss": 0.0135, + "step": 26475 + }, + { + "epoch": 15.01, + "learning_rate": 3.4207594936708864e-06, + "loss": 0.0171, + "step": 26500 + }, + { + "epoch": 15.01, + "learning_rate": 3.4144303797468358e-06, + "loss": 0.0114, + "step": 26525 + }, + { + "epoch": 15.01, + "learning_rate": 3.4081012658227852e-06, + "loss": 0.0099, + "step": 26550 + }, + { + "epoch": 15.01, + "learning_rate": 3.4017721518987346e-06, + "loss": 0.0126, + "step": 26575 + }, + { + "epoch": 15.01, + "learning_rate": 3.395443037974684e-06, + "loss": 0.0116, + "step": 26600 + }, + { + "epoch": 15.01, + "learning_rate": 3.389113924050633e-06, + "loss": 0.0087, + "step": 26625 + }, + { + "epoch": 15.01, + "learning_rate": 3.3827848101265825e-06, + "loss": 0.0073, + "step": 26650 + }, + { + "epoch": 15.01, + "learning_rate": 3.376455696202532e-06, + "loss": 0.0102, + "step": 26675 + }, + { + "epoch": 15.01, + "learning_rate": 3.3701265822784814e-06, + "loss": 0.0106, + "step": 26700 + }, + { + "epoch": 15.01, + "learning_rate": 3.3637974683544304e-06, + "loss": 0.0094, + "step": 26725 + }, + { + "epoch": 15.01, + "learning_rate": 3.35746835443038e-06, + "loss": 0.0113, + "step": 26750 + }, + { + "epoch": 15.01, + "learning_rate": 3.3511392405063292e-06, + "loss": 0.0169, + "step": 26775 + }, + { + "epoch": 15.01, + "learning_rate": 3.3448101265822786e-06, + "loss": 0.0084, + "step": 26800 + }, + { + "epoch": 15.01, + "learning_rate": 3.338481012658228e-06, + "loss": 0.0153, + "step": 26825 + }, + { + "epoch": 15.01, + "learning_rate": 3.332151898734177e-06, + "loss": 0.012, + "step": 26850 + }, + { + "epoch": 15.02, + "learning_rate": 3.325822784810127e-06, + "loss": 0.0106, + "step": 26875 + }, + { + "epoch": 15.02, + "learning_rate": 3.3194936708860764e-06, + "loss": 0.0099, + "step": 26900 + }, + { + "epoch": 15.02, + "learning_rate": 3.3131645569620258e-06, + "loss": 0.008, + "step": 26925 + }, + { + "epoch": 15.02, + "learning_rate": 3.306835443037975e-06, + "loss": 0.0076, + "step": 26950 + }, + { + "epoch": 15.02, + "learning_rate": 3.3005063291139246e-06, + "loss": 0.0088, + "step": 26975 + }, + { + "epoch": 15.02, + "learning_rate": 3.2941772151898736e-06, + "loss": 0.009, + "step": 27000 + }, + { + "epoch": 15.02, + "eval_loss": 0.2693029046058655, + "eval_runtime": 1479.1374, + "eval_samples_per_second": 7.039, + "eval_steps_per_second": 0.44, + "eval_wer": 24.28819980812696, + "step": 27000 + }, + { + "epoch": 15.02, + "learning_rate": 3.287848101265823e-06, + "loss": 0.0115, + "step": 27025 + }, + { + "epoch": 15.02, + "learning_rate": 3.2815189873417725e-06, + "loss": 0.0099, + "step": 27050 + }, + { + "epoch": 15.02, + "learning_rate": 3.275189873417722e-06, + "loss": 0.0073, + "step": 27075 + }, + { + "epoch": 15.02, + "learning_rate": 3.268860759493671e-06, + "loss": 0.0061, + "step": 27100 + }, + { + "epoch": 15.02, + "learning_rate": 3.2625316455696204e-06, + "loss": 0.007, + "step": 27125 + }, + { + "epoch": 15.02, + "learning_rate": 3.2562025316455698e-06, + "loss": 0.0109, + "step": 27150 + }, + { + "epoch": 15.02, + "learning_rate": 3.2498734177215192e-06, + "loss": 0.0123, + "step": 27175 + }, + { + "epoch": 15.02, + "learning_rate": 3.2435443037974686e-06, + "loss": 0.0121, + "step": 27200 + }, + { + "epoch": 15.02, + "learning_rate": 3.2372151898734176e-06, + "loss": 0.0129, + "step": 27225 + }, + { + "epoch": 15.02, + "learning_rate": 3.230886075949367e-06, + "loss": 0.0128, + "step": 27250 + }, + { + "epoch": 15.03, + "learning_rate": 3.224556962025317e-06, + "loss": 0.0094, + "step": 27275 + }, + { + "epoch": 15.03, + "learning_rate": 3.2182278481012664e-06, + "loss": 0.0081, + "step": 27300 + }, + { + "epoch": 15.03, + "learning_rate": 3.2118987341772158e-06, + "loss": 0.0087, + "step": 27325 + }, + { + "epoch": 15.03, + "learning_rate": 3.2055696202531648e-06, + "loss": 0.0131, + "step": 27350 + }, + { + "epoch": 15.03, + "learning_rate": 3.199240506329114e-06, + "loss": 0.0073, + "step": 27375 + }, + { + "epoch": 15.03, + "learning_rate": 3.1929113924050636e-06, + "loss": 0.0086, + "step": 27400 + }, + { + "epoch": 15.03, + "learning_rate": 3.186582278481013e-06, + "loss": 0.0108, + "step": 27425 + }, + { + "epoch": 15.03, + "learning_rate": 3.1802531645569625e-06, + "loss": 0.01, + "step": 27450 + }, + { + "epoch": 15.03, + "learning_rate": 3.1739240506329115e-06, + "loss": 0.0123, + "step": 27475 + }, + { + "epoch": 15.03, + "learning_rate": 3.167594936708861e-06, + "loss": 0.0081, + "step": 27500 + }, + { + "epoch": 15.03, + "learning_rate": 3.1612658227848104e-06, + "loss": 0.0123, + "step": 27525 + }, + { + "epoch": 15.03, + "learning_rate": 3.1549367088607598e-06, + "loss": 0.0096, + "step": 27550 + }, + { + "epoch": 15.03, + "learning_rate": 3.1486075949367088e-06, + "loss": 0.0054, + "step": 27575 + }, + { + "epoch": 15.03, + "learning_rate": 3.1422784810126582e-06, + "loss": 0.006, + "step": 27600 + }, + { + "epoch": 15.03, + "learning_rate": 3.1359493670886076e-06, + "loss": 0.0089, + "step": 27625 + }, + { + "epoch": 15.03, + "learning_rate": 3.129620253164557e-06, + "loss": 0.0109, + "step": 27650 + }, + { + "epoch": 15.04, + "learning_rate": 3.1232911392405065e-06, + "loss": 0.0089, + "step": 27675 + }, + { + "epoch": 15.04, + "learning_rate": 3.1169620253164563e-06, + "loss": 0.0094, + "step": 27700 + }, + { + "epoch": 15.04, + "learning_rate": 3.1106329113924054e-06, + "loss": 0.0102, + "step": 27725 + }, + { + "epoch": 15.04, + "learning_rate": 3.1043037974683548e-06, + "loss": 0.0097, + "step": 27750 + }, + { + "epoch": 15.04, + "learning_rate": 3.097974683544304e-06, + "loss": 0.0078, + "step": 27775 + }, + { + "epoch": 15.04, + "learning_rate": 3.0916455696202536e-06, + "loss": 0.0073, + "step": 27800 + }, + { + "epoch": 15.04, + "learning_rate": 3.085316455696203e-06, + "loss": 0.0105, + "step": 27825 + }, + { + "epoch": 15.04, + "learning_rate": 3.078987341772152e-06, + "loss": 0.0083, + "step": 27850 + }, + { + "epoch": 15.04, + "learning_rate": 3.0726582278481015e-06, + "loss": 0.0138, + "step": 27875 + }, + { + "epoch": 15.04, + "learning_rate": 3.066329113924051e-06, + "loss": 0.0049, + "step": 27900 + }, + { + "epoch": 15.04, + "learning_rate": 3.0600000000000003e-06, + "loss": 0.0107, + "step": 27925 + }, + { + "epoch": 15.04, + "learning_rate": 3.0536708860759494e-06, + "loss": 0.0105, + "step": 27950 + }, + { + "epoch": 15.04, + "learning_rate": 3.0473417721518988e-06, + "loss": 0.0134, + "step": 27975 + }, + { + "epoch": 15.04, + "learning_rate": 3.041012658227848e-06, + "loss": 0.0137, + "step": 28000 + }, + { + "epoch": 15.04, + "eval_loss": 0.2780563235282898, + "eval_runtime": 1465.9542, + "eval_samples_per_second": 7.102, + "eval_steps_per_second": 0.444, + "eval_wer": 25.010162766874256, + "step": 28000 + }, + { + "epoch": 16.0, + "learning_rate": 3.0346835443037976e-06, + "loss": 0.009, + "step": 28025 + }, + { + "epoch": 16.0, + "learning_rate": 3.028354430379747e-06, + "loss": 0.0081, + "step": 28050 + }, + { + "epoch": 16.0, + "learning_rate": 3.022025316455696e-06, + "loss": 0.0109, + "step": 28075 + }, + { + "epoch": 16.0, + "learning_rate": 3.015696202531646e-06, + "loss": 0.0132, + "step": 28100 + }, + { + "epoch": 16.0, + "learning_rate": 3.0093670886075953e-06, + "loss": 0.0142, + "step": 28125 + }, + { + "epoch": 16.0, + "learning_rate": 3.0030379746835448e-06, + "loss": 0.0152, + "step": 28150 + }, + { + "epoch": 16.0, + "learning_rate": 2.996708860759494e-06, + "loss": 0.0074, + "step": 28175 + }, + { + "epoch": 16.0, + "learning_rate": 2.990379746835443e-06, + "loss": 0.0129, + "step": 28200 + }, + { + "epoch": 16.01, + "learning_rate": 2.9840506329113926e-06, + "loss": 0.0118, + "step": 28225 + }, + { + "epoch": 16.01, + "learning_rate": 2.977721518987342e-06, + "loss": 0.0153, + "step": 28250 + }, + { + "epoch": 16.01, + "learning_rate": 2.9713924050632915e-06, + "loss": 0.0121, + "step": 28275 + }, + { + "epoch": 16.01, + "learning_rate": 2.965063291139241e-06, + "loss": 0.0097, + "step": 28300 + }, + { + "epoch": 16.01, + "learning_rate": 2.95873417721519e-06, + "loss": 0.0083, + "step": 28325 + }, + { + "epoch": 16.01, + "learning_rate": 2.9524050632911393e-06, + "loss": 0.01, + "step": 28350 + }, + { + "epoch": 16.01, + "learning_rate": 2.9460759493670888e-06, + "loss": 0.0084, + "step": 28375 + }, + { + "epoch": 16.01, + "learning_rate": 2.939746835443038e-06, + "loss": 0.0124, + "step": 28400 + }, + { + "epoch": 16.01, + "learning_rate": 2.933417721518987e-06, + "loss": 0.0071, + "step": 28425 + }, + { + "epoch": 16.01, + "learning_rate": 2.9270886075949366e-06, + "loss": 0.0113, + "step": 28450 + }, + { + "epoch": 16.01, + "learning_rate": 2.920759493670886e-06, + "loss": 0.0085, + "step": 28475 + }, + { + "epoch": 16.01, + "learning_rate": 2.914430379746836e-06, + "loss": 0.0133, + "step": 28500 + }, + { + "epoch": 16.01, + "learning_rate": 2.9081012658227853e-06, + "loss": 0.014, + "step": 28525 + }, + { + "epoch": 16.01, + "learning_rate": 2.9017721518987348e-06, + "loss": 0.0105, + "step": 28550 + }, + { + "epoch": 16.01, + "learning_rate": 2.8954430379746838e-06, + "loss": 0.01, + "step": 28575 + }, + { + "epoch": 16.01, + "learning_rate": 2.889113924050633e-06, + "loss": 0.0135, + "step": 28600 + }, + { + "epoch": 16.02, + "learning_rate": 2.8827848101265826e-06, + "loss": 0.0118, + "step": 28625 + }, + { + "epoch": 16.02, + "learning_rate": 2.876455696202532e-06, + "loss": 0.0111, + "step": 28650 + }, + { + "epoch": 16.02, + "learning_rate": 2.8701265822784815e-06, + "loss": 0.008, + "step": 28675 + }, + { + "epoch": 16.02, + "learning_rate": 2.8637974683544305e-06, + "loss": 0.0103, + "step": 28700 + }, + { + "epoch": 16.02, + "learning_rate": 2.85746835443038e-06, + "loss": 0.0082, + "step": 28725 + }, + { + "epoch": 16.02, + "learning_rate": 2.8511392405063293e-06, + "loss": 0.0057, + "step": 28750 + }, + { + "epoch": 16.02, + "learning_rate": 2.8448101265822788e-06, + "loss": 0.0062, + "step": 28775 + }, + { + "epoch": 16.02, + "learning_rate": 2.8384810126582278e-06, + "loss": 0.0092, + "step": 28800 + }, + { + "epoch": 16.02, + "learning_rate": 2.832151898734177e-06, + "loss": 0.0084, + "step": 28825 + }, + { + "epoch": 16.02, + "learning_rate": 2.8258227848101266e-06, + "loss": 0.0099, + "step": 28850 + }, + { + "epoch": 16.02, + "learning_rate": 2.819493670886076e-06, + "loss": 0.0117, + "step": 28875 + }, + { + "epoch": 16.02, + "learning_rate": 2.8131645569620255e-06, + "loss": 0.0117, + "step": 28900 + }, + { + "epoch": 16.02, + "learning_rate": 2.8068354430379753e-06, + "loss": 0.0127, + "step": 28925 + }, + { + "epoch": 16.02, + "learning_rate": 2.8005063291139243e-06, + "loss": 0.0123, + "step": 28950 + }, + { + "epoch": 16.02, + "learning_rate": 2.7941772151898738e-06, + "loss": 0.0128, + "step": 28975 + }, + { + "epoch": 16.02, + "learning_rate": 2.787848101265823e-06, + "loss": 0.0073, + "step": 29000 + }, + { + "epoch": 16.02, + "eval_loss": 0.2685891091823578, + "eval_runtime": 1456.562, + "eval_samples_per_second": 7.148, + "eval_steps_per_second": 0.447, + "eval_wer": 23.278427291500677, + "step": 29000 + }, + { + "epoch": 16.03, + "learning_rate": 2.7815189873417726e-06, + "loss": 0.0068, + "step": 29025 + }, + { + "epoch": 16.03, + "learning_rate": 2.7751898734177216e-06, + "loss": 0.0087, + "step": 29050 + }, + { + "epoch": 16.03, + "learning_rate": 2.768860759493671e-06, + "loss": 0.0084, + "step": 29075 + }, + { + "epoch": 16.03, + "learning_rate": 2.7625316455696205e-06, + "loss": 0.0039, + "step": 29100 + }, + { + "epoch": 16.03, + "learning_rate": 2.75620253164557e-06, + "loss": 0.0066, + "step": 29125 + }, + { + "epoch": 16.03, + "learning_rate": 2.7498734177215193e-06, + "loss": 0.0108, + "step": 29150 + }, + { + "epoch": 16.03, + "learning_rate": 2.7435443037974683e-06, + "loss": 0.0052, + "step": 29175 + }, + { + "epoch": 16.03, + "learning_rate": 2.7372151898734178e-06, + "loss": 0.0061, + "step": 29200 + }, + { + "epoch": 16.03, + "learning_rate": 2.730886075949367e-06, + "loss": 0.0083, + "step": 29225 + }, + { + "epoch": 16.03, + "learning_rate": 2.7245569620253166e-06, + "loss": 0.0062, + "step": 29250 + }, + { + "epoch": 16.03, + "learning_rate": 2.7182278481012656e-06, + "loss": 0.0078, + "step": 29275 + }, + { + "epoch": 16.03, + "learning_rate": 2.711898734177215e-06, + "loss": 0.0127, + "step": 29300 + }, + { + "epoch": 16.03, + "learning_rate": 2.705569620253165e-06, + "loss": 0.0092, + "step": 29325 + }, + { + "epoch": 16.03, + "learning_rate": 2.6992405063291143e-06, + "loss": 0.0087, + "step": 29350 + }, + { + "epoch": 16.03, + "learning_rate": 2.6929113924050638e-06, + "loss": 0.0062, + "step": 29375 + }, + { + "epoch": 16.03, + "learning_rate": 2.686582278481013e-06, + "loss": 0.0065, + "step": 29400 + }, + { + "epoch": 16.04, + "learning_rate": 2.680253164556962e-06, + "loss": 0.0057, + "step": 29425 + }, + { + "epoch": 16.04, + "learning_rate": 2.6739240506329116e-06, + "loss": 0.0071, + "step": 29450 + }, + { + "epoch": 16.04, + "learning_rate": 2.667594936708861e-06, + "loss": 0.01, + "step": 29475 + }, + { + "epoch": 16.04, + "learning_rate": 2.6612658227848105e-06, + "loss": 0.009, + "step": 29500 + }, + { + "epoch": 16.04, + "learning_rate": 2.65493670886076e-06, + "loss": 0.0091, + "step": 29525 + }, + { + "epoch": 16.04, + "learning_rate": 2.648607594936709e-06, + "loss": 0.0104, + "step": 29550 + }, + { + "epoch": 16.04, + "learning_rate": 2.6422784810126583e-06, + "loss": 0.0071, + "step": 29575 + }, + { + "epoch": 16.04, + "learning_rate": 2.6359493670886078e-06, + "loss": 0.0081, + "step": 29600 + }, + { + "epoch": 16.04, + "learning_rate": 2.629620253164557e-06, + "loss": 0.008, + "step": 29625 + }, + { + "epoch": 16.04, + "learning_rate": 2.623291139240506e-06, + "loss": 0.0067, + "step": 29650 + }, + { + "epoch": 16.04, + "learning_rate": 2.6169620253164556e-06, + "loss": 0.0066, + "step": 29675 + }, + { + "epoch": 16.04, + "learning_rate": 2.610632911392405e-06, + "loss": 0.0121, + "step": 29700 + }, + { + "epoch": 16.04, + "learning_rate": 2.6043037974683545e-06, + "loss": 0.0075, + "step": 29725 + }, + { + "epoch": 16.04, + "learning_rate": 2.5979746835443043e-06, + "loss": 0.0111, + "step": 29750 + }, + { + "epoch": 17.0, + "learning_rate": 2.5916455696202538e-06, + "loss": 0.0095, + "step": 29775 + }, + { + "epoch": 17.0, + "learning_rate": 2.5853164556962028e-06, + "loss": 0.0077, + "step": 29800 + }, + { + "epoch": 17.0, + "learning_rate": 2.578987341772152e-06, + "loss": 0.0047, + "step": 29825 + }, + { + "epoch": 17.0, + "learning_rate": 2.5726582278481016e-06, + "loss": 0.0092, + "step": 29850 + }, + { + "epoch": 17.0, + "learning_rate": 2.566329113924051e-06, + "loss": 0.0191, + "step": 29875 + }, + { + "epoch": 17.0, + "learning_rate": 2.56e-06, + "loss": 0.0102, + "step": 29900 + }, + { + "epoch": 17.0, + "learning_rate": 2.5536708860759495e-06, + "loss": 0.0146, + "step": 29925 + }, + { + "epoch": 17.0, + "learning_rate": 2.547341772151899e-06, + "loss": 0.0096, + "step": 29950 + }, + { + "epoch": 17.01, + "learning_rate": 2.5410126582278483e-06, + "loss": 0.0159, + "step": 29975 + }, + { + "epoch": 17.01, + "learning_rate": 2.5346835443037978e-06, + "loss": 0.0094, + "step": 30000 + }, + { + "epoch": 17.01, + "eval_loss": 0.2636851966381073, + "eval_runtime": 1508.7196, + "eval_samples_per_second": 6.901, + "eval_steps_per_second": 0.431, + "eval_wer": 23.029642758418838, + "step": 30000 + }, + { + "epoch": 17.01, + "learning_rate": 2.5283544303797468e-06, + "loss": 0.0078, + "step": 30025 + }, + { + "epoch": 17.01, + "learning_rate": 2.522025316455696e-06, + "loss": 0.01, + "step": 30050 + }, + { + "epoch": 17.01, + "learning_rate": 2.5156962025316456e-06, + "loss": 0.0058, + "step": 30075 + }, + { + "epoch": 17.01, + "learning_rate": 2.509367088607595e-06, + "loss": 0.0092, + "step": 30100 + }, + { + "epoch": 17.01, + "learning_rate": 2.503037974683544e-06, + "loss": 0.0084, + "step": 30125 + }, + { + "epoch": 17.01, + "learning_rate": 2.496708860759494e-06, + "loss": 0.0069, + "step": 30150 + }, + { + "epoch": 17.01, + "learning_rate": 2.4903797468354433e-06, + "loss": 0.0059, + "step": 30175 + }, + { + "epoch": 17.01, + "learning_rate": 2.4840506329113923e-06, + "loss": 0.0097, + "step": 30200 + }, + { + "epoch": 17.01, + "learning_rate": 2.4777215189873418e-06, + "loss": 0.0126, + "step": 30225 + }, + { + "epoch": 17.01, + "learning_rate": 2.4713924050632916e-06, + "loss": 0.0096, + "step": 30250 + }, + { + "epoch": 17.01, + "learning_rate": 2.4650632911392406e-06, + "loss": 0.0094, + "step": 30275 + }, + { + "epoch": 17.01, + "learning_rate": 2.45873417721519e-06, + "loss": 0.0111, + "step": 30300 + }, + { + "epoch": 17.01, + "learning_rate": 2.4524050632911395e-06, + "loss": 0.0062, + "step": 30325 + }, + { + "epoch": 17.01, + "learning_rate": 2.446075949367089e-06, + "loss": 0.0078, + "step": 30350 + }, + { + "epoch": 17.02, + "learning_rate": 2.4397468354430383e-06, + "loss": 0.0043, + "step": 30375 + }, + { + "epoch": 17.02, + "learning_rate": 2.4336708860759494e-06, + "loss": 0.0102, + "step": 30400 + }, + { + "epoch": 17.02, + "learning_rate": 2.4273417721518988e-06, + "loss": 0.0101, + "step": 30425 + }, + { + "epoch": 17.02, + "learning_rate": 2.4210126582278482e-06, + "loss": 0.0066, + "step": 30450 + }, + { + "epoch": 17.02, + "learning_rate": 2.4146835443037976e-06, + "loss": 0.0078, + "step": 30475 + }, + { + "epoch": 17.02, + "learning_rate": 2.408354430379747e-06, + "loss": 0.0074, + "step": 30500 + }, + { + "epoch": 17.02, + "learning_rate": 2.4020253164556965e-06, + "loss": 0.0072, + "step": 30525 + }, + { + "epoch": 17.02, + "learning_rate": 2.395696202531646e-06, + "loss": 0.0109, + "step": 30550 + }, + { + "epoch": 17.02, + "learning_rate": 2.389367088607595e-06, + "loss": 0.0113, + "step": 30575 + }, + { + "epoch": 17.02, + "learning_rate": 2.3830379746835444e-06, + "loss": 0.0109, + "step": 30600 + }, + { + "epoch": 17.02, + "learning_rate": 2.3767088607594938e-06, + "loss": 0.0089, + "step": 30625 + }, + { + "epoch": 17.02, + "learning_rate": 2.3703797468354432e-06, + "loss": 0.0062, + "step": 30650 + }, + { + "epoch": 17.02, + "learning_rate": 2.3640506329113926e-06, + "loss": 0.0101, + "step": 30675 + }, + { + "epoch": 17.02, + "learning_rate": 2.357721518987342e-06, + "loss": 0.0096, + "step": 30700 + }, + { + "epoch": 17.02, + "learning_rate": 2.3513924050632915e-06, + "loss": 0.0093, + "step": 30725 + }, + { + "epoch": 17.02, + "learning_rate": 2.3450632911392405e-06, + "loss": 0.0113, + "step": 30750 + }, + { + "epoch": 17.03, + "learning_rate": 2.33873417721519e-06, + "loss": 0.0094, + "step": 30775 + }, + { + "epoch": 17.03, + "learning_rate": 2.3324050632911394e-06, + "loss": 0.0086, + "step": 30800 + }, + { + "epoch": 17.03, + "learning_rate": 2.3260759493670888e-06, + "loss": 0.0096, + "step": 30825 + }, + { + "epoch": 17.03, + "learning_rate": 2.319746835443038e-06, + "loss": 0.0086, + "step": 30850 + }, + { + "epoch": 17.03, + "learning_rate": 2.3134177215189876e-06, + "loss": 0.0064, + "step": 30875 + }, + { + "epoch": 17.03, + "learning_rate": 2.307088607594937e-06, + "loss": 0.0107, + "step": 30900 + }, + { + "epoch": 17.03, + "learning_rate": 2.3007594936708865e-06, + "loss": 0.0073, + "step": 30925 + }, + { + "epoch": 17.03, + "learning_rate": 2.2944303797468355e-06, + "loss": 0.005, + "step": 30950 + }, + { + "epoch": 17.03, + "learning_rate": 2.288101265822785e-06, + "loss": 0.0055, + "step": 30975 + }, + { + "epoch": 17.03, + "learning_rate": 2.2817721518987344e-06, + "loss": 0.006, + "step": 31000 + }, + { + "epoch": 17.03, + "eval_loss": 0.2709895372390747, + "eval_runtime": 1461.5544, + "eval_samples_per_second": 7.123, + "eval_steps_per_second": 0.445, + "eval_wer": 23.29468771849949, + "step": 31000 + }, + { + "epoch": 17.03, + "learning_rate": 2.2754430379746838e-06, + "loss": 0.0094, + "step": 31025 + }, + { + "epoch": 17.03, + "learning_rate": 2.2691139240506328e-06, + "loss": 0.0088, + "step": 31050 + }, + { + "epoch": 17.03, + "learning_rate": 2.2627848101265826e-06, + "loss": 0.0057, + "step": 31075 + }, + { + "epoch": 17.03, + "learning_rate": 2.256455696202532e-06, + "loss": 0.0066, + "step": 31100 + }, + { + "epoch": 17.03, + "learning_rate": 2.250126582278481e-06, + "loss": 0.008, + "step": 31125 + }, + { + "epoch": 17.03, + "learning_rate": 2.2437974683544305e-06, + "loss": 0.0048, + "step": 31150 + }, + { + "epoch": 17.04, + "learning_rate": 2.23746835443038e-06, + "loss": 0.0074, + "step": 31175 + }, + { + "epoch": 17.04, + "learning_rate": 2.2311392405063294e-06, + "loss": 0.0061, + "step": 31200 + }, + { + "epoch": 17.04, + "learning_rate": 2.2248101265822788e-06, + "loss": 0.0088, + "step": 31225 + }, + { + "epoch": 17.04, + "learning_rate": 2.2184810126582278e-06, + "loss": 0.0087, + "step": 31250 + }, + { + "epoch": 17.04, + "learning_rate": 2.2121518987341776e-06, + "loss": 0.0089, + "step": 31275 + }, + { + "epoch": 17.04, + "learning_rate": 2.2058227848101266e-06, + "loss": 0.0061, + "step": 31300 + }, + { + "epoch": 17.04, + "learning_rate": 2.199493670886076e-06, + "loss": 0.0097, + "step": 31325 + }, + { + "epoch": 17.04, + "learning_rate": 2.1931645569620255e-06, + "loss": 0.0063, + "step": 31350 + }, + { + "epoch": 17.04, + "learning_rate": 2.186835443037975e-06, + "loss": 0.0084, + "step": 31375 + }, + { + "epoch": 17.04, + "learning_rate": 2.1805063291139243e-06, + "loss": 0.0089, + "step": 31400 + }, + { + "epoch": 17.04, + "learning_rate": 2.1741772151898734e-06, + "loss": 0.0077, + "step": 31425 + }, + { + "epoch": 17.04, + "learning_rate": 2.1678481012658228e-06, + "loss": 0.0153, + "step": 31450 + }, + { + "epoch": 17.04, + "learning_rate": 2.1615189873417726e-06, + "loss": 0.0088, + "step": 31475 + }, + { + "epoch": 17.04, + "learning_rate": 2.1551898734177216e-06, + "loss": 0.0095, + "step": 31500 + }, + { + "epoch": 18.0, + "learning_rate": 2.148860759493671e-06, + "loss": 0.0084, + "step": 31525 + }, + { + "epoch": 18.0, + "learning_rate": 2.1425316455696205e-06, + "loss": 0.0054, + "step": 31550 + }, + { + "epoch": 18.0, + "learning_rate": 2.13620253164557e-06, + "loss": 0.013, + "step": 31575 + }, + { + "epoch": 18.0, + "learning_rate": 2.129873417721519e-06, + "loss": 0.0074, + "step": 31600 + }, + { + "epoch": 18.0, + "learning_rate": 2.1235443037974684e-06, + "loss": 0.0083, + "step": 31625 + }, + { + "epoch": 18.0, + "learning_rate": 2.1172151898734178e-06, + "loss": 0.0118, + "step": 31650 + }, + { + "epoch": 18.0, + "learning_rate": 2.110886075949367e-06, + "loss": 0.0095, + "step": 31675 + }, + { + "epoch": 18.0, + "learning_rate": 2.1045569620253166e-06, + "loss": 0.0075, + "step": 31700 + }, + { + "epoch": 18.01, + "learning_rate": 2.098227848101266e-06, + "loss": 0.0149, + "step": 31725 + }, + { + "epoch": 18.01, + "learning_rate": 2.0918987341772155e-06, + "loss": 0.0095, + "step": 31750 + }, + { + "epoch": 18.01, + "learning_rate": 2.085569620253165e-06, + "loss": 0.0098, + "step": 31775 + }, + { + "epoch": 18.01, + "learning_rate": 2.079240506329114e-06, + "loss": 0.0058, + "step": 31800 + }, + { + "epoch": 18.01, + "learning_rate": 2.0729113924050633e-06, + "loss": 0.0072, + "step": 31825 + }, + { + "epoch": 18.01, + "learning_rate": 2.0665822784810128e-06, + "loss": 0.0103, + "step": 31850 + }, + { + "epoch": 18.01, + "learning_rate": 2.060253164556962e-06, + "loss": 0.0085, + "step": 31875 + }, + { + "epoch": 18.01, + "learning_rate": 2.0539240506329116e-06, + "loss": 0.0064, + "step": 31900 + }, + { + "epoch": 18.01, + "learning_rate": 2.047594936708861e-06, + "loss": 0.0077, + "step": 31925 + }, + { + "epoch": 18.01, + "learning_rate": 2.0412658227848105e-06, + "loss": 0.0086, + "step": 31950 + }, + { + "epoch": 18.01, + "learning_rate": 2.0349367088607595e-06, + "loss": 0.0136, + "step": 31975 + }, + { + "epoch": 18.01, + "learning_rate": 2.028607594936709e-06, + "loss": 0.0085, + "step": 32000 + }, + { + "epoch": 18.01, + "eval_loss": 0.26488059759140015, + "eval_runtime": 1466.953, + "eval_samples_per_second": 7.097, + "eval_steps_per_second": 0.444, + "eval_wer": 23.089806338314446, + "step": 32000 + }, + { + "epoch": 18.01, + "learning_rate": 2.0222784810126583e-06, + "loss": 0.011, + "step": 32025 + }, + { + "epoch": 18.01, + "learning_rate": 2.0159493670886078e-06, + "loss": 0.0086, + "step": 32050 + }, + { + "epoch": 18.01, + "learning_rate": 2.009620253164557e-06, + "loss": 0.008, + "step": 32075 + }, + { + "epoch": 18.01, + "learning_rate": 2.0032911392405066e-06, + "loss": 0.0083, + "step": 32100 + }, + { + "epoch": 18.02, + "learning_rate": 1.996962025316456e-06, + "loss": 0.0073, + "step": 32125 + }, + { + "epoch": 18.02, + "learning_rate": 1.9906329113924055e-06, + "loss": 0.0036, + "step": 32150 + }, + { + "epoch": 18.02, + "learning_rate": 1.9843037974683545e-06, + "loss": 0.0096, + "step": 32175 + }, + { + "epoch": 18.02, + "learning_rate": 1.977974683544304e-06, + "loss": 0.0071, + "step": 32200 + }, + { + "epoch": 18.02, + "learning_rate": 1.9716455696202533e-06, + "loss": 0.009, + "step": 32225 + }, + { + "epoch": 18.02, + "learning_rate": 1.9653164556962028e-06, + "loss": 0.0093, + "step": 32250 + }, + { + "epoch": 18.02, + "learning_rate": 1.9589873417721518e-06, + "loss": 0.0071, + "step": 32275 + }, + { + "epoch": 18.02, + "learning_rate": 1.9526582278481016e-06, + "loss": 0.0037, + "step": 32300 + }, + { + "epoch": 18.02, + "learning_rate": 1.946329113924051e-06, + "loss": 0.007, + "step": 32325 + }, + { + "epoch": 18.02, + "learning_rate": 1.94e-06, + "loss": 0.0104, + "step": 32350 + }, + { + "epoch": 18.02, + "learning_rate": 1.9336708860759495e-06, + "loss": 0.0082, + "step": 32375 + }, + { + "epoch": 18.02, + "learning_rate": 1.927341772151899e-06, + "loss": 0.0095, + "step": 32400 + }, + { + "epoch": 18.02, + "learning_rate": 1.9210126582278483e-06, + "loss": 0.0101, + "step": 32425 + }, + { + "epoch": 18.02, + "learning_rate": 1.9146835443037973e-06, + "loss": 0.0122, + "step": 32450 + }, + { + "epoch": 18.02, + "learning_rate": 1.9083544303797468e-06, + "loss": 0.0082, + "step": 32475 + }, + { + "epoch": 18.02, + "learning_rate": 1.9020253164556964e-06, + "loss": 0.0093, + "step": 32500 + }, + { + "epoch": 18.03, + "learning_rate": 1.8956962025316458e-06, + "loss": 0.0073, + "step": 32525 + }, + { + "epoch": 18.03, + "learning_rate": 1.889367088607595e-06, + "loss": 0.0061, + "step": 32550 + }, + { + "epoch": 18.03, + "learning_rate": 1.8830379746835445e-06, + "loss": 0.0072, + "step": 32575 + }, + { + "epoch": 18.03, + "learning_rate": 1.876708860759494e-06, + "loss": 0.0064, + "step": 32600 + }, + { + "epoch": 18.03, + "learning_rate": 1.8703797468354431e-06, + "loss": 0.0084, + "step": 32625 + }, + { + "epoch": 18.03, + "learning_rate": 1.8640506329113926e-06, + "loss": 0.0067, + "step": 32650 + }, + { + "epoch": 18.03, + "learning_rate": 1.8577215189873418e-06, + "loss": 0.0052, + "step": 32675 + }, + { + "epoch": 18.03, + "learning_rate": 1.8513924050632912e-06, + "loss": 0.0085, + "step": 32700 + }, + { + "epoch": 18.03, + "learning_rate": 1.8450632911392408e-06, + "loss": 0.0077, + "step": 32725 + }, + { + "epoch": 18.03, + "learning_rate": 1.83873417721519e-06, + "loss": 0.0068, + "step": 32750 + }, + { + "epoch": 18.03, + "learning_rate": 1.8324050632911395e-06, + "loss": 0.0079, + "step": 32775 + }, + { + "epoch": 18.03, + "learning_rate": 1.8260759493670887e-06, + "loss": 0.008, + "step": 32800 + }, + { + "epoch": 18.03, + "learning_rate": 1.8197468354430381e-06, + "loss": 0.0071, + "step": 32825 + }, + { + "epoch": 18.03, + "learning_rate": 1.8134177215189873e-06, + "loss": 0.0091, + "step": 32850 + }, + { + "epoch": 18.03, + "learning_rate": 1.8070886075949368e-06, + "loss": 0.0059, + "step": 32875 + }, + { + "epoch": 18.03, + "learning_rate": 1.8007594936708862e-06, + "loss": 0.0079, + "step": 32900 + }, + { + "epoch": 18.04, + "learning_rate": 1.7944303797468356e-06, + "loss": 0.0055, + "step": 32925 + }, + { + "epoch": 18.04, + "learning_rate": 1.788101265822785e-06, + "loss": 0.0095, + "step": 32950 + }, + { + "epoch": 18.04, + "learning_rate": 1.7817721518987343e-06, + "loss": 0.0086, + "step": 32975 + }, + { + "epoch": 18.04, + "learning_rate": 1.7754430379746837e-06, + "loss": 0.0057, + "step": 33000 + }, + { + "epoch": 18.04, + "eval_loss": 0.2678743898868561, + "eval_runtime": 1484.126, + "eval_samples_per_second": 7.015, + "eval_steps_per_second": 0.439, + "eval_wer": 23.405258622091416, + "step": 33000 + }, + { + "epoch": 18.04, + "learning_rate": 1.7691139240506331e-06, + "loss": 0.0085, + "step": 33025 + }, + { + "epoch": 18.04, + "learning_rate": 1.7627848101265823e-06, + "loss": 0.0073, + "step": 33050 + }, + { + "epoch": 18.04, + "learning_rate": 1.7564556962025318e-06, + "loss": 0.0045, + "step": 33075 + }, + { + "epoch": 18.04, + "learning_rate": 1.750126582278481e-06, + "loss": 0.0065, + "step": 33100 + }, + { + "epoch": 18.04, + "learning_rate": 1.7437974683544306e-06, + "loss": 0.0076, + "step": 33125 + }, + { + "epoch": 18.04, + "learning_rate": 1.73746835443038e-06, + "loss": 0.007, + "step": 33150 + }, + { + "epoch": 18.04, + "learning_rate": 1.7311392405063293e-06, + "loss": 0.0109, + "step": 33175 + }, + { + "epoch": 18.04, + "learning_rate": 1.7248101265822787e-06, + "loss": 0.0095, + "step": 33200 + }, + { + "epoch": 18.04, + "learning_rate": 1.718481012658228e-06, + "loss": 0.0109, + "step": 33225 + }, + { + "epoch": 18.04, + "learning_rate": 1.7121518987341773e-06, + "loss": 0.0117, + "step": 33250 + }, + { + "epoch": 19.0, + "learning_rate": 1.7058227848101266e-06, + "loss": 0.0038, + "step": 33275 + }, + { + "epoch": 19.0, + "learning_rate": 1.699493670886076e-06, + "loss": 0.013, + "step": 33300 + }, + { + "epoch": 19.0, + "learning_rate": 1.6931645569620256e-06, + "loss": 0.0104, + "step": 33325 + }, + { + "epoch": 19.0, + "learning_rate": 1.6868354430379748e-06, + "loss": 0.0064, + "step": 33350 + }, + { + "epoch": 19.0, + "learning_rate": 1.6805063291139243e-06, + "loss": 0.0137, + "step": 33375 + }, + { + "epoch": 19.0, + "learning_rate": 1.6741772151898735e-06, + "loss": 0.012, + "step": 33400 + }, + { + "epoch": 19.0, + "learning_rate": 1.667848101265823e-06, + "loss": 0.0106, + "step": 33425 + }, + { + "epoch": 19.0, + "learning_rate": 1.6615189873417723e-06, + "loss": 0.0072, + "step": 33450 + }, + { + "epoch": 19.01, + "learning_rate": 1.6551898734177216e-06, + "loss": 0.0154, + "step": 33475 + }, + { + "epoch": 19.01, + "learning_rate": 1.648860759493671e-06, + "loss": 0.0129, + "step": 33500 + }, + { + "epoch": 19.01, + "learning_rate": 1.6425316455696206e-06, + "loss": 0.008, + "step": 33525 + }, + { + "epoch": 19.01, + "learning_rate": 1.6362025316455698e-06, + "loss": 0.0071, + "step": 33550 + }, + { + "epoch": 19.01, + "learning_rate": 1.6298734177215193e-06, + "loss": 0.0088, + "step": 33575 + }, + { + "epoch": 19.01, + "learning_rate": 1.6235443037974685e-06, + "loss": 0.0102, + "step": 33600 + }, + { + "epoch": 19.01, + "learning_rate": 1.617215189873418e-06, + "loss": 0.0064, + "step": 33625 + }, + { + "epoch": 19.01, + "learning_rate": 1.6108860759493671e-06, + "loss": 0.0098, + "step": 33650 + }, + { + "epoch": 19.01, + "learning_rate": 1.6045569620253166e-06, + "loss": 0.009, + "step": 33675 + }, + { + "epoch": 19.01, + "learning_rate": 1.5982278481012658e-06, + "loss": 0.0074, + "step": 33700 + }, + { + "epoch": 19.01, + "learning_rate": 1.5918987341772152e-06, + "loss": 0.0055, + "step": 33725 + }, + { + "epoch": 19.01, + "learning_rate": 1.5855696202531648e-06, + "loss": 0.0059, + "step": 33750 + }, + { + "epoch": 19.01, + "learning_rate": 1.579240506329114e-06, + "loss": 0.0102, + "step": 33775 + }, + { + "epoch": 19.01, + "learning_rate": 1.5729113924050635e-06, + "loss": 0.0111, + "step": 33800 + }, + { + "epoch": 19.01, + "learning_rate": 1.5665822784810127e-06, + "loss": 0.0033, + "step": 33825 + }, + { + "epoch": 19.01, + "learning_rate": 1.5602531645569621e-06, + "loss": 0.0057, + "step": 33850 + }, + { + "epoch": 19.02, + "learning_rate": 1.5539240506329115e-06, + "loss": 0.0074, + "step": 33875 + }, + { + "epoch": 19.02, + "learning_rate": 1.5475949367088608e-06, + "loss": 0.0056, + "step": 33900 + }, + { + "epoch": 19.02, + "learning_rate": 1.5412658227848102e-06, + "loss": 0.0053, + "step": 33925 + }, + { + "epoch": 19.02, + "learning_rate": 1.5349367088607598e-06, + "loss": 0.0049, + "step": 33950 + }, + { + "epoch": 19.02, + "learning_rate": 1.528607594936709e-06, + "loss": 0.0065, + "step": 33975 + }, + { + "epoch": 19.02, + "learning_rate": 1.5222784810126585e-06, + "loss": 0.007, + "step": 34000 + }, + { + "epoch": 19.02, + "eval_loss": 0.2660910487174988, + "eval_runtime": 1471.5922, + "eval_samples_per_second": 7.075, + "eval_steps_per_second": 0.442, + "eval_wer": 23.14834387551017, + "step": 34000 + }, + { + "epoch": 19.02, + "learning_rate": 1.5159493670886077e-06, + "loss": 0.0027, + "step": 34025 + }, + { + "epoch": 19.02, + "learning_rate": 1.5096202531645571e-06, + "loss": 0.0081, + "step": 34050 + }, + { + "epoch": 19.02, + "learning_rate": 1.5032911392405063e-06, + "loss": 0.0044, + "step": 34075 + }, + { + "epoch": 19.02, + "learning_rate": 1.4969620253164558e-06, + "loss": 0.0093, + "step": 34100 + }, + { + "epoch": 19.02, + "learning_rate": 1.490632911392405e-06, + "loss": 0.0067, + "step": 34125 + }, + { + "epoch": 19.02, + "learning_rate": 1.4843037974683546e-06, + "loss": 0.0047, + "step": 34150 + }, + { + "epoch": 19.02, + "learning_rate": 1.477974683544304e-06, + "loss": 0.0119, + "step": 34175 + }, + { + "epoch": 19.02, + "learning_rate": 1.4716455696202533e-06, + "loss": 0.0091, + "step": 34200 + }, + { + "epoch": 19.02, + "learning_rate": 1.4653164556962027e-06, + "loss": 0.0094, + "step": 34225 + }, + { + "epoch": 19.02, + "learning_rate": 1.458987341772152e-06, + "loss": 0.0069, + "step": 34250 + }, + { + "epoch": 19.03, + "learning_rate": 1.4526582278481013e-06, + "loss": 0.0066, + "step": 34275 + }, + { + "epoch": 19.03, + "learning_rate": 1.4463291139240508e-06, + "loss": 0.0072, + "step": 34300 + }, + { + "epoch": 19.03, + "learning_rate": 1.44e-06, + "loss": 0.0067, + "step": 34325 + }, + { + "epoch": 19.03, + "learning_rate": 1.4336708860759496e-06, + "loss": 0.0076, + "step": 34350 + }, + { + "epoch": 19.03, + "learning_rate": 1.427341772151899e-06, + "loss": 0.0083, + "step": 34375 + }, + { + "epoch": 19.03, + "learning_rate": 1.4212658227848103e-06, + "loss": 0.0072, + "step": 34400 + }, + { + "epoch": 19.03, + "learning_rate": 1.4149367088607597e-06, + "loss": 0.0086, + "step": 34425 + }, + { + "epoch": 19.03, + "learning_rate": 1.408607594936709e-06, + "loss": 0.0057, + "step": 34450 + }, + { + "epoch": 19.03, + "learning_rate": 1.4022784810126584e-06, + "loss": 0.0111, + "step": 34475 + }, + { + "epoch": 19.03, + "learning_rate": 1.3959493670886076e-06, + "loss": 0.01, + "step": 34500 + }, + { + "epoch": 19.03, + "learning_rate": 1.389620253164557e-06, + "loss": 0.008, + "step": 34525 + }, + { + "epoch": 19.03, + "learning_rate": 1.3832911392405066e-06, + "loss": 0.0082, + "step": 34550 + }, + { + "epoch": 19.03, + "learning_rate": 1.3769620253164559e-06, + "loss": 0.0062, + "step": 34575 + }, + { + "epoch": 19.03, + "learning_rate": 1.3706329113924053e-06, + "loss": 0.0052, + "step": 34600 + }, + { + "epoch": 19.03, + "learning_rate": 1.3643037974683545e-06, + "loss": 0.007, + "step": 34625 + }, + { + "epoch": 19.03, + "learning_rate": 1.357974683544304e-06, + "loss": 0.0045, + "step": 34650 + }, + { + "epoch": 19.04, + "learning_rate": 1.3516455696202531e-06, + "loss": 0.0089, + "step": 34675 + }, + { + "epoch": 19.04, + "learning_rate": 1.3453164556962026e-06, + "loss": 0.0081, + "step": 34700 + }, + { + "epoch": 19.04, + "learning_rate": 1.338987341772152e-06, + "loss": 0.0101, + "step": 34725 + }, + { + "epoch": 19.04, + "learning_rate": 1.3326582278481014e-06, + "loss": 0.0072, + "step": 34750 + }, + { + "epoch": 19.04, + "learning_rate": 1.3263291139240509e-06, + "loss": 0.005, + "step": 34775 + }, + { + "epoch": 19.04, + "learning_rate": 1.32e-06, + "loss": 0.0083, + "step": 34800 + }, + { + "epoch": 19.04, + "learning_rate": 1.3136708860759495e-06, + "loss": 0.0087, + "step": 34825 + }, + { + "epoch": 19.04, + "learning_rate": 1.307341772151899e-06, + "loss": 0.0067, + "step": 34850 + }, + { + "epoch": 19.04, + "learning_rate": 1.3010126582278481e-06, + "loss": 0.0048, + "step": 34875 + }, + { + "epoch": 19.04, + "learning_rate": 1.2946835443037976e-06, + "loss": 0.0065, + "step": 34900 + }, + { + "epoch": 19.04, + "learning_rate": 1.2883544303797468e-06, + "loss": 0.0076, + "step": 34925 + }, + { + "epoch": 19.04, + "learning_rate": 1.2820253164556964e-06, + "loss": 0.007, + "step": 34950 + }, + { + "epoch": 19.04, + "learning_rate": 1.2756962025316458e-06, + "loss": 0.0044, + "step": 34975 + }, + { + "epoch": 19.04, + "learning_rate": 1.269367088607595e-06, + "loss": 0.0082, + "step": 35000 + }, + { + "epoch": 19.04, + "eval_loss": 0.26719748973846436, + "eval_runtime": 1465.7868, + "eval_samples_per_second": 7.103, + "eval_steps_per_second": 0.444, + "eval_wer": 24.169498691035628, + "step": 35000 + }, + { + "epoch": 20.0, + "learning_rate": 1.2630379746835445e-06, + "loss": 0.0072, + "step": 35025 + }, + { + "epoch": 20.0, + "learning_rate": 1.2567088607594937e-06, + "loss": 0.0078, + "step": 35050 + }, + { + "epoch": 20.0, + "learning_rate": 1.2503797468354431e-06, + "loss": 0.0069, + "step": 35075 + }, + { + "epoch": 20.0, + "learning_rate": 1.2440506329113924e-06, + "loss": 0.0063, + "step": 35100 + }, + { + "epoch": 20.0, + "learning_rate": 1.237721518987342e-06, + "loss": 0.0073, + "step": 35125 + }, + { + "epoch": 20.0, + "learning_rate": 1.2313924050632912e-06, + "loss": 0.0076, + "step": 35150 + }, + { + "epoch": 20.0, + "learning_rate": 1.2250632911392406e-06, + "loss": 0.0084, + "step": 35175 + }, + { + "epoch": 20.0, + "learning_rate": 1.2187341772151899e-06, + "loss": 0.0061, + "step": 35200 + }, + { + "epoch": 20.01, + "learning_rate": 1.2124050632911393e-06, + "loss": 0.0074, + "step": 35225 + }, + { + "epoch": 20.01, + "learning_rate": 1.2060759493670887e-06, + "loss": 0.011, + "step": 35250 + }, + { + "epoch": 20.01, + "learning_rate": 1.1997468354430381e-06, + "loss": 0.0076, + "step": 35275 + }, + { + "epoch": 20.01, + "learning_rate": 1.1934177215189874e-06, + "loss": 0.007, + "step": 35300 + }, + { + "epoch": 20.01, + "learning_rate": 1.1870886075949368e-06, + "loss": 0.0074, + "step": 35325 + }, + { + "epoch": 20.01, + "learning_rate": 1.1807594936708862e-06, + "loss": 0.0077, + "step": 35350 + }, + { + "epoch": 20.01, + "learning_rate": 1.1744303797468354e-06, + "loss": 0.0074, + "step": 35375 + }, + { + "epoch": 20.01, + "learning_rate": 1.1681012658227848e-06, + "loss": 0.007, + "step": 35400 + }, + { + "epoch": 20.01, + "learning_rate": 1.1617721518987343e-06, + "loss": 0.0068, + "step": 35425 + }, + { + "epoch": 20.01, + "learning_rate": 1.1554430379746837e-06, + "loss": 0.0049, + "step": 35450 + }, + { + "epoch": 20.01, + "learning_rate": 1.149113924050633e-06, + "loss": 0.0074, + "step": 35475 + }, + { + "epoch": 20.01, + "learning_rate": 1.1427848101265823e-06, + "loss": 0.0097, + "step": 35500 + }, + { + "epoch": 20.01, + "learning_rate": 1.1364556962025318e-06, + "loss": 0.0065, + "step": 35525 + }, + { + "epoch": 20.01, + "learning_rate": 1.1301265822784812e-06, + "loss": 0.0056, + "step": 35550 + }, + { + "epoch": 20.01, + "learning_rate": 1.1237974683544304e-06, + "loss": 0.0056, + "step": 35575 + }, + { + "epoch": 20.01, + "learning_rate": 1.1174683544303798e-06, + "loss": 0.0056, + "step": 35600 + }, + { + "epoch": 20.02, + "learning_rate": 1.1111392405063293e-06, + "loss": 0.0104, + "step": 35625 + }, + { + "epoch": 20.02, + "learning_rate": 1.1048101265822787e-06, + "loss": 0.0071, + "step": 35650 + }, + { + "epoch": 20.02, + "learning_rate": 1.098481012658228e-06, + "loss": 0.0079, + "step": 35675 + }, + { + "epoch": 20.02, + "learning_rate": 1.0921518987341773e-06, + "loss": 0.0077, + "step": 35700 + }, + { + "epoch": 20.02, + "learning_rate": 1.0858227848101268e-06, + "loss": 0.0038, + "step": 35725 + }, + { + "epoch": 20.02, + "learning_rate": 1.079493670886076e-06, + "loss": 0.0035, + "step": 35750 + }, + { + "epoch": 20.02, + "learning_rate": 1.0731645569620254e-06, + "loss": 0.0047, + "step": 35775 + }, + { + "epoch": 20.02, + "learning_rate": 1.0668354430379746e-06, + "loss": 0.0079, + "step": 35800 + }, + { + "epoch": 20.02, + "learning_rate": 1.0605063291139243e-06, + "loss": 0.006, + "step": 35825 + }, + { + "epoch": 20.02, + "learning_rate": 1.0541772151898735e-06, + "loss": 0.0084, + "step": 35850 + }, + { + "epoch": 20.02, + "learning_rate": 1.047848101265823e-06, + "loss": 0.0073, + "step": 35875 + }, + { + "epoch": 20.02, + "learning_rate": 1.0415189873417721e-06, + "loss": 0.0049, + "step": 35900 + }, + { + "epoch": 20.02, + "learning_rate": 1.0351898734177216e-06, + "loss": 0.0114, + "step": 35925 + }, + { + "epoch": 20.02, + "learning_rate": 1.028860759493671e-06, + "loss": 0.0086, + "step": 35950 + }, + { + "epoch": 20.02, + "learning_rate": 1.0225316455696204e-06, + "loss": 0.0088, + "step": 35975 + }, + { + "epoch": 20.02, + "learning_rate": 1.0162025316455696e-06, + "loss": 0.0054, + "step": 36000 + }, + { + "epoch": 20.02, + "eval_loss": 0.26699596643447876, + "eval_runtime": 1450.3878, + "eval_samples_per_second": 7.178, + "eval_steps_per_second": 0.449, + "eval_wer": 22.689799834143646, + "step": 36000 + }, + { + "epoch": 20.03, + "learning_rate": 1.009873417721519e-06, + "loss": 0.005, + "step": 36025 + }, + { + "epoch": 20.03, + "learning_rate": 1.0035443037974685e-06, + "loss": 0.0058, + "step": 36050 + }, + { + "epoch": 20.03, + "learning_rate": 9.97215189873418e-07, + "loss": 0.0068, + "step": 36075 + }, + { + "epoch": 20.03, + "learning_rate": 9.908860759493671e-07, + "loss": 0.0074, + "step": 36100 + }, + { + "epoch": 20.03, + "learning_rate": 9.845569620253166e-07, + "loss": 0.0081, + "step": 36125 + }, + { + "epoch": 20.03, + "learning_rate": 9.78227848101266e-07, + "loss": 0.0063, + "step": 36150 + }, + { + "epoch": 20.03, + "learning_rate": 9.718987341772152e-07, + "loss": 0.0085, + "step": 36175 + }, + { + "epoch": 20.03, + "learning_rate": 9.655696202531646e-07, + "loss": 0.0059, + "step": 36200 + }, + { + "epoch": 20.03, + "learning_rate": 9.592405063291138e-07, + "loss": 0.0079, + "step": 36225 + }, + { + "epoch": 20.03, + "learning_rate": 9.529113924050634e-07, + "loss": 0.0047, + "step": 36250 + }, + { + "epoch": 20.03, + "learning_rate": 9.465822784810127e-07, + "loss": 0.0058, + "step": 36275 + }, + { + "epoch": 20.03, + "learning_rate": 9.402531645569621e-07, + "loss": 0.0039, + "step": 36300 + }, + { + "epoch": 20.03, + "learning_rate": 9.339240506329115e-07, + "loss": 0.0054, + "step": 36325 + }, + { + "epoch": 20.03, + "learning_rate": 9.275949367088609e-07, + "loss": 0.0074, + "step": 36350 + }, + { + "epoch": 20.03, + "learning_rate": 9.212658227848102e-07, + "loss": 0.0063, + "step": 36375 + }, + { + "epoch": 20.03, + "learning_rate": 9.149367088607595e-07, + "loss": 0.004, + "step": 36400 + }, + { + "epoch": 20.04, + "learning_rate": 9.086075949367088e-07, + "loss": 0.0071, + "step": 36425 + }, + { + "epoch": 20.04, + "learning_rate": 9.022784810126584e-07, + "loss": 0.0075, + "step": 36450 + }, + { + "epoch": 20.04, + "learning_rate": 8.959493670886077e-07, + "loss": 0.006, + "step": 36475 + }, + { + "epoch": 20.04, + "learning_rate": 8.89873417721519e-07, + "loss": 0.0063, + "step": 36500 + }, + { + "epoch": 20.04, + "learning_rate": 8.835443037974684e-07, + "loss": 0.0042, + "step": 36525 + }, + { + "epoch": 20.04, + "learning_rate": 8.772151898734178e-07, + "loss": 0.0038, + "step": 36550 + }, + { + "epoch": 20.04, + "learning_rate": 8.708860759493671e-07, + "loss": 0.0058, + "step": 36575 + }, + { + "epoch": 20.04, + "learning_rate": 8.645569620253165e-07, + "loss": 0.0061, + "step": 36600 + }, + { + "epoch": 20.04, + "learning_rate": 8.582278481012659e-07, + "loss": 0.0046, + "step": 36625 + }, + { + "epoch": 20.04, + "learning_rate": 8.518987341772153e-07, + "loss": 0.006, + "step": 36650 + }, + { + "epoch": 20.04, + "learning_rate": 8.455696202531646e-07, + "loss": 0.0082, + "step": 36675 + }, + { + "epoch": 20.04, + "learning_rate": 8.392405063291139e-07, + "loss": 0.0074, + "step": 36700 + }, + { + "epoch": 20.04, + "learning_rate": 8.329113924050633e-07, + "loss": 0.0107, + "step": 36725 + }, + { + "epoch": 20.04, + "learning_rate": 8.265822784810128e-07, + "loss": 0.0062, + "step": 36750 + }, + { + "epoch": 21.0, + "learning_rate": 8.202531645569621e-07, + "loss": 0.0127, + "step": 36775 + }, + { + "epoch": 21.0, + "learning_rate": 8.139240506329114e-07, + "loss": 0.0061, + "step": 36800 + }, + { + "epoch": 21.0, + "learning_rate": 8.075949367088608e-07, + "loss": 0.005, + "step": 36825 + }, + { + "epoch": 21.0, + "learning_rate": 8.012658227848103e-07, + "loss": 0.0071, + "step": 36850 + }, + { + "epoch": 21.0, + "learning_rate": 7.949367088607596e-07, + "loss": 0.008, + "step": 36875 + }, + { + "epoch": 21.0, + "learning_rate": 7.886075949367089e-07, + "loss": 0.0091, + "step": 36900 + }, + { + "epoch": 21.0, + "learning_rate": 7.822784810126583e-07, + "loss": 0.008, + "step": 36925 + }, + { + "epoch": 21.0, + "learning_rate": 7.759493670886077e-07, + "loss": 0.006, + "step": 36950 + }, + { + "epoch": 21.01, + "learning_rate": 7.69873417721519e-07, + "loss": 0.0083, + "step": 36975 + }, + { + "epoch": 21.01, + "learning_rate": 7.635443037974683e-07, + "loss": 0.0078, + "step": 37000 + }, + { + "epoch": 21.01, + "eval_loss": 0.2638327479362488, + "eval_runtime": 1492.7276, + "eval_samples_per_second": 6.974, + "eval_steps_per_second": 0.436, + "eval_wer": 23.06053756971658, + "step": 37000 + }, + { + "epoch": 21.01, + "learning_rate": 7.572151898734177e-07, + "loss": 0.0122, + "step": 37025 + }, + { + "epoch": 21.01, + "learning_rate": 7.508860759493672e-07, + "loss": 0.0093, + "step": 37050 + }, + { + "epoch": 21.01, + "learning_rate": 7.445569620253165e-07, + "loss": 0.0061, + "step": 37075 + }, + { + "epoch": 21.01, + "learning_rate": 7.382278481012658e-07, + "loss": 0.0063, + "step": 37100 + }, + { + "epoch": 21.01, + "learning_rate": 7.318987341772152e-07, + "loss": 0.011, + "step": 37125 + }, + { + "epoch": 21.01, + "learning_rate": 7.255696202531647e-07, + "loss": 0.0075, + "step": 37150 + }, + { + "epoch": 21.01, + "learning_rate": 7.19240506329114e-07, + "loss": 0.0056, + "step": 37175 + }, + { + "epoch": 21.01, + "learning_rate": 7.129113924050633e-07, + "loss": 0.0044, + "step": 37200 + }, + { + "epoch": 21.01, + "learning_rate": 7.065822784810127e-07, + "loss": 0.0068, + "step": 37225 + }, + { + "epoch": 21.01, + "learning_rate": 7.002531645569621e-07, + "loss": 0.0114, + "step": 37250 + }, + { + "epoch": 21.01, + "learning_rate": 6.939240506329114e-07, + "loss": 0.0104, + "step": 37275 + }, + { + "epoch": 21.01, + "learning_rate": 6.875949367088608e-07, + "loss": 0.0098, + "step": 37300 + }, + { + "epoch": 21.01, + "learning_rate": 6.812658227848102e-07, + "loss": 0.0078, + "step": 37325 + }, + { + "epoch": 21.01, + "learning_rate": 6.749367088607596e-07, + "loss": 0.0075, + "step": 37350 + }, + { + "epoch": 21.02, + "learning_rate": 6.686075949367089e-07, + "loss": 0.0081, + "step": 37375 + }, + { + "epoch": 21.02, + "learning_rate": 6.622784810126582e-07, + "loss": 0.0039, + "step": 37400 + }, + { + "epoch": 21.02, + "learning_rate": 6.559493670886076e-07, + "loss": 0.0081, + "step": 37425 + }, + { + "epoch": 21.02, + "learning_rate": 6.496202531645571e-07, + "loss": 0.0066, + "step": 37450 + }, + { + "epoch": 21.02, + "learning_rate": 6.432911392405064e-07, + "loss": 0.0076, + "step": 37475 + }, + { + "epoch": 21.02, + "learning_rate": 6.369620253164557e-07, + "loss": 0.0067, + "step": 37500 + }, + { + "epoch": 21.02, + "learning_rate": 6.306329113924051e-07, + "loss": 0.0081, + "step": 37525 + }, + { + "epoch": 21.02, + "learning_rate": 6.243037974683545e-07, + "loss": 0.0059, + "step": 37550 + }, + { + "epoch": 21.02, + "learning_rate": 6.179746835443039e-07, + "loss": 0.0072, + "step": 37575 + }, + { + "epoch": 21.02, + "learning_rate": 6.116455696202532e-07, + "loss": 0.0095, + "step": 37600 + }, + { + "epoch": 21.02, + "learning_rate": 6.053164556962026e-07, + "loss": 0.0078, + "step": 37625 + }, + { + "epoch": 21.02, + "learning_rate": 5.98987341772152e-07, + "loss": 0.0072, + "step": 37650 + }, + { + "epoch": 21.02, + "learning_rate": 5.926582278481013e-07, + "loss": 0.0057, + "step": 37675 + }, + { + "epoch": 21.02, + "learning_rate": 5.863291139240506e-07, + "loss": 0.0064, + "step": 37700 + }, + { + "epoch": 21.02, + "learning_rate": 5.800000000000001e-07, + "loss": 0.0069, + "step": 37725 + }, + { + "epoch": 21.02, + "learning_rate": 5.736708860759494e-07, + "loss": 0.0063, + "step": 37750 + }, + { + "epoch": 21.03, + "learning_rate": 5.673417721518988e-07, + "loss": 0.0073, + "step": 37775 + }, + { + "epoch": 21.03, + "learning_rate": 5.610126582278481e-07, + "loss": 0.0058, + "step": 37800 + }, + { + "epoch": 21.03, + "learning_rate": 5.546835443037976e-07, + "loss": 0.0056, + "step": 37825 + }, + { + "epoch": 21.03, + "learning_rate": 5.483544303797469e-07, + "loss": 0.0061, + "step": 37850 + }, + { + "epoch": 21.03, + "learning_rate": 5.420253164556962e-07, + "loss": 0.0039, + "step": 37875 + }, + { + "epoch": 21.03, + "learning_rate": 5.356962025316456e-07, + "loss": 0.0082, + "step": 37900 + }, + { + "epoch": 21.03, + "learning_rate": 5.29367088607595e-07, + "loss": 0.0067, + "step": 37925 + }, + { + "epoch": 21.03, + "learning_rate": 5.230379746835444e-07, + "loss": 0.0096, + "step": 37950 + }, + { + "epoch": 21.03, + "learning_rate": 5.167088607594937e-07, + "loss": 0.004, + "step": 37975 + }, + { + "epoch": 21.03, + "learning_rate": 5.103797468354431e-07, + "loss": 0.0055, + "step": 38000 + }, + { + "epoch": 21.03, + "eval_loss": 0.2642187774181366, + "eval_runtime": 1455.8371, + "eval_samples_per_second": 7.151, + "eval_steps_per_second": 0.447, + "eval_wer": 22.437763215662045, + "step": 38000 + }, + { + "epoch": 21.03, + "learning_rate": 5.040506329113924e-07, + "loss": 0.0067, + "step": 38025 + }, + { + "epoch": 21.03, + "learning_rate": 4.977215189873418e-07, + "loss": 0.0053, + "step": 38050 + }, + { + "epoch": 21.03, + "learning_rate": 4.913924050632912e-07, + "loss": 0.0048, + "step": 38075 + }, + { + "epoch": 21.03, + "learning_rate": 4.850632911392405e-07, + "loss": 0.0091, + "step": 38100 + }, + { + "epoch": 21.03, + "learning_rate": 4.787341772151898e-07, + "loss": 0.0055, + "step": 38125 + }, + { + "epoch": 21.03, + "learning_rate": 4.7240506329113927e-07, + "loss": 0.0052, + "step": 38150 + }, + { + "epoch": 21.04, + "learning_rate": 4.6607594936708865e-07, + "loss": 0.011, + "step": 38175 + }, + { + "epoch": 21.04, + "learning_rate": 4.59746835443038e-07, + "loss": 0.0054, + "step": 38200 + }, + { + "epoch": 21.04, + "learning_rate": 4.5341772151898734e-07, + "loss": 0.0072, + "step": 38225 + }, + { + "epoch": 21.04, + "learning_rate": 4.4708860759493677e-07, + "loss": 0.0044, + "step": 38250 + }, + { + "epoch": 21.04, + "learning_rate": 4.407594936708861e-07, + "loss": 0.0075, + "step": 38275 + }, + { + "epoch": 21.04, + "learning_rate": 4.344303797468355e-07, + "loss": 0.0056, + "step": 38300 + }, + { + "epoch": 21.04, + "learning_rate": 4.2810126582278484e-07, + "loss": 0.0063, + "step": 38325 + }, + { + "epoch": 21.04, + "learning_rate": 4.217721518987342e-07, + "loss": 0.0068, + "step": 38350 + }, + { + "epoch": 21.04, + "learning_rate": 4.1544303797468354e-07, + "loss": 0.0066, + "step": 38375 + }, + { + "epoch": 21.04, + "learning_rate": 4.0911392405063296e-07, + "loss": 0.0035, + "step": 38400 + }, + { + "epoch": 21.04, + "learning_rate": 4.027848101265823e-07, + "loss": 0.0046, + "step": 38425 + }, + { + "epoch": 21.04, + "learning_rate": 3.964556962025317e-07, + "loss": 0.0098, + "step": 38450 + }, + { + "epoch": 21.04, + "learning_rate": 3.9012658227848104e-07, + "loss": 0.0058, + "step": 38475 + }, + { + "epoch": 21.04, + "learning_rate": 3.837974683544304e-07, + "loss": 0.0091, + "step": 38500 + }, + { + "epoch": 22.0, + "learning_rate": 3.774683544303798e-07, + "loss": 0.0105, + "step": 38525 + }, + { + "epoch": 22.0, + "learning_rate": 3.7113924050632916e-07, + "loss": 0.0044, + "step": 38550 + }, + { + "epoch": 22.0, + "learning_rate": 3.648101265822785e-07, + "loss": 0.0068, + "step": 38575 + }, + { + "epoch": 22.0, + "learning_rate": 3.584810126582279e-07, + "loss": 0.0049, + "step": 38600 + }, + { + "epoch": 22.0, + "learning_rate": 3.5215189873417723e-07, + "loss": 0.0063, + "step": 38625 + }, + { + "epoch": 22.0, + "learning_rate": 3.4582278481012666e-07, + "loss": 0.0069, + "step": 38650 + }, + { + "epoch": 22.0, + "learning_rate": 3.39493670886076e-07, + "loss": 0.0075, + "step": 38675 + }, + { + "epoch": 22.0, + "learning_rate": 3.3316455696202536e-07, + "loss": 0.0048, + "step": 38700 + }, + { + "epoch": 22.01, + "learning_rate": 3.2683544303797473e-07, + "loss": 0.0118, + "step": 38725 + }, + { + "epoch": 22.01, + "learning_rate": 3.205063291139241e-07, + "loss": 0.0073, + "step": 38750 + }, + { + "epoch": 22.01, + "learning_rate": 3.1417721518987343e-07, + "loss": 0.0066, + "step": 38775 + }, + { + "epoch": 22.01, + "learning_rate": 3.078481012658228e-07, + "loss": 0.0044, + "step": 38800 + }, + { + "epoch": 22.01, + "learning_rate": 3.015189873417722e-07, + "loss": 0.0079, + "step": 38825 + }, + { + "epoch": 22.01, + "learning_rate": 2.9518987341772155e-07, + "loss": 0.0093, + "step": 38850 + }, + { + "epoch": 22.01, + "learning_rate": 2.888607594936709e-07, + "loss": 0.0059, + "step": 38875 + }, + { + "epoch": 22.01, + "learning_rate": 2.825316455696203e-07, + "loss": 0.0098, + "step": 38900 + }, + { + "epoch": 22.01, + "learning_rate": 2.762025316455697e-07, + "loss": 0.0097, + "step": 38925 + }, + { + "epoch": 22.01, + "learning_rate": 2.69873417721519e-07, + "loss": 0.0069, + "step": 38950 + }, + { + "epoch": 22.01, + "learning_rate": 2.6354430379746837e-07, + "loss": 0.006, + "step": 38975 + }, + { + "epoch": 22.01, + "learning_rate": 2.5721518987341775e-07, + "loss": 0.0085, + "step": 39000 + }, + { + "epoch": 22.01, + "eval_loss": 0.26025164127349854, + "eval_runtime": 1463.2032, + "eval_samples_per_second": 7.115, + "eval_steps_per_second": 0.445, + "eval_wer": 22.616627912648987, + "step": 39000 + }, + { + "epoch": 22.01, + "learning_rate": 2.508860759493671e-07, + "loss": 0.0067, + "step": 39025 + }, + { + "epoch": 22.01, + "learning_rate": 2.445569620253165e-07, + "loss": 0.0085, + "step": 39050 + }, + { + "epoch": 22.01, + "learning_rate": 2.3822784810126584e-07, + "loss": 0.0055, + "step": 39075 + }, + { + "epoch": 22.01, + "learning_rate": 2.3189873417721522e-07, + "loss": 0.0059, + "step": 39100 + }, + { + "epoch": 22.02, + "learning_rate": 2.255696202531646e-07, + "loss": 0.0082, + "step": 39125 + }, + { + "epoch": 22.02, + "learning_rate": 2.1924050632911397e-07, + "loss": 0.0055, + "step": 39150 + }, + { + "epoch": 22.02, + "learning_rate": 2.1291139240506332e-07, + "loss": 0.0044, + "step": 39175 + }, + { + "epoch": 22.02, + "learning_rate": 2.065822784810127e-07, + "loss": 0.0045, + "step": 39200 + }, + { + "epoch": 22.02, + "learning_rate": 2.0025316455696201e-07, + "loss": 0.0035, + "step": 39225 + }, + { + "epoch": 22.02, + "learning_rate": 1.939240506329114e-07, + "loss": 0.0072, + "step": 39250 + }, + { + "epoch": 22.02, + "learning_rate": 1.8759493670886076e-07, + "loss": 0.0058, + "step": 39275 + }, + { + "epoch": 22.02, + "learning_rate": 1.8126582278481014e-07, + "loss": 0.0071, + "step": 39300 + }, + { + "epoch": 22.02, + "learning_rate": 1.7493670886075949e-07, + "loss": 0.0049, + "step": 39325 + }, + { + "epoch": 22.02, + "learning_rate": 1.6860759493670886e-07, + "loss": 0.0068, + "step": 39350 + }, + { + "epoch": 22.02, + "learning_rate": 1.6227848101265824e-07, + "loss": 0.0054, + "step": 39375 + }, + { + "epoch": 22.02, + "learning_rate": 1.559493670886076e-07, + "loss": 0.0099, + "step": 39400 + }, + { + "epoch": 22.02, + "learning_rate": 1.4962025316455699e-07, + "loss": 0.0076, + "step": 39425 + }, + { + "epoch": 22.02, + "learning_rate": 1.4329113924050633e-07, + "loss": 0.0081, + "step": 39450 + }, + { + "epoch": 22.02, + "learning_rate": 1.369620253164557e-07, + "loss": 0.0066, + "step": 39475 + }, + { + "epoch": 22.02, + "learning_rate": 1.3063291139240506e-07, + "loss": 0.0058, + "step": 39500 + }, + { + "epoch": 22.03, + "learning_rate": 1.2430379746835443e-07, + "loss": 0.006, + "step": 39525 + }, + { + "epoch": 22.03, + "learning_rate": 1.179746835443038e-07, + "loss": 0.0033, + "step": 39550 + }, + { + "epoch": 22.03, + "learning_rate": 1.1164556962025317e-07, + "loss": 0.0049, + "step": 39575 + }, + { + "epoch": 22.03, + "learning_rate": 1.0531645569620254e-07, + "loss": 0.0058, + "step": 39600 + }, + { + "epoch": 22.03, + "learning_rate": 9.89873417721519e-08, + "loss": 0.0052, + "step": 39625 + }, + { + "epoch": 22.03, + "learning_rate": 9.265822784810128e-08, + "loss": 0.0082, + "step": 39650 + }, + { + "epoch": 22.03, + "learning_rate": 8.632911392405064e-08, + "loss": 0.0081, + "step": 39675 + }, + { + "epoch": 22.03, + "learning_rate": 8e-08, + "loss": 0.0039, + "step": 39700 + }, + { + "epoch": 22.03, + "learning_rate": 7.367088607594938e-08, + "loss": 0.0038, + "step": 39725 + }, + { + "epoch": 22.03, + "learning_rate": 6.734177215189874e-08, + "loss": 0.0042, + "step": 39750 + }, + { + "epoch": 22.03, + "learning_rate": 6.10126582278481e-08, + "loss": 0.006, + "step": 39775 + }, + { + "epoch": 22.03, + "learning_rate": 5.468354430379747e-08, + "loss": 0.0086, + "step": 39800 + }, + { + "epoch": 22.03, + "learning_rate": 4.8354430379746836e-08, + "loss": 0.0047, + "step": 39825 + }, + { + "epoch": 22.03, + "learning_rate": 4.2025316455696204e-08, + "loss": 0.0063, + "step": 39850 + }, + { + "epoch": 22.03, + "learning_rate": 3.569620253164557e-08, + "loss": 0.0042, + "step": 39875 + }, + { + "epoch": 22.03, + "learning_rate": 2.936708860759494e-08, + "loss": 0.0072, + "step": 39900 + }, + { + "epoch": 22.04, + "learning_rate": 2.3037974683544308e-08, + "loss": 0.0043, + "step": 39925 + }, + { + "epoch": 22.04, + "learning_rate": 1.6708860759493673e-08, + "loss": 0.0063, + "step": 39950 + }, + { + "epoch": 22.04, + "learning_rate": 1.0379746835443038e-08, + "loss": 0.0057, + "step": 39975 + }, + { + "epoch": 22.04, + "learning_rate": 4.050632911392406e-09, + "loss": 0.0042, + "step": 40000 + }, + { + "epoch": 22.04, + "eval_loss": 0.2613997459411621, + "eval_runtime": 1483.0212, + "eval_samples_per_second": 7.02, + "eval_steps_per_second": 0.439, + "eval_wer": 22.74508528593961, + "step": 40000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.846311674167296e+20, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-40000/training_args.bin b/checkpoint-40000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-40000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-5000/config.json b/checkpoint-5000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-5000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-5000/generation_config.json b/checkpoint-5000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-5000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-5000/optimizer.pt b/checkpoint-5000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e1682aebaa777206e58fb9192a7f9fd85270e76 --- /dev/null +++ b/checkpoint-5000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4da5ce5796fc8a9311259300669b3d402ead24695404fbc25e8649e97ce30b9 +size 1934161093 diff --git a/checkpoint-5000/preprocessor_config.json b/checkpoint-5000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-5000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-5000/pytorch_model.bin b/checkpoint-5000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..94dbd6da3c195c1643d46b01d44cbb13a351906b --- /dev/null +++ b/checkpoint-5000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a82358f080773fe723cdff1730518f29b6671f5ccfea72e87ea8a95cfeccb42 +size 967102601 diff --git a/checkpoint-5000/rng_state.pth b/checkpoint-5000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5d6cbd3d5e51165ace118469c950c11b0b6c54ff --- /dev/null +++ b/checkpoint-5000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76c155f005e0710ae0ab58b397fe01fab0001d8cd52f8bfc6de7fbcfbdc6a9ea +size 14575 diff --git a/checkpoint-5000/scaler.pt b/checkpoint-5000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c4da4a0928a16012d878787bd1bbeeb73b756a5 --- /dev/null +++ b/checkpoint-5000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9426169d6b584d51d1506ee6e3849c174b1bc7995d5cc4e64c953eacdb0fe9c3 +size 557 diff --git a/checkpoint-5000/scheduler.pt b/checkpoint-5000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..117ad43e3cd2d180a0218c0e3abbf5dc0fab1da2 --- /dev/null +++ b/checkpoint-5000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7738e48f16a758d2bc02367d580426913e4bf575afe20084cf732147f0d900e2 +size 627 diff --git a/checkpoint-5000/trainer_state.json b/checkpoint-5000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a532d99ee207b4abcbde37e886a44330f8aa7b4f --- /dev/null +++ b/checkpoint-5000/trainer_state.json @@ -0,0 +1,1261 @@ +{ + "best_metric": 33.46883689165677, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-5000", + "epoch": 2.03745, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.30810603175936e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-5000/training_args.bin b/checkpoint-5000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-5000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-6000/config.json b/checkpoint-6000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-6000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-6000/generation_config.json b/checkpoint-6000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-6000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-6000/optimizer.pt b/checkpoint-6000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..380646cfa5056c2becb0ed05f145c45c6436e737 --- /dev/null +++ b/checkpoint-6000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdf1e8a803fcceae7e3f19eb7a5c7cc5488746d3325560c7ed9429c64b792e3b +size 1934161093 diff --git a/checkpoint-6000/preprocessor_config.json b/checkpoint-6000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-6000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-6000/pytorch_model.bin b/checkpoint-6000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..7f7cdb49491921272e81c586e987e91a75ae90ea --- /dev/null +++ b/checkpoint-6000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e1d68c57723589280d9e31c9225622e62edf73874fa6471b055976ba2bcc348 +size 967102601 diff --git a/checkpoint-6000/rng_state.pth b/checkpoint-6000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8678cceebd7de4dee9ece7967568a553eacc930f --- /dev/null +++ b/checkpoint-6000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5349a7944242b1d4f9abf295b5ca297a99008101813640073d8db07b3c1cc864 +size 14575 diff --git a/checkpoint-6000/scaler.pt b/checkpoint-6000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8053fbdbedb91ed0550c619795cc6bfc770c0192 --- /dev/null +++ b/checkpoint-6000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57215ad64ce43211c5423ead9917c7b8cdd4d004da098b9b4faa077110cf2cd3 +size 557 diff --git a/checkpoint-6000/scheduler.pt b/checkpoint-6000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd20202f9830d7ba2ffb0c3c05d7d4fdd32c25a5 --- /dev/null +++ b/checkpoint-6000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a97e8de057820ca2d856fb5eefd59a48bf2ee7adcd20cf5efac56710edfeb1bf +size 627 diff --git a/checkpoint-6000/trainer_state.json b/checkpoint-6000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8157728b59c4738cc77da4462c288b8728e4bc47 --- /dev/null +++ b/checkpoint-6000/trainer_state.json @@ -0,0 +1,1510 @@ +{ + "best_metric": 33.46883689165677, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-5000", + "epoch": 3.018675, + "global_step": 6000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.76955408687104e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-6000/training_args.bin b/checkpoint-6000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-6000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-7000/config.json b/checkpoint-7000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-7000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-7000/generation_config.json b/checkpoint-7000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-7000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-7000/optimizer.pt b/checkpoint-7000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..60c13f4498d29993b93161967a9e81eee2b1e006 --- /dev/null +++ b/checkpoint-7000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cfc8a807e9583753eb464e874335404b26deb3f07e703d48a7eddcbeeb022f4 +size 1934161093 diff --git a/checkpoint-7000/preprocessor_config.json b/checkpoint-7000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-7000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-7000/pytorch_model.bin b/checkpoint-7000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..f49e7690c6515bd926b6296cd6744753be805ac1 --- /dev/null +++ b/checkpoint-7000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87574fa5915b388bfc7876b0826dffd53e01eb63b41a496e83d5c2f06209ce0d +size 967102601 diff --git a/checkpoint-7000/rng_state.pth b/checkpoint-7000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f79235b99b1fa65868b249ee949128a814080b40 --- /dev/null +++ b/checkpoint-7000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03db523fd8ca466e54b3592710f9a070aa269f249146f992005fdc4857185472 +size 14575 diff --git a/checkpoint-7000/scaler.pt b/checkpoint-7000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6a5904019978d2792df1dfc0bba78607c6eb1a1 --- /dev/null +++ b/checkpoint-7000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2365276a80437e170a0355d065ea038602f6b24c942f0240aa34581da4424480 +size 557 diff --git a/checkpoint-7000/scheduler.pt b/checkpoint-7000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3cc426191398f324e0a2ca4b3ab33d78705268ce --- /dev/null +++ b/checkpoint-7000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:396b96362101b7ab9c9d4da5341e31509c0b7c0ec0955a4b571a02a98084a38b +size 627 diff --git a/checkpoint-7000/trainer_state.json b/checkpoint-7000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..deef7db1b1b9c14f3ac0ea004865201c5c336a51 --- /dev/null +++ b/checkpoint-7000/trainer_state.json @@ -0,0 +1,1759 @@ +{ + "best_metric": 33.46883689165677, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-5000", + "epoch": 3.043675, + "global_step": 7000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.23129072738304e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-7000/training_args.bin b/checkpoint-7000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-7000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-8000/config.json b/checkpoint-8000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-8000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-8000/generation_config.json b/checkpoint-8000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-8000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-8000/optimizer.pt b/checkpoint-8000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1434dcf9d1881cd7120ac9409957de0148b0f37 --- /dev/null +++ b/checkpoint-8000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d354851a6ff5d3c1263989819b92ccdeb0c65ec0bcdcc79c6adb9949ae9470d4 +size 1934161093 diff --git a/checkpoint-8000/preprocessor_config.json b/checkpoint-8000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-8000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-8000/pytorch_model.bin b/checkpoint-8000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..022f9d2aca976f86cb2eb5a2e1508bc98de0c0b2 --- /dev/null +++ b/checkpoint-8000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c097cc7fd6c738a06bd32d911ab456ac9e4baa76a4b85303bc003a11b63b11f +size 967102601 diff --git a/checkpoint-8000/rng_state.pth b/checkpoint-8000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..72769a3b76c329ca9a45c827e11c1dd2ac2904d6 --- /dev/null +++ b/checkpoint-8000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82d43c7e29b6cb93079eff9bbb259ce867742e12de41ae882f086cbb7a61d984 +size 14639 diff --git a/checkpoint-8000/scaler.pt b/checkpoint-8000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5c83bf5ea2f2a586de9959cf1c4d464947fd3fc --- /dev/null +++ b/checkpoint-8000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3d65d7a466d029a3fcfa89917eba38615300c8a311fff5458c522c9bd30c1ac +size 557 diff --git a/checkpoint-8000/scheduler.pt b/checkpoint-8000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e01a8dea432add8bf26fd81919438d8774f97c0 --- /dev/null +++ b/checkpoint-8000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbe11f7b32bbe306e1661da02f269c698f845283005b70c823277ce6fb342196 +size 627 diff --git a/checkpoint-8000/trainer_state.json b/checkpoint-8000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..beb45a278b961187f25e014de44cd129873797b6 --- /dev/null +++ b/checkpoint-8000/trainer_state.json @@ -0,0 +1,2008 @@ +{ + "best_metric": 33.46883689165677, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-5000", + "epoch": 4.0249, + "global_step": 8000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.69273878249472e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-8000/training_args.bin b/checkpoint-8000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-8000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091 diff --git a/checkpoint-9000/config.json b/checkpoint-9000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67578242e1e1fc9c6bb84715a095bfd5d92618e2 --- /dev/null +++ b/checkpoint-9000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.05, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.30.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-9000/generation_config.json b/checkpoint-9000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c842130b506e1b3d0c192596d447bbcb3d5e9ab --- /dev/null +++ b/checkpoint-9000/generation_config.json @@ -0,0 +1,221 @@ +{ + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.30.0.dev0" +} diff --git a/checkpoint-9000/optimizer.pt b/checkpoint-9000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..33cdb337572229bdfb7b2393f8e5dd5c0d83051d --- /dev/null +++ b/checkpoint-9000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f4e79ff10ea3cdf0cd3e4a58865fc75ac4c8e26c6d1a76d5278a476d54c14c3 +size 1934161093 diff --git a/checkpoint-9000/preprocessor_config.json b/checkpoint-9000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-9000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-9000/pytorch_model.bin b/checkpoint-9000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fb7e91c480db774d49c5d39aaa0a6d4b01796bd --- /dev/null +++ b/checkpoint-9000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c11765f015cac60387652e9c1ae5e041c22f7d59604eb2fe194ea6624e6d6d9 +size 967102601 diff --git a/checkpoint-9000/rng_state.pth b/checkpoint-9000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..564292f5e2b20a903b0585c95e2aefe650ede76b --- /dev/null +++ b/checkpoint-9000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad7674be124fe7c6501a9ce45c8acfd8731d15a1de59d3b258801fd0f397091 +size 14575 diff --git a/checkpoint-9000/scaler.pt b/checkpoint-9000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fada82dcb0011d8f49ffb09a31a740b268330d16 --- /dev/null +++ b/checkpoint-9000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2493e3e862ded8cace49a18f79913a012606a3f6946cb02196132f59fcaf1c6e +size 557 diff --git a/checkpoint-9000/scheduler.pt b/checkpoint-9000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..52a591db79b088657737b3a4597f6af319456bd6 --- /dev/null +++ b/checkpoint-9000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c2f54359dbaa01722d017bdd485c6781194c5d313ebf09caeb8511f34eab334 +size 627 diff --git a/checkpoint-9000/trainer_state.json b/checkpoint-9000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..581858571a6396d77bd3ad65ad7d2c2cc0754870 --- /dev/null +++ b/checkpoint-9000/trainer_state.json @@ -0,0 +1,2257 @@ +{ + "best_metric": 31.58262735979447, + "best_model_checkpoint": "./whisper-small-fa-aug/checkpoint-9000", + "epoch": 5.006125, + "global_step": 9000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0497, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.42e-06, + "loss": 2.0102, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5948, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.42e-06, + "loss": 1.4094, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 2.92e-06, + "loss": 1.2611, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.1366, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.920000000000001e-06, + "loss": 1.0817, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.42e-06, + "loss": 1.0324, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.92e-06, + "loss": 0.9176, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7975, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 0.7079, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.42e-06, + "loss": 0.5832, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 6.92e-06, + "loss": 0.4469, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 7.420000000000001e-06, + "loss": 0.3913, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 7.92e-06, + "loss": 0.4189, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 8.42e-06, + "loss": 0.3756, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3876, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 9.42e-06, + "loss": 0.3802, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4061, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 9.994683544303798e-06, + "loss": 0.3885, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 9.988354430379748e-06, + "loss": 0.3449, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 9.982025316455697e-06, + "loss": 0.3292, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 9.975696202531647e-06, + "loss": 0.3439, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 9.969367088607596e-06, + "loss": 0.3031, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 9.963037974683545e-06, + "loss": 0.3159, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 9.956708860759495e-06, + "loss": 0.3304, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 9.950379746835444e-06, + "loss": 0.2918, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 9.944050632911392e-06, + "loss": 0.2785, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 9.937721518987341e-06, + "loss": 0.2618, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 9.931392405063291e-06, + "loss": 0.2862, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 9.92506329113924e-06, + "loss": 0.2962, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 9.91873417721519e-06, + "loss": 0.2935, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 9.912405063291141e-06, + "loss": 0.2827, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 9.90607594936709e-06, + "loss": 0.2702, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 9.89974683544304e-06, + "loss": 0.2987, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 9.89341772151899e-06, + "loss": 0.3067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 9.887088607594939e-06, + "loss": 0.2962, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 9.880759493670888e-06, + "loss": 0.2807, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 9.874430379746836e-06, + "loss": 0.2533, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_loss": 0.3274388611316681, + "eval_runtime": 1481.9106, + "eval_samples_per_second": 7.025, + "eval_steps_per_second": 0.439, + "eval_wer": 40.657571667832, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 9.868101265822785e-06, + "loss": 0.21, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 9.861772151898735e-06, + "loss": 0.2463, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 9.855443037974684e-06, + "loss": 0.2688, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 9.849113924050633e-06, + "loss": 0.2293, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 9.842784810126583e-06, + "loss": 0.2283, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 9.836455696202532e-06, + "loss": 0.228, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 9.830126582278482e-06, + "loss": 0.1919, + "step": 1175 + }, + { + "epoch": 0.03, + "learning_rate": 9.823797468354431e-06, + "loss": 0.2173, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 9.81746835443038e-06, + "loss": 0.2066, + "step": 1225 + }, + { + "epoch": 0.03, + "learning_rate": 9.81113924050633e-06, + "loss": 0.1826, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 9.80481012658228e-06, + "loss": 0.1855, + "step": 1275 + }, + { + "epoch": 0.03, + "learning_rate": 9.798481012658229e-06, + "loss": 0.1702, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 9.792151898734178e-06, + "loss": 0.1714, + "step": 1325 + }, + { + "epoch": 0.03, + "learning_rate": 9.785822784810128e-06, + "loss": 0.1686, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 9.779493670886077e-06, + "loss": 0.1762, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 9.773164556962027e-06, + "loss": 0.1722, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 9.766835443037976e-06, + "loss": 0.2051, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 9.760506329113924e-06, + "loss": 0.2071, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 9.754177215189873e-06, + "loss": 0.1664, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 9.747848101265823e-06, + "loss": 0.1871, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 9.741518987341772e-06, + "loss": 0.1893, + "step": 1525 + }, + { + "epoch": 0.04, + "learning_rate": 9.735189873417721e-06, + "loss": 0.1756, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 9.728860759493671e-06, + "loss": 0.1687, + "step": 1575 + }, + { + "epoch": 0.04, + "learning_rate": 9.72253164556962e-06, + "loss": 0.1685, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 9.71620253164557e-06, + "loss": 0.1805, + "step": 1625 + }, + { + "epoch": 0.04, + "learning_rate": 9.70987341772152e-06, + "loss": 0.1868, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 9.70354430379747e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 0.04, + "learning_rate": 9.69721518987342e-06, + "loss": 0.2019, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 9.69088607594937e-06, + "loss": 0.2076, + "step": 1725 + }, + { + "epoch": 0.04, + "learning_rate": 9.684556962025317e-06, + "loss": 0.1971, + "step": 1750 + }, + { + "epoch": 1.0, + "learning_rate": 9.678227848101266e-06, + "loss": 0.2424, + "step": 1775 + }, + { + "epoch": 1.0, + "learning_rate": 9.671898734177216e-06, + "loss": 0.2346, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 9.665569620253165e-06, + "loss": 0.2436, + "step": 1825 + }, + { + "epoch": 1.0, + "learning_rate": 9.659240506329115e-06, + "loss": 0.2613, + "step": 1850 + }, + { + "epoch": 1.0, + "learning_rate": 9.652911392405064e-06, + "loss": 0.2781, + "step": 1875 + }, + { + "epoch": 1.0, + "learning_rate": 9.646582278481013e-06, + "loss": 0.2465, + "step": 1900 + }, + { + "epoch": 1.0, + "learning_rate": 9.640253164556963e-06, + "loss": 0.2386, + "step": 1925 + }, + { + "epoch": 1.0, + "learning_rate": 9.633924050632912e-06, + "loss": 0.2298, + "step": 1950 + }, + { + "epoch": 1.01, + "learning_rate": 9.627594936708862e-06, + "loss": 0.264, + "step": 1975 + }, + { + "epoch": 1.01, + "learning_rate": 9.621265822784811e-06, + "loss": 0.2077, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.25397753715515137, + "eval_runtime": 1477.9319, + "eval_samples_per_second": 7.044, + "eval_steps_per_second": 0.44, + "eval_wer": 35.12252231743606, + "step": 2000 + }, + { + "epoch": 1.01, + "learning_rate": 9.61493670886076e-06, + "loss": 0.192, + "step": 2025 + }, + { + "epoch": 1.01, + "learning_rate": 9.60860759493671e-06, + "loss": 0.1903, + "step": 2050 + }, + { + "epoch": 1.01, + "learning_rate": 9.60227848101266e-06, + "loss": 0.2135, + "step": 2075 + }, + { + "epoch": 1.01, + "learning_rate": 9.595949367088609e-06, + "loss": 0.1917, + "step": 2100 + }, + { + "epoch": 1.01, + "learning_rate": 9.589620253164558e-06, + "loss": 0.1579, + "step": 2125 + }, + { + "epoch": 1.01, + "learning_rate": 9.583291139240508e-06, + "loss": 0.1779, + "step": 2150 + }, + { + "epoch": 1.01, + "learning_rate": 9.576962025316457e-06, + "loss": 0.1504, + "step": 2175 + }, + { + "epoch": 1.01, + "learning_rate": 9.570632911392405e-06, + "loss": 0.1574, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 9.564303797468354e-06, + "loss": 0.1757, + "step": 2225 + }, + { + "epoch": 1.01, + "learning_rate": 9.557974683544304e-06, + "loss": 0.1996, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 9.551645569620253e-06, + "loss": 0.1716, + "step": 2275 + }, + { + "epoch": 1.01, + "learning_rate": 9.545316455696203e-06, + "loss": 0.1509, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 9.538987341772152e-06, + "loss": 0.1402, + "step": 2325 + }, + { + "epoch": 1.01, + "learning_rate": 9.532658227848101e-06, + "loss": 0.1612, + "step": 2350 + }, + { + "epoch": 1.02, + "learning_rate": 9.526329113924051e-06, + "loss": 0.1357, + "step": 2375 + }, + { + "epoch": 1.02, + "learning_rate": 9.52e-06, + "loss": 0.1426, + "step": 2400 + }, + { + "epoch": 1.02, + "learning_rate": 9.51367088607595e-06, + "loss": 0.1644, + "step": 2425 + }, + { + "epoch": 1.02, + "learning_rate": 9.5073417721519e-06, + "loss": 0.1196, + "step": 2450 + }, + { + "epoch": 1.02, + "learning_rate": 9.501012658227849e-06, + "loss": 0.1332, + "step": 2475 + }, + { + "epoch": 1.02, + "learning_rate": 9.494683544303798e-06, + "loss": 0.1096, + "step": 2500 + }, + { + "epoch": 1.02, + "learning_rate": 9.488354430379747e-06, + "loss": 0.1263, + "step": 2525 + }, + { + "epoch": 1.02, + "learning_rate": 9.482025316455697e-06, + "loss": 0.1467, + "step": 2550 + }, + { + "epoch": 1.02, + "learning_rate": 9.475696202531646e-06, + "loss": 0.1372, + "step": 2575 + }, + { + "epoch": 1.02, + "learning_rate": 9.469367088607596e-06, + "loss": 0.1256, + "step": 2600 + }, + { + "epoch": 1.02, + "learning_rate": 9.463037974683545e-06, + "loss": 0.1319, + "step": 2625 + }, + { + "epoch": 1.02, + "learning_rate": 9.456708860759495e-06, + "loss": 0.1385, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 9.450379746835444e-06, + "loss": 0.147, + "step": 2675 + }, + { + "epoch": 1.02, + "learning_rate": 9.444050632911393e-06, + "loss": 0.1527, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 9.437721518987343e-06, + "loss": 0.1357, + "step": 2725 + }, + { + "epoch": 1.02, + "learning_rate": 9.431392405063292e-06, + "loss": 0.122, + "step": 2750 + }, + { + "epoch": 1.03, + "learning_rate": 9.425063291139242e-06, + "loss": 0.0918, + "step": 2775 + }, + { + "epoch": 1.03, + "learning_rate": 9.418734177215191e-06, + "loss": 0.1218, + "step": 2800 + }, + { + "epoch": 1.03, + "learning_rate": 9.41240506329114e-06, + "loss": 0.139, + "step": 2825 + }, + { + "epoch": 1.03, + "learning_rate": 9.40607594936709e-06, + "loss": 0.1205, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 9.39974683544304e-06, + "loss": 0.1158, + "step": 2875 + }, + { + "epoch": 1.03, + "learning_rate": 9.393417721518989e-06, + "loss": 0.1156, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 9.387088607594937e-06, + "loss": 0.1044, + "step": 2925 + }, + { + "epoch": 1.03, + "learning_rate": 9.380759493670886e-06, + "loss": 0.112, + "step": 2950 + }, + { + "epoch": 1.03, + "learning_rate": 9.374430379746835e-06, + "loss": 0.1023, + "step": 2975 + }, + { + "epoch": 1.03, + "learning_rate": 9.368101265822785e-06, + "loss": 0.0966, + "step": 3000 + }, + { + "epoch": 1.03, + "eval_loss": 0.2638459801673889, + "eval_runtime": 1500.936, + "eval_samples_per_second": 6.936, + "eval_steps_per_second": 0.434, + "eval_wer": 37.93069806013106, + "step": 3000 + }, + { + "epoch": 1.03, + "learning_rate": 9.361772151898734e-06, + "loss": 0.0942, + "step": 3025 + }, + { + "epoch": 1.03, + "learning_rate": 9.355443037974684e-06, + "loss": 0.0826, + "step": 3050 + }, + { + "epoch": 1.03, + "learning_rate": 9.349113924050633e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.03, + "learning_rate": 9.342784810126583e-06, + "loss": 0.0956, + "step": 3100 + }, + { + "epoch": 1.03, + "learning_rate": 9.336455696202532e-06, + "loss": 0.0935, + "step": 3125 + }, + { + "epoch": 1.03, + "learning_rate": 9.330126582278481e-06, + "loss": 0.0923, + "step": 3150 + }, + { + "epoch": 1.04, + "learning_rate": 9.323797468354431e-06, + "loss": 0.1163, + "step": 3175 + }, + { + "epoch": 1.04, + "learning_rate": 9.31746835443038e-06, + "loss": 0.1099, + "step": 3200 + }, + { + "epoch": 1.04, + "learning_rate": 9.31113924050633e-06, + "loss": 0.0879, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 9.30481012658228e-06, + "loss": 0.0994, + "step": 3250 + }, + { + "epoch": 1.04, + "learning_rate": 9.298481012658229e-06, + "loss": 0.1069, + "step": 3275 + }, + { + "epoch": 1.04, + "learning_rate": 9.292151898734178e-06, + "loss": 0.0957, + "step": 3300 + }, + { + "epoch": 1.04, + "learning_rate": 9.285822784810127e-06, + "loss": 0.0866, + "step": 3325 + }, + { + "epoch": 1.04, + "learning_rate": 9.279493670886077e-06, + "loss": 0.0965, + "step": 3350 + }, + { + "epoch": 1.04, + "learning_rate": 9.273164556962026e-06, + "loss": 0.0952, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 9.266835443037976e-06, + "loss": 0.1178, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 9.260506329113925e-06, + "loss": 0.1154, + "step": 3425 + }, + { + "epoch": 1.04, + "learning_rate": 9.254177215189875e-06, + "loss": 0.1075, + "step": 3450 + }, + { + "epoch": 1.04, + "learning_rate": 9.247848101265824e-06, + "loss": 0.1161, + "step": 3475 + }, + { + "epoch": 1.04, + "learning_rate": 9.241518987341773e-06, + "loss": 0.1153, + "step": 3500 + }, + { + "epoch": 2.0, + "learning_rate": 9.235189873417723e-06, + "loss": 0.1343, + "step": 3525 + }, + { + "epoch": 2.0, + "learning_rate": 9.228860759493672e-06, + "loss": 0.1374, + "step": 3550 + }, + { + "epoch": 2.0, + "learning_rate": 9.222531645569622e-06, + "loss": 0.1436, + "step": 3575 + }, + { + "epoch": 2.0, + "learning_rate": 9.216202531645571e-06, + "loss": 0.1561, + "step": 3600 + }, + { + "epoch": 2.0, + "learning_rate": 9.20987341772152e-06, + "loss": 0.1709, + "step": 3625 + }, + { + "epoch": 2.0, + "learning_rate": 9.20354430379747e-06, + "loss": 0.142, + "step": 3650 + }, + { + "epoch": 2.0, + "learning_rate": 9.197215189873418e-06, + "loss": 0.1389, + "step": 3675 + }, + { + "epoch": 2.0, + "learning_rate": 9.190886075949367e-06, + "loss": 0.1398, + "step": 3700 + }, + { + "epoch": 2.01, + "learning_rate": 9.184556962025317e-06, + "loss": 0.1526, + "step": 3725 + }, + { + "epoch": 2.01, + "learning_rate": 9.178227848101266e-06, + "loss": 0.1273, + "step": 3750 + }, + { + "epoch": 2.01, + "learning_rate": 9.171898734177215e-06, + "loss": 0.1119, + "step": 3775 + }, + { + "epoch": 2.01, + "learning_rate": 9.165569620253165e-06, + "loss": 0.1088, + "step": 3800 + }, + { + "epoch": 2.01, + "learning_rate": 9.159493670886076e-06, + "loss": 0.1362, + "step": 3825 + }, + { + "epoch": 2.01, + "learning_rate": 9.153164556962026e-06, + "loss": 0.1247, + "step": 3850 + }, + { + "epoch": 2.01, + "learning_rate": 9.146835443037975e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 2.01, + "learning_rate": 9.140506329113925e-06, + "loss": 0.1218, + "step": 3900 + }, + { + "epoch": 2.01, + "learning_rate": 9.134177215189874e-06, + "loss": 0.0846, + "step": 3925 + }, + { + "epoch": 2.01, + "learning_rate": 9.127848101265823e-06, + "loss": 0.0964, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 9.121518987341773e-06, + "loss": 0.1166, + "step": 3975 + }, + { + "epoch": 2.01, + "learning_rate": 9.115189873417722e-06, + "loss": 0.1212, + "step": 4000 + }, + { + "epoch": 2.01, + "eval_loss": 0.22517943382263184, + "eval_runtime": 1537.556, + "eval_samples_per_second": 6.771, + "eval_steps_per_second": 0.423, + "eval_wer": 69.04990325045935, + "step": 4000 + }, + { + "epoch": 2.01, + "learning_rate": 9.108860759493672e-06, + "loss": 0.1076, + "step": 4025 + }, + { + "epoch": 2.01, + "learning_rate": 9.102531645569621e-06, + "loss": 0.086, + "step": 4050 + }, + { + "epoch": 2.01, + "learning_rate": 9.09620253164557e-06, + "loss": 0.0773, + "step": 4075 + }, + { + "epoch": 2.01, + "learning_rate": 9.08987341772152e-06, + "loss": 0.098, + "step": 4100 + }, + { + "epoch": 2.02, + "learning_rate": 9.08354430379747e-06, + "loss": 0.0747, + "step": 4125 + }, + { + "epoch": 2.02, + "learning_rate": 9.077215189873419e-06, + "loss": 0.0895, + "step": 4150 + }, + { + "epoch": 2.02, + "learning_rate": 9.070886075949368e-06, + "loss": 0.0957, + "step": 4175 + }, + { + "epoch": 2.02, + "learning_rate": 9.064556962025318e-06, + "loss": 0.0789, + "step": 4200 + }, + { + "epoch": 2.02, + "learning_rate": 9.058227848101267e-06, + "loss": 0.0718, + "step": 4225 + }, + { + "epoch": 2.02, + "learning_rate": 9.051898734177215e-06, + "loss": 0.0737, + "step": 4250 + }, + { + "epoch": 2.02, + "learning_rate": 9.045569620253164e-06, + "loss": 0.0731, + "step": 4275 + }, + { + "epoch": 2.02, + "learning_rate": 9.039240506329114e-06, + "loss": 0.087, + "step": 4300 + }, + { + "epoch": 2.02, + "learning_rate": 9.032911392405063e-06, + "loss": 0.0875, + "step": 4325 + }, + { + "epoch": 2.02, + "learning_rate": 9.026582278481013e-06, + "loss": 0.0768, + "step": 4350 + }, + { + "epoch": 2.02, + "learning_rate": 9.020253164556962e-06, + "loss": 0.0842, + "step": 4375 + }, + { + "epoch": 2.02, + "learning_rate": 9.013924050632911e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.02, + "learning_rate": 9.007594936708861e-06, + "loss": 0.0949, + "step": 4425 + }, + { + "epoch": 2.02, + "learning_rate": 9.00126582278481e-06, + "loss": 0.0989, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 8.99493670886076e-06, + "loss": 0.0908, + "step": 4475 + }, + { + "epoch": 2.02, + "learning_rate": 8.98860759493671e-06, + "loss": 0.0783, + "step": 4500 + }, + { + "epoch": 2.03, + "learning_rate": 8.982278481012659e-06, + "loss": 0.0651, + "step": 4525 + }, + { + "epoch": 2.03, + "learning_rate": 8.975949367088608e-06, + "loss": 0.0791, + "step": 4550 + }, + { + "epoch": 2.03, + "learning_rate": 8.969620253164557e-06, + "loss": 0.0829, + "step": 4575 + }, + { + "epoch": 2.03, + "learning_rate": 8.963291139240507e-06, + "loss": 0.0797, + "step": 4600 + }, + { + "epoch": 2.03, + "learning_rate": 8.956962025316456e-06, + "loss": 0.0743, + "step": 4625 + }, + { + "epoch": 2.03, + "learning_rate": 8.950632911392406e-06, + "loss": 0.0814, + "step": 4650 + }, + { + "epoch": 2.03, + "learning_rate": 8.944303797468355e-06, + "loss": 0.0594, + "step": 4675 + }, + { + "epoch": 2.03, + "learning_rate": 8.937974683544305e-06, + "loss": 0.0709, + "step": 4700 + }, + { + "epoch": 2.03, + "learning_rate": 8.931645569620254e-06, + "loss": 0.0621, + "step": 4725 + }, + { + "epoch": 2.03, + "learning_rate": 8.925316455696203e-06, + "loss": 0.0599, + "step": 4750 + }, + { + "epoch": 2.03, + "learning_rate": 8.918987341772153e-06, + "loss": 0.0625, + "step": 4775 + }, + { + "epoch": 2.03, + "learning_rate": 8.912658227848102e-06, + "loss": 0.0541, + "step": 4800 + }, + { + "epoch": 2.03, + "learning_rate": 8.906329113924052e-06, + "loss": 0.059, + "step": 4825 + }, + { + "epoch": 2.03, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0576, + "step": 4850 + }, + { + "epoch": 2.03, + "learning_rate": 8.89367088607595e-06, + "loss": 0.0623, + "step": 4875 + }, + { + "epoch": 2.03, + "learning_rate": 8.8873417721519e-06, + "loss": 0.0592, + "step": 4900 + }, + { + "epoch": 2.04, + "learning_rate": 8.88101265822785e-06, + "loss": 0.0692, + "step": 4925 + }, + { + "epoch": 2.04, + "learning_rate": 8.874683544303799e-06, + "loss": 0.077, + "step": 4950 + }, + { + "epoch": 2.04, + "learning_rate": 8.868354430379748e-06, + "loss": 0.0662, + "step": 4975 + }, + { + "epoch": 2.04, + "learning_rate": 8.862025316455696e-06, + "loss": 0.0605, + "step": 5000 + }, + { + "epoch": 2.04, + "eval_loss": 0.2468421906232834, + "eval_runtime": 1442.9116, + "eval_samples_per_second": 7.215, + "eval_steps_per_second": 0.451, + "eval_wer": 33.46883689165677, + "step": 5000 + }, + { + "epoch": 2.04, + "learning_rate": 8.855696202531645e-06, + "loss": 0.0769, + "step": 5025 + }, + { + "epoch": 2.04, + "learning_rate": 8.849367088607595e-06, + "loss": 0.0601, + "step": 5050 + }, + { + "epoch": 2.04, + "learning_rate": 8.843037974683544e-06, + "loss": 0.0698, + "step": 5075 + }, + { + "epoch": 2.04, + "learning_rate": 8.836708860759494e-06, + "loss": 0.0593, + "step": 5100 + }, + { + "epoch": 2.04, + "learning_rate": 8.830379746835443e-06, + "loss": 0.0521, + "step": 5125 + }, + { + "epoch": 2.04, + "learning_rate": 8.824050632911393e-06, + "loss": 0.0687, + "step": 5150 + }, + { + "epoch": 2.04, + "learning_rate": 8.817721518987342e-06, + "loss": 0.0818, + "step": 5175 + }, + { + "epoch": 2.04, + "learning_rate": 8.811392405063291e-06, + "loss": 0.0706, + "step": 5200 + }, + { + "epoch": 2.04, + "learning_rate": 8.805063291139241e-06, + "loss": 0.0734, + "step": 5225 + }, + { + "epoch": 2.04, + "learning_rate": 8.79873417721519e-06, + "loss": 0.0795, + "step": 5250 + }, + { + "epoch": 3.0, + "learning_rate": 8.79240506329114e-06, + "loss": 0.0905, + "step": 5275 + }, + { + "epoch": 3.0, + "learning_rate": 8.78607594936709e-06, + "loss": 0.086, + "step": 5300 + }, + { + "epoch": 3.0, + "learning_rate": 8.779746835443039e-06, + "loss": 0.105, + "step": 5325 + }, + { + "epoch": 3.0, + "learning_rate": 8.773417721518988e-06, + "loss": 0.0981, + "step": 5350 + }, + { + "epoch": 3.0, + "learning_rate": 8.767088607594937e-06, + "loss": 0.1131, + "step": 5375 + }, + { + "epoch": 3.0, + "learning_rate": 8.760759493670887e-06, + "loss": 0.0916, + "step": 5400 + }, + { + "epoch": 3.0, + "learning_rate": 8.754430379746836e-06, + "loss": 0.0974, + "step": 5425 + }, + { + "epoch": 3.0, + "learning_rate": 8.748101265822786e-06, + "loss": 0.0899, + "step": 5450 + }, + { + "epoch": 3.01, + "learning_rate": 8.741772151898735e-06, + "loss": 0.1104, + "step": 5475 + }, + { + "epoch": 3.01, + "learning_rate": 8.735443037974685e-06, + "loss": 0.0818, + "step": 5500 + }, + { + "epoch": 3.01, + "learning_rate": 8.729113924050634e-06, + "loss": 0.0764, + "step": 5525 + }, + { + "epoch": 3.01, + "learning_rate": 8.722784810126583e-06, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 3.01, + "learning_rate": 8.716455696202533e-06, + "loss": 0.0891, + "step": 5575 + }, + { + "epoch": 3.01, + "learning_rate": 8.710126582278482e-06, + "loss": 0.0869, + "step": 5600 + }, + { + "epoch": 3.01, + "learning_rate": 8.703797468354432e-06, + "loss": 0.0631, + "step": 5625 + }, + { + "epoch": 3.01, + "learning_rate": 8.697468354430381e-06, + "loss": 0.0795, + "step": 5650 + }, + { + "epoch": 3.01, + "learning_rate": 8.69113924050633e-06, + "loss": 0.0603, + "step": 5675 + }, + { + "epoch": 3.01, + "learning_rate": 8.68481012658228e-06, + "loss": 0.064, + "step": 5700 + }, + { + "epoch": 3.01, + "learning_rate": 8.67848101265823e-06, + "loss": 0.0738, + "step": 5725 + }, + { + "epoch": 3.01, + "learning_rate": 8.672151898734177e-06, + "loss": 0.0796, + "step": 5750 + }, + { + "epoch": 3.01, + "learning_rate": 8.665822784810127e-06, + "loss": 0.0723, + "step": 5775 + }, + { + "epoch": 3.01, + "learning_rate": 8.659493670886076e-06, + "loss": 0.0592, + "step": 5800 + }, + { + "epoch": 3.01, + "learning_rate": 8.653164556962025e-06, + "loss": 0.051, + "step": 5825 + }, + { + "epoch": 3.01, + "learning_rate": 8.646835443037975e-06, + "loss": 0.0716, + "step": 5850 + }, + { + "epoch": 3.02, + "learning_rate": 8.640506329113924e-06, + "loss": 0.0598, + "step": 5875 + }, + { + "epoch": 3.02, + "learning_rate": 8.634177215189874e-06, + "loss": 0.0524, + "step": 5900 + }, + { + "epoch": 3.02, + "learning_rate": 8.627848101265823e-06, + "loss": 0.0623, + "step": 5925 + }, + { + "epoch": 3.02, + "learning_rate": 8.621518987341773e-06, + "loss": 0.054, + "step": 5950 + }, + { + "epoch": 3.02, + "learning_rate": 8.615189873417722e-06, + "loss": 0.057, + "step": 5975 + }, + { + "epoch": 3.02, + "learning_rate": 8.608860759493671e-06, + "loss": 0.0479, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.24428367614746094, + "eval_runtime": 1457.5568, + "eval_samples_per_second": 7.143, + "eval_steps_per_second": 0.447, + "eval_wer": 36.51929299663409, + "step": 6000 + }, + { + "epoch": 3.02, + "learning_rate": 8.602531645569621e-06, + "loss": 0.0501, + "step": 6025 + }, + { + "epoch": 3.02, + "learning_rate": 8.59620253164557e-06, + "loss": 0.0527, + "step": 6050 + }, + { + "epoch": 3.02, + "learning_rate": 8.58987341772152e-06, + "loss": 0.0558, + "step": 6075 + }, + { + "epoch": 3.02, + "learning_rate": 8.58354430379747e-06, + "loss": 0.0548, + "step": 6100 + }, + { + "epoch": 3.02, + "learning_rate": 8.577215189873419e-06, + "loss": 0.0503, + "step": 6125 + }, + { + "epoch": 3.02, + "learning_rate": 8.570886075949368e-06, + "loss": 0.0641, + "step": 6150 + }, + { + "epoch": 3.02, + "learning_rate": 8.564556962025316e-06, + "loss": 0.0748, + "step": 6175 + }, + { + "epoch": 3.02, + "learning_rate": 8.558227848101267e-06, + "loss": 0.0775, + "step": 6200 + }, + { + "epoch": 3.02, + "learning_rate": 8.551898734177216e-06, + "loss": 0.0662, + "step": 6225 + }, + { + "epoch": 3.02, + "learning_rate": 8.545569620253166e-06, + "loss": 0.0591, + "step": 6250 + }, + { + "epoch": 3.03, + "learning_rate": 8.539240506329115e-06, + "loss": 0.0421, + "step": 6275 + }, + { + "epoch": 3.03, + "learning_rate": 8.532911392405065e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 3.03, + "learning_rate": 8.526582278481014e-06, + "loss": 0.0574, + "step": 6325 + }, + { + "epoch": 3.03, + "learning_rate": 8.520253164556963e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 3.03, + "learning_rate": 8.513924050632913e-06, + "loss": 0.0486, + "step": 6375 + }, + { + "epoch": 3.03, + "learning_rate": 8.507594936708862e-06, + "loss": 0.0502, + "step": 6400 + }, + { + "epoch": 3.03, + "learning_rate": 8.501265822784812e-06, + "loss": 0.0415, + "step": 6425 + }, + { + "epoch": 3.03, + "learning_rate": 8.494936708860761e-06, + "loss": 0.0449, + "step": 6450 + }, + { + "epoch": 3.03, + "learning_rate": 8.488607594936709e-06, + "loss": 0.0525, + "step": 6475 + }, + { + "epoch": 3.03, + "learning_rate": 8.482278481012658e-06, + "loss": 0.0401, + "step": 6500 + }, + { + "epoch": 3.03, + "learning_rate": 8.475949367088608e-06, + "loss": 0.0502, + "step": 6525 + }, + { + "epoch": 3.03, + "learning_rate": 8.469620253164557e-06, + "loss": 0.0453, + "step": 6550 + }, + { + "epoch": 3.03, + "learning_rate": 8.463291139240507e-06, + "loss": 0.0442, + "step": 6575 + }, + { + "epoch": 3.03, + "learning_rate": 8.456962025316456e-06, + "loss": 0.0423, + "step": 6600 + }, + { + "epoch": 3.03, + "learning_rate": 8.450632911392405e-06, + "loss": 0.0518, + "step": 6625 + }, + { + "epoch": 3.03, + "learning_rate": 8.444303797468355e-06, + "loss": 0.0479, + "step": 6650 + }, + { + "epoch": 3.04, + "learning_rate": 8.437974683544304e-06, + "loss": 0.0521, + "step": 6675 + }, + { + "epoch": 3.04, + "learning_rate": 8.431645569620254e-06, + "loss": 0.0436, + "step": 6700 + }, + { + "epoch": 3.04, + "learning_rate": 8.425316455696203e-06, + "loss": 0.0539, + "step": 6725 + }, + { + "epoch": 3.04, + "learning_rate": 8.418987341772153e-06, + "loss": 0.0446, + "step": 6750 + }, + { + "epoch": 3.04, + "learning_rate": 8.412658227848102e-06, + "loss": 0.0521, + "step": 6775 + }, + { + "epoch": 3.04, + "learning_rate": 8.406329113924051e-06, + "loss": 0.0391, + "step": 6800 + }, + { + "epoch": 3.04, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0513, + "step": 6825 + }, + { + "epoch": 3.04, + "learning_rate": 8.39367088607595e-06, + "loss": 0.0449, + "step": 6850 + }, + { + "epoch": 3.04, + "learning_rate": 8.3873417721519e-06, + "loss": 0.0463, + "step": 6875 + }, + { + "epoch": 3.04, + "learning_rate": 8.38101265822785e-06, + "loss": 0.0495, + "step": 6900 + }, + { + "epoch": 3.04, + "learning_rate": 8.374683544303797e-06, + "loss": 0.0628, + "step": 6925 + }, + { + "epoch": 3.04, + "learning_rate": 8.368354430379746e-06, + "loss": 0.0594, + "step": 6950 + }, + { + "epoch": 3.04, + "learning_rate": 8.362025316455696e-06, + "loss": 0.0573, + "step": 6975 + }, + { + "epoch": 3.04, + "learning_rate": 8.355696202531645e-06, + "loss": 0.057, + "step": 7000 + }, + { + "epoch": 3.04, + "eval_loss": 0.25129273533821106, + "eval_runtime": 1479.2043, + "eval_samples_per_second": 7.038, + "eval_steps_per_second": 0.44, + "eval_wer": 57.010683100538216, + "step": 7000 + }, + { + "epoch": 4.0, + "learning_rate": 8.349367088607596e-06, + "loss": 0.0534, + "step": 7025 + }, + { + "epoch": 4.0, + "learning_rate": 8.343037974683546e-06, + "loss": 0.0663, + "step": 7050 + }, + { + "epoch": 4.0, + "learning_rate": 8.336708860759495e-06, + "loss": 0.0698, + "step": 7075 + }, + { + "epoch": 4.0, + "learning_rate": 8.330379746835445e-06, + "loss": 0.0633, + "step": 7100 + }, + { + "epoch": 4.0, + "learning_rate": 8.324050632911394e-06, + "loss": 0.0808, + "step": 7125 + }, + { + "epoch": 4.0, + "learning_rate": 8.317721518987343e-06, + "loss": 0.0699, + "step": 7150 + }, + { + "epoch": 4.0, + "learning_rate": 8.311392405063293e-06, + "loss": 0.0736, + "step": 7175 + }, + { + "epoch": 4.0, + "learning_rate": 8.305063291139242e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 4.01, + "learning_rate": 8.29873417721519e-06, + "loss": 0.0718, + "step": 7225 + }, + { + "epoch": 4.01, + "learning_rate": 8.29240506329114e-06, + "loss": 0.0674, + "step": 7250 + }, + { + "epoch": 4.01, + "learning_rate": 8.286075949367089e-06, + "loss": 0.0568, + "step": 7275 + }, + { + "epoch": 4.01, + "learning_rate": 8.279746835443038e-06, + "loss": 0.0497, + "step": 7300 + }, + { + "epoch": 4.01, + "learning_rate": 8.273417721518988e-06, + "loss": 0.0563, + "step": 7325 + }, + { + "epoch": 4.01, + "learning_rate": 8.267088607594937e-06, + "loss": 0.0649, + "step": 7350 + }, + { + "epoch": 4.01, + "learning_rate": 8.260759493670887e-06, + "loss": 0.042, + "step": 7375 + }, + { + "epoch": 4.01, + "learning_rate": 8.254430379746836e-06, + "loss": 0.0559, + "step": 7400 + }, + { + "epoch": 4.01, + "learning_rate": 8.248101265822785e-06, + "loss": 0.0476, + "step": 7425 + }, + { + "epoch": 4.01, + "learning_rate": 8.241772151898735e-06, + "loss": 0.0427, + "step": 7450 + }, + { + "epoch": 4.01, + "learning_rate": 8.235443037974684e-06, + "loss": 0.0533, + "step": 7475 + }, + { + "epoch": 4.01, + "learning_rate": 8.229113924050634e-06, + "loss": 0.0633, + "step": 7500 + }, + { + "epoch": 4.01, + "learning_rate": 8.222784810126583e-06, + "loss": 0.055, + "step": 7525 + }, + { + "epoch": 4.01, + "learning_rate": 8.216455696202533e-06, + "loss": 0.043, + "step": 7550 + }, + { + "epoch": 4.01, + "learning_rate": 8.210126582278482e-06, + "loss": 0.0421, + "step": 7575 + }, + { + "epoch": 4.01, + "learning_rate": 8.203797468354431e-06, + "loss": 0.0519, + "step": 7600 + }, + { + "epoch": 4.02, + "learning_rate": 8.197468354430381e-06, + "loss": 0.0398, + "step": 7625 + }, + { + "epoch": 4.02, + "learning_rate": 8.19113924050633e-06, + "loss": 0.041, + "step": 7650 + }, + { + "epoch": 4.02, + "learning_rate": 8.184810126582278e-06, + "loss": 0.0477, + "step": 7675 + }, + { + "epoch": 4.02, + "learning_rate": 8.178481012658227e-06, + "loss": 0.046, + "step": 7700 + }, + { + "epoch": 4.02, + "learning_rate": 8.172151898734177e-06, + "loss": 0.0465, + "step": 7725 + }, + { + "epoch": 4.02, + "learning_rate": 8.165822784810126e-06, + "loss": 0.0359, + "step": 7750 + }, + { + "epoch": 4.02, + "learning_rate": 8.159493670886076e-06, + "loss": 0.0362, + "step": 7775 + }, + { + "epoch": 4.02, + "learning_rate": 8.153164556962025e-06, + "loss": 0.04, + "step": 7800 + }, + { + "epoch": 4.02, + "learning_rate": 8.146835443037975e-06, + "loss": 0.0433, + "step": 7825 + }, + { + "epoch": 4.02, + "learning_rate": 8.140506329113926e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.02, + "learning_rate": 8.134177215189875e-06, + "loss": 0.0396, + "step": 7875 + }, + { + "epoch": 4.02, + "learning_rate": 8.127848101265825e-06, + "loss": 0.0472, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 8.121518987341774e-06, + "loss": 0.0533, + "step": 7925 + }, + { + "epoch": 4.02, + "learning_rate": 8.115189873417722e-06, + "loss": 0.0564, + "step": 7950 + }, + { + "epoch": 4.02, + "learning_rate": 8.108860759493671e-06, + "loss": 0.0508, + "step": 7975 + }, + { + "epoch": 4.02, + "learning_rate": 8.10253164556962e-06, + "loss": 0.0375, + "step": 8000 + }, + { + "epoch": 4.02, + "eval_loss": 0.252152681350708, + "eval_runtime": 1456.2607, + "eval_samples_per_second": 7.149, + "eval_steps_per_second": 0.447, + "eval_wer": 44.38120945056017, + "step": 8000 + }, + { + "epoch": 4.03, + "learning_rate": 8.09620253164557e-06, + "loss": 0.0296, + "step": 8025 + }, + { + "epoch": 4.03, + "learning_rate": 8.08987341772152e-06, + "loss": 0.0395, + "step": 8050 + }, + { + "epoch": 4.03, + "learning_rate": 8.083544303797469e-06, + "loss": 0.0409, + "step": 8075 + }, + { + "epoch": 4.03, + "learning_rate": 8.077215189873418e-06, + "loss": 0.0343, + "step": 8100 + }, + { + "epoch": 4.03, + "learning_rate": 8.070886075949368e-06, + "loss": 0.0321, + "step": 8125 + }, + { + "epoch": 4.03, + "learning_rate": 8.064556962025317e-06, + "loss": 0.0333, + "step": 8150 + }, + { + "epoch": 4.03, + "learning_rate": 8.058227848101267e-06, + "loss": 0.0327, + "step": 8175 + }, + { + "epoch": 4.03, + "learning_rate": 8.051898734177216e-06, + "loss": 0.0318, + "step": 8200 + }, + { + "epoch": 4.03, + "learning_rate": 8.045569620253165e-06, + "loss": 0.0405, + "step": 8225 + }, + { + "epoch": 4.03, + "learning_rate": 8.039240506329115e-06, + "loss": 0.0382, + "step": 8250 + }, + { + "epoch": 4.03, + "learning_rate": 8.032911392405064e-06, + "loss": 0.0317, + "step": 8275 + }, + { + "epoch": 4.03, + "learning_rate": 8.026582278481014e-06, + "loss": 0.0285, + "step": 8300 + }, + { + "epoch": 4.03, + "learning_rate": 8.020253164556963e-06, + "loss": 0.0285, + "step": 8325 + }, + { + "epoch": 4.03, + "learning_rate": 8.013924050632913e-06, + "loss": 0.0269, + "step": 8350 + }, + { + "epoch": 4.03, + "learning_rate": 8.007594936708862e-06, + "loss": 0.0334, + "step": 8375 + }, + { + "epoch": 4.03, + "learning_rate": 8.00126582278481e-06, + "loss": 0.0406, + "step": 8400 + }, + { + "epoch": 4.04, + "learning_rate": 7.99493670886076e-06, + "loss": 0.038, + "step": 8425 + }, + { + "epoch": 4.04, + "learning_rate": 7.988607594936709e-06, + "loss": 0.0315, + "step": 8450 + }, + { + "epoch": 4.04, + "learning_rate": 7.982278481012658e-06, + "loss": 0.0329, + "step": 8475 + }, + { + "epoch": 4.04, + "learning_rate": 7.975949367088607e-06, + "loss": 0.0373, + "step": 8500 + }, + { + "epoch": 4.04, + "learning_rate": 7.969620253164557e-06, + "loss": 0.0514, + "step": 8525 + }, + { + "epoch": 4.04, + "learning_rate": 7.963291139240506e-06, + "loss": 0.0349, + "step": 8550 + }, + { + "epoch": 4.04, + "learning_rate": 7.956962025316456e-06, + "loss": 0.0385, + "step": 8575 + }, + { + "epoch": 4.04, + "learning_rate": 7.950632911392405e-06, + "loss": 0.0367, + "step": 8600 + }, + { + "epoch": 4.04, + "learning_rate": 7.944303797468355e-06, + "loss": 0.0317, + "step": 8625 + }, + { + "epoch": 4.04, + "learning_rate": 7.937974683544304e-06, + "loss": 0.0368, + "step": 8650 + }, + { + "epoch": 4.04, + "learning_rate": 7.931645569620255e-06, + "loss": 0.045, + "step": 8675 + }, + { + "epoch": 4.04, + "learning_rate": 7.925316455696203e-06, + "loss": 0.0456, + "step": 8700 + }, + { + "epoch": 4.04, + "learning_rate": 7.918987341772152e-06, + "loss": 0.0421, + "step": 8725 + }, + { + "epoch": 4.04, + "learning_rate": 7.912658227848102e-06, + "loss": 0.043, + "step": 8750 + }, + { + "epoch": 5.0, + "learning_rate": 7.906329113924051e-06, + "loss": 0.0428, + "step": 8775 + }, + { + "epoch": 5.0, + "learning_rate": 7.9e-06, + "loss": 0.0459, + "step": 8800 + }, + { + "epoch": 5.0, + "learning_rate": 7.89367088607595e-06, + "loss": 0.0467, + "step": 8825 + }, + { + "epoch": 5.0, + "learning_rate": 7.8873417721519e-06, + "loss": 0.0462, + "step": 8850 + }, + { + "epoch": 5.0, + "learning_rate": 7.881012658227849e-06, + "loss": 0.0578, + "step": 8875 + }, + { + "epoch": 5.0, + "learning_rate": 7.874683544303798e-06, + "loss": 0.0422, + "step": 8900 + }, + { + "epoch": 5.0, + "learning_rate": 7.868354430379748e-06, + "loss": 0.0546, + "step": 8925 + }, + { + "epoch": 5.0, + "learning_rate": 7.862025316455697e-06, + "loss": 0.0515, + "step": 8950 + }, + { + "epoch": 5.01, + "learning_rate": 7.855696202531647e-06, + "loss": 0.0541, + "step": 8975 + }, + { + "epoch": 5.01, + "learning_rate": 7.849367088607596e-06, + "loss": 0.0479, + "step": 9000 + }, + { + "epoch": 5.01, + "eval_loss": 0.2375001162290573, + "eval_runtime": 1456.1088, + "eval_samples_per_second": 7.15, + "eval_steps_per_second": 0.447, + "eval_wer": 31.58262735979447, + "step": 9000 + } + ], + "max_steps": 40000, + "num_train_epochs": 9223372036854775807, + "total_flos": 4.1541868376064e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-9000/training_args.bin b/checkpoint-9000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..564d185423f68ad0f8f970d2500e546cd559019a --- /dev/null +++ b/checkpoint-9000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0289e1bf8fd1a33f7c3c9d3e15cc9bdd13d10e9774e0ada934c0b9d71501a993 +size 4091