Upload 10 files

Browse files

Files changed (10) hide show

README.md +62 -3
config.json +51 -0
generation_config.json +252 -0
merges.txt +0 -0
preprocessor_config.json +14 -0
pytorch_model.bin +3 -0
special_tokens_map.json +133 -0
tokenizer.json +0 -0
trainer_state.json +1149 -0
vocab.json +0 -0

README.md CHANGED Viewed

@@ -1,3 +1,62 @@
----
-license: apache-2.0
----

+---
+language:
+- ar
+metrics:
+- wer
+base_model:
+- openai/whisper-medium
+pipeline_tag: automatic-speech-recognition
+tags:
+- whisper
+- arabic
+- pytorch
+license: apache-2.0
+---
+# WhisperLevantineArabic
+**Fine-tuned Whisper model for the Levantine Dialect (Israeli-Arabic)**
+## Model Description
+This model is a fine-tuned version of [Whisper Medium](https://github.com/openai/whisper) tailored specifically for transcribing Levantine Arabic, focusing on the Israeli dialect. It is designed to improve automatic speech recognition (ASR) performance for this particular variant of Arabic.
+- **Base Model**: Whisper Medium
+- **Fine-tuned for**: Levantine Arabic (Israeli Dialect)
+- **WER on test set**: 14%
+## Training Data
+The dataset used for training and fine-tuning this model consists of approximately 2,200 hours of transcribed audio, primarily featuring Israeli Levantine Arabic, along with some general Levantine Arabic content. The data sources include:
+1. **Self-maintained Collection**: 2,000 hours of audio data curated by the team, covering a wide range of Israeli Levantine Arabic speech.
+2. **[MGB-2 Corpus (Filtered)](https://huggingface.co/datasets/BelalElhossany/mgb2_audios_transcriptions_preprocessed)**: 200 hours of broadcast media in Arabic.
+3. **[CommonVoice18 (Filtered)](https://huggingface.co/datasets/fsicoli/common_voice_18_0)**: A filtered portion of the CommonVoice18 dataset.
+Filtering was applied using the [AlcLaM](https://arxiv.org/abs/2407.13097) Arabic language model to ensure relevance to Levantine Arabic.
+- **Total Dataset Size**: ~2,200 hours
+- **Sampling Rate**: 16kHz
+- **Annotation**: Human-transcribed and annotated for high accuracy.
+## How to Use
+The model expects 16 kHz audio input. Resample files recorded at a different sample rate to 16 kHz before transcription. You can load the model as follows:
+```python
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
+import torch
+import torchaudio
+# Load the model and processor
+processor = WhisperProcessor.from_pretrained("HebArabNlpProject/whisperLevantine")
+model = WhisperForConditionalGeneration.from_pretrained("HebArabNlpProject/whisperLevantine").to("cuda" if torch.cuda.is_available() else "cpu")
+# Example usage: processing audio input
+file_path = ...  # wav filepath goes here
+audio_input, samplerate = torchaudio.load(file_path)
+inputs = processor(audio_input.squeeze(), return_tensors="pt", sampling_rate=samplerate).to("cuda" if torch.cuda.is_available() else "cpu")
+# Run inference
+with torch.no_grad():
+    generated_ids = model.generate(inputs["input_features"])
+transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)
+print(transcription[0])
+```

config.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "_name_or_path": "whisper-medium-ar-aug24-cont2/checkpoint-3000",
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "apply_spec_augment": false,
+  "architectures": [
+    "WhisperForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "begin_suppress_tokens": [
+    220,
+    50257
+  ],
+  "bos_token_id": 50257,
+  "classifier_proj_size": 256,
+  "d_model": 1024,
+  "decoder_attention_heads": 16,
+  "decoder_ffn_dim": 4096,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 24,
+  "decoder_start_token_id": 50258,
+  "dropout": 0.0,
+  "encoder_attention_heads": 16,
+  "encoder_ffn_dim": 4096,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 24,
+  "eos_token_id": 50257,
+  "forced_decoder_ids": null,
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": 448,
+  "max_source_positions": 1500,
+  "max_target_positions": 448,
+  "median_filter_width": 7,
+  "model_type": "whisper",
+  "num_hidden_layers": 24,
+  "num_mel_bins": 80,
+  "pad_token_id": 50257,
+  "scale_embedding": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.42.3",
+  "use_cache": false,
+  "use_weighted_layer_sum": false,
+  "vocab_size": 51865
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,252 @@

+{
+  "alignment_heads": [
+    [
+      9,
+      19
+    ],
+    [
+      11,
+      2
+    ],
+    [
+      11,
+      4
+    ],
+    [
+      11,
+      17
+    ],
+    [
+      22,
+      7
+    ],
+    [
+      22,
+      11
+    ],
+    [
+      22,
+      17
+    ],
+    [
+      23,
+      2
+    ],
+    [
+      23,
+      15
+    ]
+  ],
+  "begin_suppress_tokens": [
+    220,
+    50257
+  ],
+  "bos_token_id": 50257,
+  "decoder_start_token_id": 50258,
+  "eos_token_id": 50257,
+  "is_multilingual": true,
+  "lang_to_id": {
+    "<|af|>": 50327,
+    "<|am|>": 50334,
+    "<|ar|>": 50272,
+    "<|as|>": 50350,
+    "<|az|>": 50304,
+    "<|ba|>": 50355,
+    "<|be|>": 50330,
+    "<|bg|>": 50292,
+    "<|bn|>": 50302,
+    "<|bo|>": 50347,
+    "<|br|>": 50309,
+    "<|bs|>": 50315,
+    "<|ca|>": 50270,
+    "<|cs|>": 50283,
+    "<|cy|>": 50297,
+    "<|da|>": 50285,
+    "<|de|>": 50261,
+    "<|el|>": 50281,
+    "<|en|>": 50259,
+    "<|es|>": 50262,
+    "<|et|>": 50307,
+    "<|eu|>": 50310,
+    "<|fa|>": 50300,
+    "<|fi|>": 50277,
+    "<|fo|>": 50338,
+    "<|fr|>": 50265,
+    "<|gl|>": 50319,
+    "<|gu|>": 50333,
+    "<|haw|>": 50352,
+    "<|ha|>": 50354,
+    "<|he|>": 50279,
+    "<|hi|>": 50276,
+    "<|hr|>": 50291,
+    "<|ht|>": 50339,
+    "<|hu|>": 50286,
+    "<|hy|>": 50312,
+    "<|id|>": 50275,
+    "<|is|>": 50311,
+    "<|it|>": 50274,
+    "<|ja|>": 50266,
+    "<|jw|>": 50356,
+    "<|ka|>": 50329,
+    "<|kk|>": 50316,
+    "<|km|>": 50323,
+    "<|kn|>": 50306,
+    "<|ko|>": 50264,
+    "<|la|>": 50294,
+    "<|lb|>": 50345,
+    "<|ln|>": 50353,
+    "<|lo|>": 50336,
+    "<|lt|>": 50293,
+    "<|lv|>": 50301,
+    "<|mg|>": 50349,
+    "<|mi|>": 50295,
+    "<|mk|>": 50308,
+    "<|ml|>": 50296,
+    "<|mn|>": 50314,
+    "<|mr|>": 50320,
+    "<|ms|>": 50282,
+    "<|mt|>": 50343,
+    "<|my|>": 50346,
+    "<|ne|>": 50313,
+    "<|nl|>": 50271,
+    "<|nn|>": 50342,
+    "<|no|>": 50288,
+    "<|oc|>": 50328,
+    "<|pa|>": 50321,
+    "<|pl|>": 50269,
+    "<|ps|>": 50340,
+    "<|pt|>": 50267,
+    "<|ro|>": 50284,
+    "<|ru|>": 50263,
+    "<|sa|>": 50344,
+    "<|sd|>": 50332,
+    "<|si|>": 50322,
+    "<|sk|>": 50298,
+    "<|sl|>": 50305,
+    "<|sn|>": 50324,
+    "<|so|>": 50326,
+    "<|sq|>": 50317,
+    "<|sr|>": 50303,
+    "<|su|>": 50357,
+    "<|sv|>": 50273,
+    "<|sw|>": 50318,
+    "<|ta|>": 50287,
+    "<|te|>": 50299,
+    "<|tg|>": 50331,
+    "<|th|>": 50289,
+    "<|tk|>": 50341,
+    "<|tl|>": 50348,
+    "<|tr|>": 50268,
+    "<|tt|>": 50351,
+    "<|uk|>": 50280,
+    "<|ur|>": 50290,
+    "<|uz|>": 50337,
+    "<|vi|>": 50278,
+    "<|yi|>": 50335,
+    "<|yo|>": 50325,
+    "<|zh|>": 50260
+  },
+  "language": "arabic",
+  "max_initial_timestamp_index": 50,
+  "max_length": 448,
+  "no_timestamps_token_id": 50363,
+  "pad_token_id": 50257,
+  "prev_sot_token_id": 50361,
+  "return_timestamps": false,
+  "suppress_tokens": [
+    1,
+    2,
+    7,
+    8,
+    9,
+    10,
+    14,
+    25,
+    26,
+    27,
+    28,
+    29,
+    31,
+    58,
+    59,
+    60,
+    61,
+    62,
+    63,
+    90,
+    91,
+    92,
+    93,
+    359,
+    503,
+    522,
+    542,
+    873,
+    893,
+    902,
+    918,
+    922,
+    931,
+    1350,
+    1853,
+    1982,
+    2460,
+    2627,
+    3246,
+    3253,
+    3268,
+    3536,
+    3846,
+    3961,
+    4183,
+    4667,
+    6585,
+    6647,
+    7273,
+    9061,
+    9383,
+    10428,
+    10929,
+    11938,
+    12033,
+    12331,
+    12562,
+    13793,
+    14157,
+    14635,
+    15265,
+    15618,
+    16553,
+    16604,
+    18362,
+    18956,
+    20075,
+    21675,
+    22520,
+    26130,
+    26161,
+    26435,
+    28279,
+    29464,
+    31650,
+    32302,
+    32470,
+    36865,
+    42863,
+    47425,
+    49870,
+    50254,
+    50258,
+    50358,
+    50359,
+    50360,
+    50361,
+    50362
+  ],
+  "task": "transcribe",
+  "task_to_id": {
+    "transcribe": 50359,
+    "translate": 50358
+  },
+  "transformers_version": "4.42.3"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "chunk_length": 30,
+  "feature_extractor_type": "WhisperFeatureExtractor",
+  "feature_size": 80,
+  "hop_length": 160,
+  "n_fft": 400,
+  "n_samples": 480000,
+  "nb_max_frames": 3000,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "WhisperProcessor",
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:02ecb7d0e9c7b74395f834ae1ddd1823bfdde14df6a601ebc6722be468ec7264
+size 3055783638

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,133 @@

+{
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "<|startoftranscript|>",
+    "<|en|>",
+    "<|zh|>",
+    "<|de|>",
+    "<|es|>",
+    "<|ru|>",
+    "<|ko|>",
+    "<|fr|>",
+    "<|ja|>",
+    "<|pt|>",
+    "<|tr|>",
+    "<|pl|>",
+    "<|ca|>",
+    "<|nl|>",
+    "<|ar|>",
+    "<|sv|>",
+    "<|it|>",
+    "<|id|>",
+    "<|hi|>",
+    "<|fi|>",
+    "<|vi|>",
+    "<|he|>",
+    "<|uk|>",
+    "<|el|>",
+    "<|ms|>",
+    "<|cs|>",
+    "<|ro|>",
+    "<|da|>",
+    "<|hu|>",
+    "<|ta|>",
+    "<|no|>",
+    "<|th|>",
+    "<|ur|>",
+    "<|hr|>",
+    "<|bg|>",
+    "<|lt|>",
+    "<|la|>",
+    "<|mi|>",
+    "<|ml|>",
+    "<|cy|>",
+    "<|sk|>",
+    "<|te|>",
+    "<|fa|>",
+    "<|lv|>",
+    "<|bn|>",
+    "<|sr|>",
+    "<|az|>",
+    "<|sl|>",
+    "<|kn|>",
+    "<|et|>",
+    "<|mk|>",
+    "<|br|>",
+    "<|eu|>",
+    "<|is|>",
+    "<|hy|>",
+    "<|ne|>",
+    "<|mn|>",
+    "<|bs|>",
+    "<|kk|>",
+    "<|sq|>",
+    "<|sw|>",
+    "<|gl|>",
+    "<|mr|>",
+    "<|pa|>",
+    "<|si|>",
+    "<|km|>",
+    "<|sn|>",
+    "<|yo|>",
+    "<|so|>",
+    "<|af|>",
+    "<|oc|>",
+    "<|ka|>",
+    "<|be|>",
+    "<|tg|>",
+    "<|sd|>",
+    "<|gu|>",
+    "<|am|>",
+    "<|yi|>",
+    "<|lo|>",
+    "<|uz|>",
+    "<|fo|>",
+    "<|ht|>",
+    "<|ps|>",
+    "<|tk|>",
+    "<|nn|>",
+    "<|mt|>",
+    "<|sa|>",
+    "<|lb|>",
+    "<|my|>",
+    "<|bo|>",
+    "<|tl|>",
+    "<|mg|>",
+    "<|as|>",
+    "<|tt|>",
+    "<|haw|>",
+    "<|ln|>",
+    "<|ha|>",
+    "<|ba|>",
+    "<|jw|>",
+    "<|su|>",
+    "<|translate|>",
+    "<|transcribe|>",
+    "<|startoflm|>",
+    "<|startofprev|>",
+    "<|nocaptions|>",
+    "<|notimestamps|>"
+  ],
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1149 @@

+{
+  "best_metric": 13.76934528961673,
+  "best_model_checkpoint": "/speechbrain/data/whis/whisper-medium-ar-aug30-cont3/checkpoint-3600",
+  "epoch": 0.7889546351084813,
+  "eval_steps": 300,
+  "global_step": 3600,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.005478851632697786,
+      "grad_norm": 0.5829852819442749,
+      "learning_rate": 2.5000000000000004e-07,
+      "loss": 0.1288,
+      "step": 25
+    },
+    {
+      "epoch": 0.010957703265395573,
+      "grad_norm": 0.5360992550849915,
+      "learning_rate": 5.000000000000001e-07,
+      "loss": 0.13,
+      "step": 50
+    },
+    {
+      "epoch": 0.01643655489809336,
+      "grad_norm": 0.5283806324005127,
+      "learning_rate": 7.5e-07,
+      "loss": 0.1272,
+      "step": 75
+    },
+    {
+      "epoch": 0.021915406530791146,
+      "grad_norm": 0.5019442439079285,
+      "learning_rate": 1.0000000000000002e-06,
+      "loss": 0.1282,
+      "step": 100
+    },
+    {
+      "epoch": 0.027394258163488932,
+      "grad_norm": 0.4962950050830841,
+      "learning_rate": 1.25e-06,
+      "loss": 0.1225,
+      "step": 125
+    },
+    {
+      "epoch": 0.03287310979618672,
+      "grad_norm": 0.4953068792819977,
+      "learning_rate": 1.5e-06,
+      "loss": 0.1201,
+      "step": 150
+    },
+    {
+      "epoch": 0.03835196142888451,
+      "grad_norm": 0.5007308125495911,
+      "learning_rate": 1.75e-06,
+      "loss": 0.1216,
+      "step": 175
+    },
+    {
+      "epoch": 0.04383081306158229,
+      "grad_norm": 0.5254662036895752,
+      "learning_rate": 2.0000000000000003e-06,
+      "loss": 0.1179,
+      "step": 200
+    },
+    {
+      "epoch": 0.04930966469428008,
+      "grad_norm": 0.45517662167549133,
+      "learning_rate": 2.25e-06,
+      "loss": 0.1171,
+      "step": 225
+    },
+    {
+      "epoch": 0.054788516326977864,
+      "grad_norm": 0.4700426459312439,
+      "learning_rate": 2.5e-06,
+      "loss": 0.1135,
+      "step": 250
+    },
+    {
+      "epoch": 0.060267367959675654,
+      "grad_norm": 0.48102590441703796,
+      "learning_rate": 2.7500000000000004e-06,
+      "loss": 0.1108,
+      "step": 275
+    },
+    {
+      "epoch": 0.06574621959237344,
+      "grad_norm": 0.46885138750076294,
+      "learning_rate": 3e-06,
+      "loss": 0.111,
+      "step": 300
+    },
+    {
+      "epoch": 0.06574621959237344,
+      "eval_loss": 0.10784981399774551,
+      "eval_runtime": 1329.228,
+      "eval_samples_per_second": 2.505,
+      "eval_steps_per_second": 0.053,
+      "eval_wer": 13.824164156539604,
+      "step": 300
+    },
+    {
+      "epoch": 0.07122507122507123,
+      "grad_norm": 0.5266237854957581,
+      "learning_rate": 3.2500000000000002e-06,
+      "loss": 0.1135,
+      "step": 325
+    },
+    {
+      "epoch": 0.07670392285776902,
+      "grad_norm": 0.5073336958885193,
+      "learning_rate": 3.5e-06,
+      "loss": 0.1102,
+      "step": 350
+    },
+    {
+      "epoch": 0.08218277449046679,
+      "grad_norm": 0.4480571150779724,
+      "learning_rate": 3.7500000000000005e-06,
+      "loss": 0.1083,
+      "step": 375
+    },
+    {
+      "epoch": 0.08766162612316458,
+      "grad_norm": 0.5027902126312256,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 0.1056,
+      "step": 400
+    },
+    {
+      "epoch": 0.09314047775586237,
+      "grad_norm": 0.5120052695274353,
+      "learning_rate": 4.25e-06,
+      "loss": 0.1039,
+      "step": 425
+    },
+    {
+      "epoch": 0.09861932938856016,
+      "grad_norm": 0.48518097400665283,
+      "learning_rate": 4.5e-06,
+      "loss": 0.1043,
+      "step": 450
+    },
+    {
+      "epoch": 0.10409818102125794,
+      "grad_norm": 0.5022168159484863,
+      "learning_rate": 4.75e-06,
+      "loss": 0.1037,
+      "step": 475
+    },
+    {
+      "epoch": 0.10957703265395573,
+      "grad_norm": 0.4528847932815552,
+      "learning_rate": 5e-06,
+      "loss": 0.0985,
+      "step": 500
+    },
+    {
+      "epoch": 0.11505588428665352,
+      "grad_norm": 0.49148285388946533,
+      "learning_rate": 4.9986227412957256e-06,
+      "loss": 0.1004,
+      "step": 525
+    },
+    {
+      "epoch": 0.12053473591935131,
+      "grad_norm": 0.4991222321987152,
+      "learning_rate": 4.99724548259145e-06,
+      "loss": 0.1,
+      "step": 550
+    },
+    {
+      "epoch": 0.12601358755204908,
+      "grad_norm": 0.4580934941768646,
+      "learning_rate": 4.995868223887175e-06,
+      "loss": 0.0961,
+      "step": 575
+    },
+    {
+      "epoch": 0.13149243918474687,
+      "grad_norm": 0.4897126853466034,
+      "learning_rate": 4.9944909651829e-06,
+      "loss": 0.1,
+      "step": 600
+    },
+    {
+      "epoch": 0.13149243918474687,
+      "eval_loss": 0.1092229038476944,
+      "eval_runtime": 1332.663,
+      "eval_samples_per_second": 2.499,
+      "eval_steps_per_second": 0.053,
+      "eval_wer": 13.810892220337223,
+      "step": 600
+    },
+    {
+      "epoch": 0.13697129081744466,
+      "grad_norm": 0.47739720344543457,
+      "learning_rate": 4.993113706478625e-06,
+      "loss": 0.1049,
+      "step": 625
+    },
+    {
+      "epoch": 0.14245014245014245,
+      "grad_norm": 0.5223620533943176,
+      "learning_rate": 4.9917364477743505e-06,
+      "loss": 0.1001,
+      "step": 650
+    },
+    {
+      "epoch": 0.14792899408284024,
+      "grad_norm": 0.4523641765117645,
+      "learning_rate": 4.990359189070076e-06,
+      "loss": 0.0986,
+      "step": 675
+    },
+    {
+      "epoch": 0.15340784571553803,
+      "grad_norm": 0.47216111421585083,
+      "learning_rate": 4.988981930365801e-06,
+      "loss": 0.1015,
+      "step": 700
+    },
+    {
+      "epoch": 0.15888669734823582,
+      "grad_norm": 0.5023489594459534,
+      "learning_rate": 4.987604671661525e-06,
+      "loss": 0.0987,
+      "step": 725
+    },
+    {
+      "epoch": 0.16436554898093358,
+      "grad_norm": 0.5276343822479248,
+      "learning_rate": 4.98622741295725e-06,
+      "loss": 0.1003,
+      "step": 750
+    },
+    {
+      "epoch": 0.16984440061363137,
+      "grad_norm": 0.49953222274780273,
+      "learning_rate": 4.9848501542529754e-06,
+      "loss": 0.0983,
+      "step": 775
+    },
+    {
+      "epoch": 0.17532325224632916,
+      "grad_norm": 0.45781460404396057,
+      "learning_rate": 4.983472895548701e-06,
+      "loss": 0.0978,
+      "step": 800
+    },
+    {
+      "epoch": 0.18080210387902695,
+      "grad_norm": 0.4533933997154236,
+      "learning_rate": 4.982095636844425e-06,
+      "loss": 0.0984,
+      "step": 825
+    },
+    {
+      "epoch": 0.18628095551172474,
+      "grad_norm": 0.5115811824798584,
+      "learning_rate": 4.98071837814015e-06,
+      "loss": 0.097,
+      "step": 850
+    },
+    {
+      "epoch": 0.19175980714442253,
+      "grad_norm": 0.5033650994300842,
+      "learning_rate": 4.979341119435875e-06,
+      "loss": 0.0985,
+      "step": 875
+    },
+    {
+      "epoch": 0.19723865877712032,
+      "grad_norm": 0.4879961609840393,
+      "learning_rate": 4.9779638607316e-06,
+      "loss": 0.0968,
+      "step": 900
+    },
+    {
+      "epoch": 0.19723865877712032,
+      "eval_loss": 0.10996146500110626,
+      "eval_runtime": 1382.8033,
+      "eval_samples_per_second": 2.408,
+      "eval_steps_per_second": 0.051,
+      "eval_wer": 13.83570497062863,
+      "step": 900
+    },
+    {
+      "epoch": 0.2027175104098181,
+      "grad_norm": 0.4946504831314087,
+      "learning_rate": 4.9765866020273255e-06,
+      "loss": 0.0975,
+      "step": 925
+    },
+    {
+      "epoch": 0.20819636204251588,
+      "grad_norm": 0.5116554498672485,
+      "learning_rate": 4.975209343323051e-06,
+      "loss": 0.0973,
+      "step": 950
+    },
+    {
+      "epoch": 0.21367521367521367,
+      "grad_norm": 0.5216028094291687,
+      "learning_rate": 4.973832084618776e-06,
+      "loss": 0.097,
+      "step": 975
+    },
+    {
+      "epoch": 0.21915406530791146,
+      "grad_norm": 0.5032294392585754,
+      "learning_rate": 4.9724548259145e-06,
+      "loss": 0.0909,
+      "step": 1000
+    },
+    {
+      "epoch": 0.22463291694060925,
+      "grad_norm": 0.526467502117157,
+      "learning_rate": 4.971077567210225e-06,
+      "loss": 0.0969,
+      "step": 1025
+    },
+    {
+      "epoch": 0.23011176857330704,
+      "grad_norm": 0.488610178232193,
+      "learning_rate": 4.9697003085059505e-06,
+      "loss": 0.098,
+      "step": 1050
+    },
+    {
+      "epoch": 0.23559062020600482,
+      "grad_norm": 0.47755196690559387,
+      "learning_rate": 4.968323049801675e-06,
+      "loss": 0.0931,
+      "step": 1075
+    },
+    {
+      "epoch": 0.24106947183870261,
+      "grad_norm": 0.5348175168037415,
+      "learning_rate": 4.9669457910974e-06,
+      "loss": 0.0952,
+      "step": 1100
+    },
+    {
+      "epoch": 0.2465483234714004,
+      "grad_norm": 0.48804572224617004,
+      "learning_rate": 4.965568532393125e-06,
+      "loss": 0.0954,
+      "step": 1125
+    },
+    {
+      "epoch": 0.25202717510409817,
+      "grad_norm": 0.48517024517059326,
+      "learning_rate": 4.96419127368885e-06,
+      "loss": 0.0958,
+      "step": 1150
+    },
+    {
+      "epoch": 0.25750602673679596,
+      "grad_norm": 0.5918833017349243,
+      "learning_rate": 4.9628140149845745e-06,
+      "loss": 0.0958,
+      "step": 1175
+    },
+    {
+      "epoch": 0.26298487836949375,
+      "grad_norm": 0.5274895429611206,
+      "learning_rate": 4.9614367562803e-06,
+      "loss": 0.0967,
+      "step": 1200
+    },
+    {
+      "epoch": 0.26298487836949375,
+      "eval_loss": 0.11020273715257645,
+      "eval_runtime": 1336.5505,
+      "eval_samples_per_second": 2.491,
+      "eval_steps_per_second": 0.052,
+      "eval_wer": 14.059019723251279,
+      "step": 1200
+    },
+    {
+      "epoch": 0.26846373000219154,
+      "grad_norm": 0.4892116189002991,
+      "learning_rate": 4.960059497576025e-06,
+      "loss": 0.0929,
+      "step": 1225
+    },
+    {
+      "epoch": 0.2739425816348893,
+      "grad_norm": 0.4998278319835663,
+      "learning_rate": 4.95868223887175e-06,
+      "loss": 0.0948,
+      "step": 1250
+    },
+    {
+      "epoch": 0.2794214332675871,
+      "grad_norm": 0.5273219347000122,
+      "learning_rate": 4.957304980167475e-06,
+      "loss": 0.0907,
+      "step": 1275
+    },
+    {
+      "epoch": 0.2849002849002849,
+      "grad_norm": 0.47056299448013306,
+      "learning_rate": 4.9559277214632e-06,
+      "loss": 0.091,
+      "step": 1300
+    },
+    {
+      "epoch": 0.2903791365329827,
+      "grad_norm": 0.4882357716560364,
+      "learning_rate": 4.9545504627589255e-06,
+      "loss": 0.0941,
+      "step": 1325
+    },
+    {
+      "epoch": 0.2958579881656805,
+      "grad_norm": 0.5165619850158691,
+      "learning_rate": 4.95317320405465e-06,
+      "loss": 0.0921,
+      "step": 1350
+    },
+    {
+      "epoch": 0.3013368397983783,
+      "grad_norm": 0.4642132520675659,
+      "learning_rate": 4.951795945350375e-06,
+      "loss": 0.0914,
+      "step": 1375
+    },
+    {
+      "epoch": 0.30681569143107607,
+      "grad_norm": 0.5326189398765564,
+      "learning_rate": 4.9504186866461e-06,
+      "loss": 0.0959,
+      "step": 1400
+    },
+    {
+      "epoch": 0.31229454306377386,
+      "grad_norm": 0.44957414269447327,
+      "learning_rate": 4.949041427941824e-06,
+      "loss": 0.0871,
+      "step": 1425
+    },
+    {
+      "epoch": 0.31777339469647164,
+      "grad_norm": 0.4865795373916626,
+      "learning_rate": 4.9476641692375496e-06,
+      "loss": 0.0891,
+      "step": 1450
+    },
+    {
+      "epoch": 0.3232522463291694,
+      "grad_norm": 0.49055206775665283,
+      "learning_rate": 4.946286910533275e-06,
+      "loss": 0.0953,
+      "step": 1475
+    },
+    {
+      "epoch": 0.32873109796186717,
+      "grad_norm": 0.49437183141708374,
+      "learning_rate": 4.944909651829e-06,
+      "loss": 0.0896,
+      "step": 1500
+    },
+    {
+      "epoch": 0.32873109796186717,
+      "eval_loss": 0.11099947988986969,
+      "eval_runtime": 1334.6828,
+      "eval_samples_per_second": 2.495,
+      "eval_steps_per_second": 0.052,
+      "eval_wer": 13.836282011333079,
+      "step": 1500
+    },
+    {
+      "epoch": 0.33420994959456496,
+      "grad_norm": 0.5322751998901367,
+      "learning_rate": 4.943532393124725e-06,
+      "loss": 0.0929,
+      "step": 1525
+    },
+    {
+      "epoch": 0.33968880122726275,
+      "grad_norm": 0.5024107098579407,
+      "learning_rate": 4.94215513442045e-06,
+      "loss": 0.0922,
+      "step": 1550
+    },
+    {
+      "epoch": 0.34516765285996054,
+      "grad_norm": 0.4347039759159088,
+      "learning_rate": 4.940777875716175e-06,
+      "loss": 0.0908,
+      "step": 1575
+    },
+    {
+      "epoch": 0.35064650449265833,
+      "grad_norm": 0.5164802074432373,
+      "learning_rate": 4.9394006170119e-06,
+      "loss": 0.0939,
+      "step": 1600
+    },
+    {
+      "epoch": 0.3561253561253561,
+      "grad_norm": 0.4986899793148041,
+      "learning_rate": 4.938023358307625e-06,
+      "loss": 0.0883,
+      "step": 1625
+    },
+    {
+      "epoch": 0.3616042077580539,
+      "grad_norm": 0.5192301869392395,
+      "learning_rate": 4.93664609960335e-06,
+      "loss": 0.0915,
+      "step": 1650
+    },
+    {
+      "epoch": 0.3670830593907517,
+      "grad_norm": 0.5347697734832764,
+      "learning_rate": 4.935268840899075e-06,
+      "loss": 0.0884,
+      "step": 1675
+    },
+    {
+      "epoch": 0.3725619110234495,
+      "grad_norm": 0.47178414463996887,
+      "learning_rate": 4.9338915821947994e-06,
+      "loss": 0.0922,
+      "step": 1700
+    },
+    {
+      "epoch": 0.3780407626561473,
+      "grad_norm": 0.4868011772632599,
+      "learning_rate": 4.932514323490525e-06,
+      "loss": 0.0925,
+      "step": 1725
+    },
+    {
+      "epoch": 0.38351961428884507,
+      "grad_norm": 0.491805762052536,
+      "learning_rate": 4.93113706478625e-06,
+      "loss": 0.091,
+      "step": 1750
+    },
+    {
+      "epoch": 0.38899846592154286,
+      "grad_norm": 0.5111169219017029,
+      "learning_rate": 4.929759806081975e-06,
+      "loss": 0.0888,
+      "step": 1775
+    },
+    {
+      "epoch": 0.39447731755424065,
+      "grad_norm": 0.4957449436187744,
+      "learning_rate": 4.9283825473777e-06,
+      "loss": 0.0907,
+      "step": 1800
+    },
+    {
+      "epoch": 0.39447731755424065,
+      "eval_loss": 0.11147266626358032,
+      "eval_runtime": 1330.9042,
+      "eval_samples_per_second": 2.502,
+      "eval_steps_per_second": 0.053,
+      "eval_wer": 14.133457974125493,
+      "step": 1800
+    },
+    {
+      "epoch": 0.39995616918693844,
+      "grad_norm": 0.46782732009887695,
+      "learning_rate": 4.927005288673425e-06,
+      "loss": 0.0882,
+      "step": 1825
+    },
+    {
+      "epoch": 0.4054350208196362,
+      "grad_norm": 0.4959644079208374,
+      "learning_rate": 4.92562802996915e-06,
+      "loss": 0.0925,
+      "step": 1850
+    },
+    {
+      "epoch": 0.410913872452334,
+      "grad_norm": 0.4934210479259491,
+      "learning_rate": 4.924250771264875e-06,
+      "loss": 0.0901,
+      "step": 1875
+    },
+    {
+      "epoch": 0.41639272408503175,
+      "grad_norm": 0.520613968372345,
+      "learning_rate": 4.9228735125606e-06,
+      "loss": 0.0893,
+      "step": 1900
+    },
+    {
+      "epoch": 0.42187157571772954,
+      "grad_norm": 0.48207858204841614,
+      "learning_rate": 4.921496253856325e-06,
+      "loss": 0.0918,
+      "step": 1925
+    },
+    {
+      "epoch": 0.42735042735042733,
+      "grad_norm": 0.5212067365646362,
+      "learning_rate": 4.920118995152049e-06,
+      "loss": 0.0915,
+      "step": 1950
+    },
+    {
+      "epoch": 0.4328292789831251,
+      "grad_norm": 0.4570591449737549,
+      "learning_rate": 4.9187417364477744e-06,
+      "loss": 0.0879,
+      "step": 1975
+    },
+    {
+      "epoch": 0.4383081306158229,
+      "grad_norm": 0.5075387954711914,
+      "learning_rate": 4.9173644777435e-06,
+      "loss": 0.0921,
+      "step": 2000
+    },
+    {
+      "epoch": 0.4437869822485207,
+      "grad_norm": 0.4904765784740448,
+      "learning_rate": 4.915987219039225e-06,
+      "loss": 0.0892,
+      "step": 2025
+    },
+    {
+      "epoch": 0.4492658338812185,
+      "grad_norm": 0.4949191212654114,
+      "learning_rate": 4.91460996033495e-06,
+      "loss": 0.0909,
+      "step": 2050
+    },
+    {
+      "epoch": 0.4547446855139163,
+      "grad_norm": 0.5112493634223938,
+      "learning_rate": 4.913232701630675e-06,
+      "loss": 0.089,
+      "step": 2075
+    },
+    {
+      "epoch": 0.46022353714661407,
+      "grad_norm": 0.47857844829559326,
+      "learning_rate": 4.9118554429264e-06,
+      "loss": 0.0901,
+      "step": 2100
+    },
+    {
+      "epoch": 0.46022353714661407,
+      "eval_loss": 0.112032450735569,
+      "eval_runtime": 1387.0176,
+      "eval_samples_per_second": 2.401,
+      "eval_steps_per_second": 0.05,
+      "eval_wer": 13.988043716603768,
+      "step": 2100
+    },
+    {
+      "epoch": 0.46570238877931186,
+      "grad_norm": 0.4768081307411194,
+      "learning_rate": 4.9104781842221245e-06,
+      "loss": 0.0874,
+      "step": 2125
+    },
+    {
+      "epoch": 0.47118124041200965,
+      "grad_norm": 0.4740845859050751,
+      "learning_rate": 4.90910092551785e-06,
+      "loss": 0.0885,
+      "step": 2150
+    },
+    {
+      "epoch": 0.47666009204470744,
+      "grad_norm": 0.4519156813621521,
+      "learning_rate": 4.907723666813575e-06,
+      "loss": 0.0867,
+      "step": 2175
+    },
+    {
+      "epoch": 0.48213894367740523,
+      "grad_norm": 0.5068197250366211,
+      "learning_rate": 4.9063464081093e-06,
+      "loss": 0.0878,
+      "step": 2200
+    },
+    {
+      "epoch": 0.487617795310103,
+      "grad_norm": 0.49033084511756897,
+      "learning_rate": 4.904969149405024e-06,
+      "loss": 0.0862,
+      "step": 2225
+    },
+    {
+      "epoch": 0.4930966469428008,
+      "grad_norm": 0.4625925123691559,
+      "learning_rate": 4.9035918907007495e-06,
+      "loss": 0.0866,
+      "step": 2250
+    },
+    {
+      "epoch": 0.4985754985754986,
+      "grad_norm": 0.5056318640708923,
+      "learning_rate": 4.902214631996475e-06,
+      "loss": 0.086,
+      "step": 2275
+    },
+    {
+      "epoch": 0.5040543502081963,
+      "grad_norm": 0.46904438734054565,
+      "learning_rate": 4.9008373732922e-06,
+      "loss": 0.0836,
+      "step": 2300
+    },
+    {
+      "epoch": 0.5095332018408941,
+      "grad_norm": 0.5033324360847473,
+      "learning_rate": 4.899460114587924e-06,
+      "loss": 0.0879,
+      "step": 2325
+    },
+    {
+      "epoch": 0.5150120534735919,
+      "grad_norm": 0.5081333518028259,
+      "learning_rate": 4.898082855883649e-06,
+      "loss": 0.0867,
+      "step": 2350
+    },
+    {
+      "epoch": 0.5204909051062897,
+      "grad_norm": 0.44954633712768555,
+      "learning_rate": 4.896705597179374e-06,
+      "loss": 0.0859,
+      "step": 2375
+    },
+    {
+      "epoch": 0.5259697567389875,
+      "grad_norm": 0.5036991238594055,
+      "learning_rate": 4.8953283384750996e-06,
+      "loss": 0.0823,
+      "step": 2400
+    },
+    {
+      "epoch": 0.5259697567389875,
+      "eval_loss": 0.11307456344366074,
+      "eval_runtime": 1331.1273,
+      "eval_samples_per_second": 2.502,
+      "eval_steps_per_second": 0.053,
+      "eval_wer": 13.977079943219195,
+      "step": 2400
+    },
+    {
+      "epoch": 0.5314486083716853,
+      "grad_norm": 0.48715198040008545,
+      "learning_rate": 4.893951079770825e-06,
+      "loss": 0.0853,
+      "step": 2425
+    },
+    {
+      "epoch": 0.5369274600043831,
+      "grad_norm": 0.5139690041542053,
+      "learning_rate": 4.89257382106655e-06,
+      "loss": 0.0874,
+      "step": 2450
+    },
+    {
+      "epoch": 0.5424063116370809,
+      "grad_norm": 0.49623942375183105,
+      "learning_rate": 4.891196562362275e-06,
+      "loss": 0.0893,
+      "step": 2475
+    },
+    {
+      "epoch": 0.5478851632697787,
+      "grad_norm": 0.5240609645843506,
+      "learning_rate": 4.889819303657999e-06,
+      "loss": 0.0857,
+      "step": 2500
+    },
+    {
+      "epoch": 0.5533640149024764,
+      "grad_norm": 0.5464821457862854,
+      "learning_rate": 4.8884420449537245e-06,
+      "loss": 0.0858,
+      "step": 2525
+    },
+    {
+      "epoch": 0.5588428665351742,
+      "grad_norm": 0.49569082260131836,
+      "learning_rate": 4.88706478624945e-06,
+      "loss": 0.085,
+      "step": 2550
+    },
+    {
+      "epoch": 0.564321718167872,
+      "grad_norm": 0.5617781281471252,
+      "learning_rate": 4.885687527545174e-06,
+      "loss": 0.0861,
+      "step": 2575
+    },
+    {
+      "epoch": 0.5698005698005698,
+      "grad_norm": 0.538022518157959,
+      "learning_rate": 4.884310268840899e-06,
+      "loss": 0.0868,
+      "step": 2600
+    },
+    {
+      "epoch": 0.5752794214332676,
+      "grad_norm": 0.4421217143535614,
+      "learning_rate": 4.882933010136624e-06,
+      "loss": 0.085,
+      "step": 2625
+    },
+    {
+      "epoch": 0.5807582730659654,
+      "grad_norm": 0.4933975040912628,
+      "learning_rate": 4.881555751432349e-06,
+      "loss": 0.0836,
+      "step": 2650
+    },
+    {
+      "epoch": 0.5862371246986632,
+      "grad_norm": 0.5269121527671814,
+      "learning_rate": 4.880178492728075e-06,
+      "loss": 0.0855,
+      "step": 2675
+    },
+    {
+      "epoch": 0.591715976331361,
+      "grad_norm": 0.49818453192710876,
+      "learning_rate": 4.8788012340238e-06,
+      "loss": 0.0818,
+      "step": 2700
+    },
+    {
+      "epoch": 0.591715976331361,
+      "eval_loss": 0.11333612352609634,
+      "eval_runtime": 1354.7808,
+      "eval_samples_per_second": 2.458,
+      "eval_steps_per_second": 0.052,
+      "eval_wer": 14.018049833235235,
+      "step": 2700
+    },
+    {
+      "epoch": 0.5971948279640588,
+      "grad_norm": 0.5359761714935303,
+      "learning_rate": 4.877423975319525e-06,
+      "loss": 0.0803,
+      "step": 2725
+    },
+    {
+      "epoch": 0.6026736795967566,
+      "grad_norm": 0.5219433903694153,
+      "learning_rate": 4.876046716615249e-06,
+      "loss": 0.0835,
+      "step": 2750
+    },
+    {
+      "epoch": 0.6081525312294543,
+      "grad_norm": 0.4877767562866211,
+      "learning_rate": 4.874669457910974e-06,
+      "loss": 0.0827,
+      "step": 2775
+    },
+    {
+      "epoch": 0.6136313828621521,
+      "grad_norm": 0.47034549713134766,
+      "learning_rate": 4.8732921992066995e-06,
+      "loss": 0.0803,
+      "step": 2800
+    },
+    {
+      "epoch": 0.6191102344948499,
+      "grad_norm": 0.5331267714500427,
+      "learning_rate": 4.871914940502425e-06,
+      "loss": 0.0827,
+      "step": 2825
+    },
+    {
+      "epoch": 0.6245890861275477,
+      "grad_norm": 0.5360026955604553,
+      "learning_rate": 4.870537681798149e-06,
+      "loss": 0.0848,
+      "step": 2850
+    },
+    {
+      "epoch": 0.6300679377602455,
+      "grad_norm": 0.5023711323738098,
+      "learning_rate": 4.869160423093874e-06,
+      "loss": 0.0816,
+      "step": 2875
+    },
+    {
+      "epoch": 0.6355467893929433,
+      "grad_norm": 0.43558841943740845,
+      "learning_rate": 4.867783164389599e-06,
+      "loss": 0.0823,
+      "step": 2900
+    },
+    {
+      "epoch": 0.6410256410256411,
+      "grad_norm": 0.52950519323349,
+      "learning_rate": 4.8664059056853244e-06,
+      "loss": 0.0832,
+      "step": 2925
+    },
+    {
+      "epoch": 0.6465044926583388,
+      "grad_norm": 0.49947696924209595,
+      "learning_rate": 4.86502864698105e-06,
+      "loss": 0.0825,
+      "step": 2950
+    },
+    {
+      "epoch": 0.6519833442910365,
+      "grad_norm": 0.4842943549156189,
+      "learning_rate": 4.863651388276775e-06,
+      "loss": 0.0821,
+      "step": 2975
+    },
+    {
+      "epoch": 0.6574621959237343,
+      "grad_norm": 0.6517378091812134,
+      "learning_rate": 4.8622741295725e-06,
+      "loss": 0.0945,
+      "step": 3000
+    },
+    {
+      "epoch": 0.6574621959237343,
+      "eval_loss": 0.11205233633518219,
+      "eval_runtime": 1330.2589,
+      "eval_samples_per_second": 2.503,
+      "eval_steps_per_second": 0.053,
+      "eval_wer": 13.976502902514742,
+      "step": 3000
+    },
+    {
+      "epoch": 0.6629410475564321,
+      "grad_norm": 0.5949074029922485,
+      "learning_rate": 4.860896870868224e-06,
+      "loss": 0.1253,
+      "step": 3025
+    },
+    {
+      "epoch": 0.6684198991891299,
+      "grad_norm": 0.5628743171691895,
+      "learning_rate": 4.859519612163949e-06,
+      "loss": 0.1232,
+      "step": 3050
+    },
+    {
+      "epoch": 0.6738987508218277,
+      "grad_norm": 0.5839057564735413,
+      "learning_rate": 4.8581423534596745e-06,
+      "loss": 0.1228,
+      "step": 3075
+    },
+    {
+      "epoch": 0.6793776024545255,
+      "grad_norm": 0.5640609860420227,
+      "learning_rate": 4.856765094755399e-06,
+      "loss": 0.1181,
+      "step": 3100
+    },
+    {
+      "epoch": 0.6848564540872233,
+      "grad_norm": 0.6778563261032104,
+      "learning_rate": 4.855387836051124e-06,
+      "loss": 0.1208,
+      "step": 3125
+    },
+    {
+      "epoch": 0.6903353057199211,
+      "grad_norm": 0.603071928024292,
+      "learning_rate": 4.854010577346849e-06,
+      "loss": 0.1193,
+      "step": 3150
+    },
+    {
+      "epoch": 0.6958141573526189,
+      "grad_norm": 0.6698121428489685,
+      "learning_rate": 4.852633318642574e-06,
+      "loss": 0.1209,
+      "step": 3175
+    },
+    {
+      "epoch": 0.7012930089853167,
+      "grad_norm": 0.5631791353225708,
+      "learning_rate": 4.8512560599382995e-06,
+      "loss": 0.1223,
+      "step": 3200
+    },
+    {
+      "epoch": 0.7067718606180144,
+      "grad_norm": 0.5904573798179626,
+      "learning_rate": 4.849878801234025e-06,
+      "loss": 0.119,
+      "step": 3225
+    },
+    {
+      "epoch": 0.7122507122507122,
+      "grad_norm": 0.6524720788002014,
+      "learning_rate": 4.84850154252975e-06,
+      "loss": 0.1194,
+      "step": 3250
+    },
+    {
+      "epoch": 0.71772956388341,
+      "grad_norm": 0.6679468154907227,
+      "learning_rate": 4.847124283825474e-06,
+      "loss": 0.1224,
+      "step": 3275
+    },
+    {
+      "epoch": 0.7232084155161078,
+      "grad_norm": 0.5249156951904297,
+      "learning_rate": 4.845747025121199e-06,
+      "loss": 0.1193,
+      "step": 3300
+    },
+    {
+      "epoch": 0.7232084155161078,
+      "eval_loss": 0.10731059312820435,
+      "eval_runtime": 1334.5691,
+      "eval_samples_per_second": 2.495,
+      "eval_steps_per_second": 0.052,
+      "eval_wer": 13.910143221502844,
+      "step": 3300
+    },
+    {
+      "epoch": 0.7286872671488056,
+      "grad_norm": 0.6170015931129456,
+      "learning_rate": 4.844369766416924e-06,
+      "loss": 0.118,
+      "step": 3325
+    },
+    {
+      "epoch": 0.7341661187815034,
+      "grad_norm": 0.622870922088623,
+      "learning_rate": 4.8429925077126496e-06,
+      "loss": 0.1213,
+      "step": 3350
+    },
+    {
+      "epoch": 0.7396449704142012,
+      "grad_norm": 0.6220366358757019,
+      "learning_rate": 4.841615249008374e-06,
+      "loss": 0.1199,
+      "step": 3375
+    },
+    {
+      "epoch": 0.745123822046899,
+      "grad_norm": 0.6058914661407471,
+      "learning_rate": 4.840237990304099e-06,
+      "loss": 0.1177,
+      "step": 3400
+    },
+    {
+      "epoch": 0.7506026736795968,
+      "grad_norm": 0.618475079536438,
+      "learning_rate": 4.838860731599824e-06,
+      "loss": 0.1193,
+      "step": 3425
+    },
+    {
+      "epoch": 0.7560815253122946,
+      "grad_norm": 0.6013332009315491,
+      "learning_rate": 4.8374834728955485e-06,
+      "loss": 0.1185,
+      "step": 3450
+    },
+    {
+      "epoch": 0.7615603769449923,
+      "grad_norm": 0.5604269504547119,
+      "learning_rate": 4.836106214191274e-06,
+      "loss": 0.1169,
+      "step": 3475
+    },
+    {
+      "epoch": 0.7670392285776901,
+      "grad_norm": 0.5583498477935791,
+      "learning_rate": 4.834728955486999e-06,
+      "loss": 0.1205,
+      "step": 3500
+    },
+    {
+      "epoch": 0.7725180802103879,
+      "grad_norm": 0.5525631904602051,
+      "learning_rate": 4.833351696782724e-06,
+      "loss": 0.1192,
+      "step": 3525
+    },
+    {
+      "epoch": 0.7779969318430857,
+      "grad_norm": 0.5905235409736633,
+      "learning_rate": 4.831974438078449e-06,
+      "loss": 0.1192,
+      "step": 3550
+    },
+    {
+      "epoch": 0.7834757834757835,
+      "grad_norm": 0.5701056122779846,
+      "learning_rate": 4.830597179374174e-06,
+      "loss": 0.1189,
+      "step": 3575
+    },
+    {
+      "epoch": 0.7889546351084813,
+      "grad_norm": 0.5544924139976501,
+      "learning_rate": 4.829219920669899e-06,
+      "loss": 0.1179,
+      "step": 3600
+    },
+    {
+      "epoch": 0.7889546351084813,
+      "eval_loss": 0.10721833258867264,
+      "eval_runtime": 1334.3717,
+      "eval_samples_per_second": 2.496,
+      "eval_steps_per_second": 0.052,
+      "eval_wer": 13.76934528961673,
+      "step": 3600
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 91260,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 300,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.527210695946341e+20,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff