Mohamed2210 committed on Feb 19

Commit

f56d59e

verified ·

1 Parent(s): 1df81c3

Initial commit of wav2vec2 model

Browse files

Files changed (28) hide show

README.md +63 -0
added_tokens.json +4 -0
checkpoint-3600/config.json +116 -0
checkpoint-3600/model.safetensors +3 -0
checkpoint-3600/optimizer.pt +3 -0
checkpoint-3600/preprocessor_config.json +9 -0
checkpoint-3600/rng_state.pth +3 -0
checkpoint-3600/scaler.pt +3 -0
checkpoint-3600/scheduler.pt +3 -0
checkpoint-3600/trainer_state.json +1122 -0
checkpoint-3600/training_args.bin +3 -0
checkpoint-4000/config.json +116 -0
checkpoint-4000/model.safetensors +3 -0
checkpoint-4000/optimizer.pt +3 -0
checkpoint-4000/preprocessor_config.json +9 -0
checkpoint-4000/rng_state.pth +3 -0
checkpoint-4000/scaler.pt +3 -0
checkpoint-4000/scheduler.pt +3 -0
checkpoint-4000/trainer_state.json +1243 -0
checkpoint-4000/training_args.bin +3 -0
config.json +116 -0
model.safetensors +3 -0
preprocessor_config.json +9 -0
runs/Feb18_22-23-26_893f794f09cc/events.out.tfevents.1739918097.893f794f09cc.31.0 +3 -0
special_tokens_map.json +6 -0
tokenizer_config.json +48 -0
training_args.bin +3 -0
vocab.json +58 -0

README.md ADDED Viewed

	@@ -0,0 +1,63 @@

+---
+library_name: transformers
+license: apache-2.0
+base_model: jonatasgrosman/wav2vec2-large-xlsr-53-arabic
+tags:
+- generated_from_trainer
+datasets:
+- common_voice_11_0
+model-index:
+- name: wav2vec2-large-xlsr-ar
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# wav2vec2-large-xlsr-ar
+This model is a fine-tuned version of [jonatasgrosman/wav2vec2-large-xlsr-53-arabic](https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-arabic) on the common_voice_11_0 dataset.
+It achieves the following results on the evaluation set:
+- eval_loss: 0.8298
+- eval_wer: 0.6859
+- eval_runtime: 192.8428
+- eval_samples_per_second: 15.515
+- eval_steps_per_second: 1.939
+- epoch: 5.1217
+- step: 4000
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0003
+- train_batch_size: 16
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 32
+- optimizer: adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 500
+- num_epochs: 10
+- mixed_precision_training: Native AMP
+### Framework versions
+- Transformers 4.49.0
+- Pytorch 2.5.1+cu121
+- Datasets 3.3.1
+- Tokenizers 0.21.0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "</s>": 57,
+  "<s>": 56
+}

checkpoint-3600/config.json ADDED Viewed

	@@ -0,0 +1,116 @@

+{
+  "_name_or_path": "jonatasgrosman/wav2vec2-large-xlsr-53-arabic",
+  "activation_dropout": 0.05,
+  "adapter_attn_dim": null,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 256,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": true,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.0,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.1,
+  "mask_channel_length": 10,
+  "mask_channel_min_space": 1,
+  "mask_channel_other": 0.0,
+  "mask_channel_prob": 0.0,
+  "mask_channel_selection": "static",
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_min_space": 1,
+  "mask_time_other": 0.0,
+  "mask_time_prob": 0.05,
+  "mask_time_selection": "static",
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 24,
+  "num_negatives": 100,
+  "output_hidden_size": 1024,
+  "pad_token_id": 55,
+  "proj_codevector_dim": 256,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 58,
+  "xvector_output_dim": 512
+}

checkpoint-3600/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d774b699a30727eb33a5e7217da3e149f61b2d95b6bd7b5e736dc668157ca205
+size 1262045280

checkpoint-3600/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ed6a832e72a245371d5398a7bd48dca9483460443752acdfa374b33280553c5
+size 2490635318

checkpoint-3600/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "return_attention_mask": true,
+  "sampling_rate": 16000
+}

checkpoint-3600/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:771b93fdea688e3ab7e29159ff3cadf87094f642ead96ea353a019167376a1c7
+size 14244

checkpoint-3600/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fa16ed8883894544f2c7e1b4e8394eab28d70a7345c1739e0f82c96289b9ceee
+size 988

checkpoint-3600/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f0a13d8337e5762d7c6ad79cc94c41699eda28a00da0ddc051c003391dec30b3
+size 1064

checkpoint-3600/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1122 @@

+{
+  "best_metric": 0.678503046127067,
+  "best_model_checkpoint": "./wav2vec2-large-xlsr-ar/checkpoint-3600",
+  "epoch": 4.609865470852018,
+  "eval_steps": 400,
+  "global_step": 3600,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.032030749519538756,
+      "grad_norm": 40.31281661987305,
+      "learning_rate": 1.3799999999999998e-05,
+      "loss": 25.5997,
+      "step": 25
+    },
+    {
+      "epoch": 0.06406149903907751,
+      "grad_norm": 70.23408508300781,
+      "learning_rate": 2.7599999999999997e-05,
+      "loss": 30.9717,
+      "step": 50
+    },
+    {
+      "epoch": 0.09609224855861627,
+      "grad_norm": 3.012640953063965,
+      "learning_rate": 4.259999999999999e-05,
+      "loss": 9.1896,
+      "step": 75
+    },
+    {
+      "epoch": 0.12812299807815503,
+      "grad_norm": 30.363168716430664,
+      "learning_rate": 5.76e-05,
+      "loss": 12.5434,
+      "step": 100
+    },
+    {
+      "epoch": 0.1601537475976938,
+      "grad_norm": 13.795882225036621,
+      "learning_rate": 7.259999999999999e-05,
+      "loss": 5.814,
+      "step": 125
+    },
+    {
+      "epoch": 0.19218449711723254,
+      "grad_norm": 99.32404327392578,
+      "learning_rate": 8.759999999999999e-05,
+      "loss": 9.7835,
+      "step": 150
+    },
+    {
+      "epoch": 0.2242152466367713,
+      "grad_norm": 4.108926773071289,
+      "learning_rate": 0.0001026,
+      "loss": 5.1191,
+      "step": 175
+    },
+    {
+      "epoch": 0.25624599615631005,
+      "grad_norm": 7.492372512817383,
+      "learning_rate": 0.0001176,
+      "loss": 3.7315,
+      "step": 200
+    },
+    {
+      "epoch": 0.2882767456758488,
+      "grad_norm": 7.183516502380371,
+      "learning_rate": 0.0001326,
+      "loss": 3.6219,
+      "step": 225
+    },
+    {
+      "epoch": 0.3203074951953876,
+      "grad_norm": 2.490111827850342,
+      "learning_rate": 0.00014759999999999998,
+      "loss": 3.6824,
+      "step": 250
+    },
+    {
+      "epoch": 0.3523382447149263,
+      "grad_norm": 0.5032612681388855,
+      "learning_rate": 0.0001626,
+      "loss": 3.5972,
+      "step": 275
+    },
+    {
+      "epoch": 0.3843689942344651,
+      "grad_norm": 3.7791531085968018,
+      "learning_rate": 0.00017759999999999998,
+      "loss": 3.6051,
+      "step": 300
+    },
+    {
+      "epoch": 0.41639974375400385,
+      "grad_norm": 2.6355323791503906,
+      "learning_rate": 0.0001926,
+      "loss": 3.5084,
+      "step": 325
+    },
+    {
+      "epoch": 0.4484304932735426,
+      "grad_norm": 1.8424248695373535,
+      "learning_rate": 0.00020759999999999998,
+      "loss": 3.6254,
+      "step": 350
+    },
+    {
+      "epoch": 0.48046124279308133,
+      "grad_norm": 2.8042099475860596,
+      "learning_rate": 0.0002226,
+      "loss": 3.5539,
+      "step": 375
+    },
+    {
+      "epoch": 0.5124919923126201,
+      "grad_norm": 1.1292295455932617,
+      "learning_rate": 0.0002376,
+      "loss": 3.5401,
+      "step": 400
+    },
+    {
+      "epoch": 0.5124919923126201,
+      "eval_loss": 3.4790358543395996,
+      "eval_runtime": 190.1673,
+      "eval_samples_per_second": 15.734,
+      "eval_steps_per_second": 1.967,
+      "eval_wer": 1.0,
+      "step": 400
+    },
+    {
+      "epoch": 0.5445227418321589,
+      "grad_norm": 5.499414443969727,
+      "learning_rate": 0.00025259999999999996,
+      "loss": 3.4746,
+      "step": 425
+    },
+    {
+      "epoch": 0.5765534913516976,
+      "grad_norm": 3.2174575328826904,
+      "learning_rate": 0.0002676,
+      "loss": 3.5553,
+      "step": 450
+    },
+    {
+      "epoch": 0.6085842408712364,
+      "grad_norm": 0.8652946352958679,
+      "learning_rate": 0.0002826,
+      "loss": 3.4252,
+      "step": 475
+    },
+    {
+      "epoch": 0.6406149903907752,
+      "grad_norm": 2.645606517791748,
+      "learning_rate": 0.00029759999999999997,
+      "loss": 3.5508,
+      "step": 500
+    },
+    {
+      "epoch": 0.672645739910314,
+      "grad_norm": 0.9275538921356201,
+      "learning_rate": 0.00029913698630136987,
+      "loss": 3.4857,
+      "step": 525
+    },
+    {
+      "epoch": 0.7046764894298526,
+      "grad_norm": 1.0166261196136475,
+      "learning_rate": 0.00029810958904109586,
+      "loss": 3.5138,
+      "step": 550
+    },
+    {
+      "epoch": 0.7367072389493914,
+      "grad_norm": 4.685708045959473,
+      "learning_rate": 0.0002970821917808219,
+      "loss": 3.4017,
+      "step": 575
+    },
+    {
+      "epoch": 0.7687379884689302,
+      "grad_norm": 1.1367671489715576,
+      "learning_rate": 0.0002960547945205479,
+      "loss": 3.4927,
+      "step": 600
+    },
+    {
+      "epoch": 0.8007687379884689,
+      "grad_norm": 2.971071481704712,
+      "learning_rate": 0.00029502739726027395,
+      "loss": 3.4265,
+      "step": 625
+    },
+    {
+      "epoch": 0.8327994875080077,
+      "grad_norm": 2.657762289047241,
+      "learning_rate": 0.000294,
+      "loss": 3.4645,
+      "step": 650
+    },
+    {
+      "epoch": 0.8648302370275465,
+      "grad_norm": 2.87245774269104,
+      "learning_rate": 0.000292972602739726,
+      "loss": 3.3777,
+      "step": 675
+    },
+    {
+      "epoch": 0.8968609865470852,
+      "grad_norm": 1.2535868883132935,
+      "learning_rate": 0.00029194520547945203,
+      "loss": 3.4021,
+      "step": 700
+    },
+    {
+      "epoch": 0.928891736066624,
+      "grad_norm": 0.7670681476593018,
+      "learning_rate": 0.00029091780821917807,
+      "loss": 3.3667,
+      "step": 725
+    },
+    {
+      "epoch": 0.9609224855861627,
+      "grad_norm": 1.4213225841522217,
+      "learning_rate": 0.0002898904109589041,
+      "loss": 3.3387,
+      "step": 750
+    },
+    {
+      "epoch": 0.9929532351057014,
+      "grad_norm": 1.161726951599121,
+      "learning_rate": 0.0002888630136986301,
+      "loss": 2.7481,
+      "step": 775
+    },
+    {
+      "epoch": 1.0243433696348494,
+      "grad_norm": 2.047264337539673,
+      "learning_rate": 0.00028783561643835616,
+      "loss": 2.002,
+      "step": 800
+    },
+    {
+      "epoch": 1.0243433696348494,
+      "eval_loss": 1.6117621660232544,
+      "eval_runtime": 190.6575,
+      "eval_samples_per_second": 15.693,
+      "eval_steps_per_second": 1.962,
+      "eval_wer": 0.9870322019147084,
+      "step": 800
+    },
+    {
+      "epoch": 1.0563741191543883,
+      "grad_norm": 3.6471316814422607,
+      "learning_rate": 0.00028680821917808215,
+      "loss": 1.7004,
+      "step": 825
+    },
+    {
+      "epoch": 1.088404868673927,
+      "grad_norm": 1.8413678407669067,
+      "learning_rate": 0.0002857808219178082,
+      "loss": 1.4543,
+      "step": 850
+    },
+    {
+      "epoch": 1.1204356181934658,
+      "grad_norm": 2.3082125186920166,
+      "learning_rate": 0.00028475342465753424,
+      "loss": 1.3853,
+      "step": 875
+    },
+    {
+      "epoch": 1.1524663677130045,
+      "grad_norm": 1.7227452993392944,
+      "learning_rate": 0.00028372602739726023,
+      "loss": 1.336,
+      "step": 900
+    },
+    {
+      "epoch": 1.1844971172325431,
+      "grad_norm": 2.1250977516174316,
+      "learning_rate": 0.0002826986301369863,
+      "loss": 1.349,
+      "step": 925
+    },
+    {
+      "epoch": 1.216527866752082,
+      "grad_norm": 1.6314936876296997,
+      "learning_rate": 0.0002816712328767123,
+      "loss": 1.2656,
+      "step": 950
+    },
+    {
+      "epoch": 1.2485586162716207,
+      "grad_norm": 2.001681089401245,
+      "learning_rate": 0.00028064383561643837,
+      "loss": 1.2556,
+      "step": 975
+    },
+    {
+      "epoch": 1.2805893657911596,
+      "grad_norm": 2.1396918296813965,
+      "learning_rate": 0.00027961643835616436,
+      "loss": 1.1932,
+      "step": 1000
+    },
+    {
+      "epoch": 1.3126201153106982,
+      "grad_norm": 1.9855870008468628,
+      "learning_rate": 0.0002785890410958904,
+      "loss": 1.1991,
+      "step": 1025
+    },
+    {
+      "epoch": 1.344650864830237,
+      "grad_norm": 1.1789072751998901,
+      "learning_rate": 0.0002775616438356164,
+      "loss": 1.1041,
+      "step": 1050
+    },
+    {
+      "epoch": 1.3766816143497758,
+      "grad_norm": 2.304903507232666,
+      "learning_rate": 0.00027653424657534244,
+      "loss": 1.1538,
+      "step": 1075
+    },
+    {
+      "epoch": 1.4087123638693146,
+      "grad_norm": 2.459096670150757,
+      "learning_rate": 0.00027550684931506843,
+      "loss": 1.2379,
+      "step": 1100
+    },
+    {
+      "epoch": 1.4407431133888533,
+      "grad_norm": 1.886155605316162,
+      "learning_rate": 0.00027447945205479453,
+      "loss": 1.1326,
+      "step": 1125
+    },
+    {
+      "epoch": 1.472773862908392,
+      "grad_norm": 1.1992785930633545,
+      "learning_rate": 0.0002734520547945205,
+      "loss": 1.0691,
+      "step": 1150
+    },
+    {
+      "epoch": 1.5048046124279308,
+      "grad_norm": 3.249142646789551,
+      "learning_rate": 0.00027242465753424657,
+      "loss": 1.0511,
+      "step": 1175
+    },
+    {
+      "epoch": 1.5368353619474697,
+      "grad_norm": 3.3806302547454834,
+      "learning_rate": 0.00027139726027397256,
+      "loss": 1.0618,
+      "step": 1200
+    },
+    {
+      "epoch": 1.5368353619474697,
+      "eval_loss": 1.0360716581344604,
+      "eval_runtime": 191.2082,
+      "eval_samples_per_second": 15.648,
+      "eval_steps_per_second": 1.956,
+      "eval_wer": 0.8269799825935596,
+      "step": 1200
+    },
+    {
+      "epoch": 1.5688661114670084,
+      "grad_norm": 2.508125066757202,
+      "learning_rate": 0.0002703698630136986,
+      "loss": 1.0652,
+      "step": 1225
+    },
+    {
+      "epoch": 1.600896860986547,
+      "grad_norm": 1.3717399835586548,
+      "learning_rate": 0.00026934246575342465,
+      "loss": 1.0354,
+      "step": 1250
+    },
+    {
+      "epoch": 1.6329276105060857,
+      "grad_norm": 1.5011590719223022,
+      "learning_rate": 0.00026831506849315064,
+      "loss": 1.0427,
+      "step": 1275
+    },
+    {
+      "epoch": 1.6649583600256246,
+      "grad_norm": 1.6448092460632324,
+      "learning_rate": 0.0002672876712328767,
+      "loss": 1.0498,
+      "step": 1300
+    },
+    {
+      "epoch": 1.6969891095451635,
+      "grad_norm": 1.4456716775894165,
+      "learning_rate": 0.0002662602739726027,
+      "loss": 1.0481,
+      "step": 1325
+    },
+    {
+      "epoch": 1.7290198590647021,
+      "grad_norm": 1.2869809865951538,
+      "learning_rate": 0.0002652328767123288,
+      "loss": 1.0271,
+      "step": 1350
+    },
+    {
+      "epoch": 1.7610506085842408,
+      "grad_norm": 4.315392971038818,
+      "learning_rate": 0.00026420547945205477,
+      "loss": 0.978,
+      "step": 1375
+    },
+    {
+      "epoch": 1.7930813581037797,
+      "grad_norm": 1.3269984722137451,
+      "learning_rate": 0.0002631780821917808,
+      "loss": 0.9891,
+      "step": 1400
+    },
+    {
+      "epoch": 1.8251121076233185,
+      "grad_norm": 1.6529700756072998,
+      "learning_rate": 0.0002621506849315068,
+      "loss": 0.9917,
+      "step": 1425
+    },
+    {
+      "epoch": 1.8571428571428572,
+      "grad_norm": 2.1668319702148438,
+      "learning_rate": 0.00026112328767123285,
+      "loss": 0.9745,
+      "step": 1450
+    },
+    {
+      "epoch": 1.8891736066623959,
+      "grad_norm": 6.553292751312256,
+      "learning_rate": 0.0002600958904109589,
+      "loss": 0.9898,
+      "step": 1475
+    },
+    {
+      "epoch": 1.9212043561819345,
+      "grad_norm": 1.2242108583450317,
+      "learning_rate": 0.0002590684931506849,
+      "loss": 0.9363,
+      "step": 1500
+    },
+    {
+      "epoch": 1.9532351057014734,
+      "grad_norm": 2.026926040649414,
+      "learning_rate": 0.00025804109589041093,
+      "loss": 0.9731,
+      "step": 1525
+    },
+    {
+      "epoch": 1.9852658552210123,
+      "grad_norm": 1.560719609260559,
+      "learning_rate": 0.000257013698630137,
+      "loss": 0.8685,
+      "step": 1550
+    },
+    {
+      "epoch": 2.01665598975016,
+      "grad_norm": 1.0007785558700562,
+      "learning_rate": 0.000255986301369863,
+      "loss": 0.8586,
+      "step": 1575
+    },
+    {
+      "epoch": 2.048686739269699,
+      "grad_norm": 1.0924744606018066,
+      "learning_rate": 0.000254958904109589,
+      "loss": 0.8025,
+      "step": 1600
+    },
+    {
+      "epoch": 2.048686739269699,
+      "eval_loss": 0.8233081102371216,
+      "eval_runtime": 191.163,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.956,
+      "eval_wer": 0.751348999129678,
+      "step": 1600
+    },
+    {
+      "epoch": 2.0807174887892375,
+      "grad_norm": 1.9123071432113647,
+      "learning_rate": 0.00025393150684931506,
+      "loss": 0.8919,
+      "step": 1625
+    },
+    {
+      "epoch": 2.1127482383087766,
+      "grad_norm": 1.7388331890106201,
+      "learning_rate": 0.00025290410958904105,
+      "loss": 0.8667,
+      "step": 1650
+    },
+    {
+      "epoch": 2.144778987828315,
+      "grad_norm": 1.247045874595642,
+      "learning_rate": 0.0002518767123287671,
+      "loss": 0.8716,
+      "step": 1675
+    },
+    {
+      "epoch": 2.176809737347854,
+      "grad_norm": 2.1057279109954834,
+      "learning_rate": 0.00025084931506849314,
+      "loss": 0.7654,
+      "step": 1700
+    },
+    {
+      "epoch": 2.2088404868673925,
+      "grad_norm": 1.0074440240859985,
+      "learning_rate": 0.00024982191780821913,
+      "loss": 0.8732,
+      "step": 1725
+    },
+    {
+      "epoch": 2.2408712363869316,
+      "grad_norm": 2.159853219985962,
+      "learning_rate": 0.0002487945205479452,
+      "loss": 0.7941,
+      "step": 1750
+    },
+    {
+      "epoch": 2.2729019859064703,
+      "grad_norm": 1.8050284385681152,
+      "learning_rate": 0.0002477671232876712,
+      "loss": 0.8496,
+      "step": 1775
+    },
+    {
+      "epoch": 2.304932735426009,
+      "grad_norm": 1.3987536430358887,
+      "learning_rate": 0.00024673972602739727,
+      "loss": 0.8332,
+      "step": 1800
+    },
+    {
+      "epoch": 2.3369634849455476,
+      "grad_norm": 2.5281410217285156,
+      "learning_rate": 0.00024571232876712326,
+      "loss": 0.8135,
+      "step": 1825
+    },
+    {
+      "epoch": 2.3689942344650863,
+      "grad_norm": 1.914908766746521,
+      "learning_rate": 0.0002446849315068493,
+      "loss": 0.8067,
+      "step": 1850
+    },
+    {
+      "epoch": 2.4010249839846254,
+      "grad_norm": 1.1049237251281738,
+      "learning_rate": 0.00024365753424657533,
+      "loss": 0.8027,
+      "step": 1875
+    },
+    {
+      "epoch": 2.433055733504164,
+      "grad_norm": 2.2418999671936035,
+      "learning_rate": 0.00024263013698630134,
+      "loss": 0.7857,
+      "step": 1900
+    },
+    {
+      "epoch": 2.4650864830237027,
+      "grad_norm": 1.4093470573425293,
+      "learning_rate": 0.00024160273972602736,
+      "loss": 0.8265,
+      "step": 1925
+    },
+    {
+      "epoch": 2.4971172325432414,
+      "grad_norm": 1.396606206893921,
+      "learning_rate": 0.0002405753424657534,
+      "loss": 0.7601,
+      "step": 1950
+    },
+    {
+      "epoch": 2.5291479820627805,
+      "grad_norm": 1.2854044437408447,
+      "learning_rate": 0.00023954794520547945,
+      "loss": 0.8408,
+      "step": 1975
+    },
+    {
+      "epoch": 2.561178731582319,
+      "grad_norm": 2.2053070068359375,
+      "learning_rate": 0.00023852054794520547,
+      "loss": 0.7199,
+      "step": 2000
+    },
+    {
+      "epoch": 2.561178731582319,
+      "eval_loss": 0.7817878127098083,
+      "eval_runtime": 192.0652,
+      "eval_samples_per_second": 15.578,
+      "eval_steps_per_second": 1.947,
+      "eval_wer": 0.7203655352480418,
+      "step": 2000
+    },
+    {
+      "epoch": 2.593209481101858,
+      "grad_norm": 1.6103401184082031,
+      "learning_rate": 0.0002374931506849315,
+      "loss": 0.7848,
+      "step": 2025
+    },
+    {
+      "epoch": 2.6252402306213964,
+      "grad_norm": 3.00805401802063,
+      "learning_rate": 0.0002364657534246575,
+      "loss": 0.7485,
+      "step": 2050
+    },
+    {
+      "epoch": 2.657270980140935,
+      "grad_norm": 1.0826023817062378,
+      "learning_rate": 0.00023543835616438353,
+      "loss": 0.8131,
+      "step": 2075
+    },
+    {
+      "epoch": 2.689301729660474,
+      "grad_norm": 2.3294951915740967,
+      "learning_rate": 0.00023441095890410955,
+      "loss": 0.7616,
+      "step": 2100
+    },
+    {
+      "epoch": 2.721332479180013,
+      "grad_norm": 1.232429027557373,
+      "learning_rate": 0.0002333835616438356,
+      "loss": 0.7925,
+      "step": 2125
+    },
+    {
+      "epoch": 2.7533632286995515,
+      "grad_norm": 1.8985693454742432,
+      "learning_rate": 0.00023235616438356164,
+      "loss": 0.7829,
+      "step": 2150
+    },
+    {
+      "epoch": 2.78539397821909,
+      "grad_norm": 1.1546630859375,
+      "learning_rate": 0.00023132876712328765,
+      "loss": 0.8053,
+      "step": 2175
+    },
+    {
+      "epoch": 2.8174247277386293,
+      "grad_norm": 1.2817527055740356,
+      "learning_rate": 0.00023030136986301367,
+      "loss": 0.7452,
+      "step": 2200
+    },
+    {
+      "epoch": 2.849455477258168,
+      "grad_norm": 1.7231945991516113,
+      "learning_rate": 0.00022927397260273972,
+      "loss": 0.7817,
+      "step": 2225
+    },
+    {
+      "epoch": 2.8814862267777066,
+      "grad_norm": 2.686530113220215,
+      "learning_rate": 0.00022824657534246574,
+      "loss": 0.7506,
+      "step": 2250
+    },
+    {
+      "epoch": 2.9135169762972453,
+      "grad_norm": 1.3286162614822388,
+      "learning_rate": 0.00022721917808219176,
+      "loss": 0.7947,
+      "step": 2275
+    },
+    {
+      "epoch": 2.945547725816784,
+      "grad_norm": 1.3969508409500122,
+      "learning_rate": 0.00022619178082191777,
+      "loss": 0.7174,
+      "step": 2300
+    },
+    {
+      "epoch": 2.977578475336323,
+      "grad_norm": 1.835070013999939,
+      "learning_rate": 0.0002251643835616438,
+      "loss": 0.7531,
+      "step": 2325
+    },
+    {
+      "epoch": 3.008968609865471,
+      "grad_norm": 2.8138489723205566,
+      "learning_rate": 0.00022413698630136986,
+      "loss": 0.7638,
+      "step": 2350
+    },
+    {
+      "epoch": 3.0409993593850095,
+      "grad_norm": 1.0872498750686646,
+      "learning_rate": 0.00022310958904109588,
+      "loss": 0.6102,
+      "step": 2375
+    },
+    {
+      "epoch": 3.073030108904548,
+      "grad_norm": 2.0995171070098877,
+      "learning_rate": 0.0002220821917808219,
+      "loss": 0.675,
+      "step": 2400
+    },
+    {
+      "epoch": 3.073030108904548,
+      "eval_loss": 0.8002874255180359,
+      "eval_runtime": 191.1551,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.957,
+      "eval_wer": 0.7121845082680592,
+      "step": 2400
+    },
+    {
+      "epoch": 3.1050608584240873,
+      "grad_norm": 1.8755576610565186,
+      "learning_rate": 0.00022105479452054792,
+      "loss": 0.5922,
+      "step": 2425
+    },
+    {
+      "epoch": 3.137091607943626,
+      "grad_norm": 0.8880970478057861,
+      "learning_rate": 0.00022002739726027397,
+      "loss": 0.6929,
+      "step": 2450
+    },
+    {
+      "epoch": 3.1691223574631646,
+      "grad_norm": 2.357203722000122,
+      "learning_rate": 0.00021899999999999998,
+      "loss": 0.5997,
+      "step": 2475
+    },
+    {
+      "epoch": 3.2011531069827033,
+      "grad_norm": 0.9104003310203552,
+      "learning_rate": 0.000217972602739726,
+      "loss": 0.728,
+      "step": 2500
+    },
+    {
+      "epoch": 3.233183856502242,
+      "grad_norm": 1.5452452898025513,
+      "learning_rate": 0.00021694520547945202,
+      "loss": 0.5799,
+      "step": 2525
+    },
+    {
+      "epoch": 3.265214606021781,
+      "grad_norm": 0.8508313894271851,
+      "learning_rate": 0.00021591780821917807,
+      "loss": 0.6859,
+      "step": 2550
+    },
+    {
+      "epoch": 3.2972453555413197,
+      "grad_norm": 1.8509936332702637,
+      "learning_rate": 0.0002148904109589041,
+      "loss": 0.6178,
+      "step": 2575
+    },
+    {
+      "epoch": 3.3292761050608584,
+      "grad_norm": 1.355774998664856,
+      "learning_rate": 0.00021386301369863013,
+      "loss": 0.6908,
+      "step": 2600
+    },
+    {
+      "epoch": 3.361306854580397,
+      "grad_norm": 1.322189211845398,
+      "learning_rate": 0.00021283561643835615,
+      "loss": 0.6134,
+      "step": 2625
+    },
+    {
+      "epoch": 3.393337604099936,
+      "grad_norm": 1.606176495552063,
+      "learning_rate": 0.00021180821917808217,
+      "loss": 0.6974,
+      "step": 2650
+    },
+    {
+      "epoch": 3.425368353619475,
+      "grad_norm": 1.7283929586410522,
+      "learning_rate": 0.00021078082191780818,
+      "loss": 0.6069,
+      "step": 2675
+    },
+    {
+      "epoch": 3.4573991031390134,
+      "grad_norm": 1.2916626930236816,
+      "learning_rate": 0.00020975342465753423,
+      "loss": 0.7165,
+      "step": 2700
+    },
+    {
+      "epoch": 3.489429852658552,
+      "grad_norm": 1.8512370586395264,
+      "learning_rate": 0.00020872602739726025,
+      "loss": 0.6287,
+      "step": 2725
+    },
+    {
+      "epoch": 3.5214606021780908,
+      "grad_norm": 3.614025354385376,
+      "learning_rate": 0.0002076986301369863,
+      "loss": 0.6982,
+      "step": 2750
+    },
+    {
+      "epoch": 3.55349135169763,
+      "grad_norm": 1.3554790019989014,
+      "learning_rate": 0.0002066712328767123,
+      "loss": 0.604,
+      "step": 2775
+    },
+    {
+      "epoch": 3.5855221012171685,
+      "grad_norm": 1.638237476348877,
+      "learning_rate": 0.00020564383561643836,
+      "loss": 0.7113,
+      "step": 2800
+    },
+    {
+      "epoch": 3.5855221012171685,
+      "eval_loss": 0.7320713996887207,
+      "eval_runtime": 192.2342,
+      "eval_samples_per_second": 15.564,
+      "eval_steps_per_second": 1.946,
+      "eval_wer": 0.6938207136640557,
+      "step": 2800
+    },
+    {
+      "epoch": 3.617552850736707,
+      "grad_norm": 2.5015342235565186,
+      "learning_rate": 0.00020461643835616438,
+      "loss": 0.6208,
+      "step": 2825
+    },
+    {
+      "epoch": 3.649583600256246,
+      "grad_norm": 1.5744799375534058,
+      "learning_rate": 0.0002035890410958904,
+      "loss": 0.7336,
+      "step": 2850
+    },
+    {
+      "epoch": 3.681614349775785,
+      "grad_norm": 1.980490803718567,
+      "learning_rate": 0.0002025616438356164,
+      "loss": 0.5763,
+      "step": 2875
+    },
+    {
+      "epoch": 3.7136450992953236,
+      "grad_norm": 1.333608627319336,
+      "learning_rate": 0.00020153424657534243,
+      "loss": 0.693,
+      "step": 2900
+    },
+    {
+      "epoch": 3.7456758488148623,
+      "grad_norm": 1.21135675907135,
+      "learning_rate": 0.00020050684931506845,
+      "loss": 0.6162,
+      "step": 2925
+    },
+    {
+      "epoch": 3.777706598334401,
+      "grad_norm": 1.436661958694458,
+      "learning_rate": 0.00019947945205479452,
+      "loss": 0.7126,
+      "step": 2950
+    },
+    {
+      "epoch": 3.8097373478539396,
+      "grad_norm": 1.2120234966278076,
+      "learning_rate": 0.00019845205479452054,
+      "loss": 0.6439,
+      "step": 2975
+    },
+    {
+      "epoch": 3.8417680973734787,
+      "grad_norm": 1.5366668701171875,
+      "learning_rate": 0.00019742465753424656,
+      "loss": 0.6959,
+      "step": 3000
+    },
+    {
+      "epoch": 3.8737988468930173,
+      "grad_norm": 1.171915888786316,
+      "learning_rate": 0.00019639726027397258,
+      "loss": 0.6143,
+      "step": 3025
+    },
+    {
+      "epoch": 3.905829596412556,
+      "grad_norm": 1.1893322467803955,
+      "learning_rate": 0.00019536986301369862,
+      "loss": 0.7201,
+      "step": 3050
+    },
+    {
+      "epoch": 3.9378603459320947,
+      "grad_norm": 2.19003963470459,
+      "learning_rate": 0.00019434246575342464,
+      "loss": 0.6301,
+      "step": 3075
+    },
+    {
+      "epoch": 3.9698910954516338,
+      "grad_norm": 1.0006098747253418,
+      "learning_rate": 0.00019331506849315066,
+      "loss": 0.6753,
+      "step": 3100
+    },
+    {
+      "epoch": 4.001281229980782,
+      "grad_norm": 2.3398594856262207,
+      "learning_rate": 0.00019228767123287668,
+      "loss": 0.6019,
+      "step": 3125
+    },
+    {
+      "epoch": 4.03331197950032,
+      "grad_norm": 1.7329877614974976,
+      "learning_rate": 0.0001912602739726027,
+      "loss": 0.5492,
+      "step": 3150
+    },
+    {
+      "epoch": 4.065342729019859,
+      "grad_norm": 1.0595425367355347,
+      "learning_rate": 0.00019023287671232877,
+      "loss": 0.5996,
+      "step": 3175
+    },
+    {
+      "epoch": 4.097373478539398,
+      "grad_norm": 1.6115587949752808,
+      "learning_rate": 0.0001892054794520548,
+      "loss": 0.5346,
+      "step": 3200
+    },
+    {
+      "epoch": 4.097373478539398,
+      "eval_loss": 0.7688583731651306,
+      "eval_runtime": 192.3934,
+      "eval_samples_per_second": 15.551,
+      "eval_steps_per_second": 1.944,
+      "eval_wer": 0.6896431679721496,
+      "step": 3200
+    },
+    {
+      "epoch": 4.129404228058936,
+      "grad_norm": 1.158002257347107,
+      "learning_rate": 0.0001881780821917808,
+      "loss": 0.569,
+      "step": 3225
+    },
+    {
+      "epoch": 4.161434977578475,
+      "grad_norm": 2.2581615447998047,
+      "learning_rate": 0.00018715068493150682,
+      "loss": 0.4958,
+      "step": 3250
+    },
+    {
+      "epoch": 4.1934657270980145,
+      "grad_norm": 1.4523509740829468,
+      "learning_rate": 0.00018612328767123287,
+      "loss": 0.6063,
+      "step": 3275
+    },
+    {
+      "epoch": 4.225496476617553,
+      "grad_norm": 1.2673031091690063,
+      "learning_rate": 0.0001850958904109589,
+      "loss": 0.4827,
+      "step": 3300
+    },
+    {
+      "epoch": 4.257527226137092,
+      "grad_norm": 2.416383981704712,
+      "learning_rate": 0.0001840684931506849,
+      "loss": 0.6466,
+      "step": 3325
+    },
+    {
+      "epoch": 4.28955797565663,
+      "grad_norm": 1.7506197690963745,
+      "learning_rate": 0.00018304109589041093,
+      "loss": 0.5158,
+      "step": 3350
+    },
+    {
+      "epoch": 4.321588725176169,
+      "grad_norm": 1.171522617340088,
+      "learning_rate": 0.00018201369863013697,
+      "loss": 0.6242,
+      "step": 3375
+    },
+    {
+      "epoch": 4.353619474695708,
+      "grad_norm": 0.8762041330337524,
+      "learning_rate": 0.00018098630136986302,
+      "loss": 0.5173,
+      "step": 3400
+    },
+    {
+      "epoch": 4.385650224215246,
+      "grad_norm": 1.327751636505127,
+      "learning_rate": 0.00017995890410958903,
+      "loss": 0.6182,
+      "step": 3425
+    },
+    {
+      "epoch": 4.417680973734785,
+      "grad_norm": 1.076515555381775,
+      "learning_rate": 0.00017893150684931505,
+      "loss": 0.5229,
+      "step": 3450
+    },
+    {
+      "epoch": 4.449711723254324,
+      "grad_norm": 1.5693820714950562,
+      "learning_rate": 0.00017790410958904107,
+      "loss": 0.5771,
+      "step": 3475
+    },
+    {
+      "epoch": 4.481742472773863,
+      "grad_norm": 1.3674280643463135,
+      "learning_rate": 0.0001768767123287671,
+      "loss": 0.5237,
+      "step": 3500
+    },
+    {
+      "epoch": 4.513773222293402,
+      "grad_norm": 2.232922315597534,
+      "learning_rate": 0.00017584931506849314,
+      "loss": 0.5916,
+      "step": 3525
+    },
+    {
+      "epoch": 4.545803971812941,
+      "grad_norm": 1.1831066608428955,
+      "learning_rate": 0.00017482191780821915,
+      "loss": 0.5244,
+      "step": 3550
+    },
+    {
+      "epoch": 4.577834721332479,
+      "grad_norm": 1.2374058961868286,
+      "learning_rate": 0.0001737945205479452,
+      "loss": 0.5743,
+      "step": 3575
+    },
+    {
+      "epoch": 4.609865470852018,
+      "grad_norm": 1.7176926136016846,
+      "learning_rate": 0.00017276712328767122,
+      "loss": 0.5107,
+      "step": 3600
+    },
+    {
+      "epoch": 4.609865470852018,
+      "eval_loss": 0.783900797367096,
+      "eval_runtime": 191.9719,
+      "eval_samples_per_second": 15.586,
+      "eval_steps_per_second": 1.948,
+      "eval_wer": 0.678503046127067,
+      "step": 3600
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 7800,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 400,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.4784767862106845e+19,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-3600/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f00530c3459cf16b6bd514450f69227d5903a814b8357f77a0b5d0080723e59d
+size 5304

checkpoint-4000/config.json ADDED Viewed

	@@ -0,0 +1,116 @@

+{
+  "_name_or_path": "jonatasgrosman/wav2vec2-large-xlsr-53-arabic",
+  "activation_dropout": 0.05,
+  "adapter_attn_dim": null,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 256,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": true,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.0,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.1,
+  "mask_channel_length": 10,
+  "mask_channel_min_space": 1,
+  "mask_channel_other": 0.0,
+  "mask_channel_prob": 0.0,
+  "mask_channel_selection": "static",
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_min_space": 1,
+  "mask_time_other": 0.0,
+  "mask_time_prob": 0.05,
+  "mask_time_selection": "static",
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 24,
+  "num_negatives": 100,
+  "output_hidden_size": 1024,
+  "pad_token_id": 55,
+  "proj_codevector_dim": 256,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 58,
+  "xvector_output_dim": 512
+}

checkpoint-4000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8cfc8debbea0680751a4c88ef792bc391c674c9d3896130f1f6dc345947463fb
+size 1262045280

checkpoint-4000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:70f5d27d817401408cb6e2b544c31d3cc00780f464363566c8cbb1a222b03c1f
+size 2490635318

checkpoint-4000/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "return_attention_mask": true,
+  "sampling_rate": 16000
+}

checkpoint-4000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cff8fce8d641585566b50c7928d325eeab081c421015cd5034dd58cd9c9818e6
+size 14244

checkpoint-4000/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b88d751b9e3f0246fc279325a672185d15e3efdb16f5db937f547385d1a6aa7c
+size 988

checkpoint-4000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ecc5d31122ddd1ef2ad27fc0f44878b7c9a9d31a70fbd1d1e18b8669bac51aec
+size 1064

checkpoint-4000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1243 @@

+{
+  "best_metric": 0.678503046127067,
+  "best_model_checkpoint": "./wav2vec2-large-xlsr-ar/checkpoint-3600",
+  "epoch": 5.121716848174247,
+  "eval_steps": 400,
+  "global_step": 4000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.032030749519538756,
+      "grad_norm": 40.31281661987305,
+      "learning_rate": 1.3799999999999998e-05,
+      "loss": 25.5997,
+      "step": 25
+    },
+    {
+      "epoch": 0.06406149903907751,
+      "grad_norm": 70.23408508300781,
+      "learning_rate": 2.7599999999999997e-05,
+      "loss": 30.9717,
+      "step": 50
+    },
+    {
+      "epoch": 0.09609224855861627,
+      "grad_norm": 3.012640953063965,
+      "learning_rate": 4.259999999999999e-05,
+      "loss": 9.1896,
+      "step": 75
+    },
+    {
+      "epoch": 0.12812299807815503,
+      "grad_norm": 30.363168716430664,
+      "learning_rate": 5.76e-05,
+      "loss": 12.5434,
+      "step": 100
+    },
+    {
+      "epoch": 0.1601537475976938,
+      "grad_norm": 13.795882225036621,
+      "learning_rate": 7.259999999999999e-05,
+      "loss": 5.814,
+      "step": 125
+    },
+    {
+      "epoch": 0.19218449711723254,
+      "grad_norm": 99.32404327392578,
+      "learning_rate": 8.759999999999999e-05,
+      "loss": 9.7835,
+      "step": 150
+    },
+    {
+      "epoch": 0.2242152466367713,
+      "grad_norm": 4.108926773071289,
+      "learning_rate": 0.0001026,
+      "loss": 5.1191,
+      "step": 175
+    },
+    {
+      "epoch": 0.25624599615631005,
+      "grad_norm": 7.492372512817383,
+      "learning_rate": 0.0001176,
+      "loss": 3.7315,
+      "step": 200
+    },
+    {
+      "epoch": 0.2882767456758488,
+      "grad_norm": 7.183516502380371,
+      "learning_rate": 0.0001326,
+      "loss": 3.6219,
+      "step": 225
+    },
+    {
+      "epoch": 0.3203074951953876,
+      "grad_norm": 2.490111827850342,
+      "learning_rate": 0.00014759999999999998,
+      "loss": 3.6824,
+      "step": 250
+    },
+    {
+      "epoch": 0.3523382447149263,
+      "grad_norm": 0.5032612681388855,
+      "learning_rate": 0.0001626,
+      "loss": 3.5972,
+      "step": 275
+    },
+    {
+      "epoch": 0.3843689942344651,
+      "grad_norm": 3.7791531085968018,
+      "learning_rate": 0.00017759999999999998,
+      "loss": 3.6051,
+      "step": 300
+    },
+    {
+      "epoch": 0.41639974375400385,
+      "grad_norm": 2.6355323791503906,
+      "learning_rate": 0.0001926,
+      "loss": 3.5084,
+      "step": 325
+    },
+    {
+      "epoch": 0.4484304932735426,
+      "grad_norm": 1.8424248695373535,
+      "learning_rate": 0.00020759999999999998,
+      "loss": 3.6254,
+      "step": 350
+    },
+    {
+      "epoch": 0.48046124279308133,
+      "grad_norm": 2.8042099475860596,
+      "learning_rate": 0.0002226,
+      "loss": 3.5539,
+      "step": 375
+    },
+    {
+      "epoch": 0.5124919923126201,
+      "grad_norm": 1.1292295455932617,
+      "learning_rate": 0.0002376,
+      "loss": 3.5401,
+      "step": 400
+    },
+    {
+      "epoch": 0.5124919923126201,
+      "eval_loss": 3.4790358543395996,
+      "eval_runtime": 190.1673,
+      "eval_samples_per_second": 15.734,
+      "eval_steps_per_second": 1.967,
+      "eval_wer": 1.0,
+      "step": 400
+    },
+    {
+      "epoch": 0.5445227418321589,
+      "grad_norm": 5.499414443969727,
+      "learning_rate": 0.00025259999999999996,
+      "loss": 3.4746,
+      "step": 425
+    },
+    {
+      "epoch": 0.5765534913516976,
+      "grad_norm": 3.2174575328826904,
+      "learning_rate": 0.0002676,
+      "loss": 3.5553,
+      "step": 450
+    },
+    {
+      "epoch": 0.6085842408712364,
+      "grad_norm": 0.8652946352958679,
+      "learning_rate": 0.0002826,
+      "loss": 3.4252,
+      "step": 475
+    },
+    {
+      "epoch": 0.6406149903907752,
+      "grad_norm": 2.645606517791748,
+      "learning_rate": 0.00029759999999999997,
+      "loss": 3.5508,
+      "step": 500
+    },
+    {
+      "epoch": 0.672645739910314,
+      "grad_norm": 0.9275538921356201,
+      "learning_rate": 0.00029913698630136987,
+      "loss": 3.4857,
+      "step": 525
+    },
+    {
+      "epoch": 0.7046764894298526,
+      "grad_norm": 1.0166261196136475,
+      "learning_rate": 0.00029810958904109586,
+      "loss": 3.5138,
+      "step": 550
+    },
+    {
+      "epoch": 0.7367072389493914,
+      "grad_norm": 4.685708045959473,
+      "learning_rate": 0.0002970821917808219,
+      "loss": 3.4017,
+      "step": 575
+    },
+    {
+      "epoch": 0.7687379884689302,
+      "grad_norm": 1.1367671489715576,
+      "learning_rate": 0.0002960547945205479,
+      "loss": 3.4927,
+      "step": 600
+    },
+    {
+      "epoch": 0.8007687379884689,
+      "grad_norm": 2.971071481704712,
+      "learning_rate": 0.00029502739726027395,
+      "loss": 3.4265,
+      "step": 625
+    },
+    {
+      "epoch": 0.8327994875080077,
+      "grad_norm": 2.657762289047241,
+      "learning_rate": 0.000294,
+      "loss": 3.4645,
+      "step": 650
+    },
+    {
+      "epoch": 0.8648302370275465,
+      "grad_norm": 2.87245774269104,
+      "learning_rate": 0.000292972602739726,
+      "loss": 3.3777,
+      "step": 675
+    },
+    {
+      "epoch": 0.8968609865470852,
+      "grad_norm": 1.2535868883132935,
+      "learning_rate": 0.00029194520547945203,
+      "loss": 3.4021,
+      "step": 700
+    },
+    {
+      "epoch": 0.928891736066624,
+      "grad_norm": 0.7670681476593018,
+      "learning_rate": 0.00029091780821917807,
+      "loss": 3.3667,
+      "step": 725
+    },
+    {
+      "epoch": 0.9609224855861627,
+      "grad_norm": 1.4213225841522217,
+      "learning_rate": 0.0002898904109589041,
+      "loss": 3.3387,
+      "step": 750
+    },
+    {
+      "epoch": 0.9929532351057014,
+      "grad_norm": 1.161726951599121,
+      "learning_rate": 0.0002888630136986301,
+      "loss": 2.7481,
+      "step": 775
+    },
+    {
+      "epoch": 1.0243433696348494,
+      "grad_norm": 2.047264337539673,
+      "learning_rate": 0.00028783561643835616,
+      "loss": 2.002,
+      "step": 800
+    },
+    {
+      "epoch": 1.0243433696348494,
+      "eval_loss": 1.6117621660232544,
+      "eval_runtime": 190.6575,
+      "eval_samples_per_second": 15.693,
+      "eval_steps_per_second": 1.962,
+      "eval_wer": 0.9870322019147084,
+      "step": 800
+    },
+    {
+      "epoch": 1.0563741191543883,
+      "grad_norm": 3.6471316814422607,
+      "learning_rate": 0.00028680821917808215,
+      "loss": 1.7004,
+      "step": 825
+    },
+    {
+      "epoch": 1.088404868673927,
+      "grad_norm": 1.8413678407669067,
+      "learning_rate": 0.0002857808219178082,
+      "loss": 1.4543,
+      "step": 850
+    },
+    {
+      "epoch": 1.1204356181934658,
+      "grad_norm": 2.3082125186920166,
+      "learning_rate": 0.00028475342465753424,
+      "loss": 1.3853,
+      "step": 875
+    },
+    {
+      "epoch": 1.1524663677130045,
+      "grad_norm": 1.7227452993392944,
+      "learning_rate": 0.00028372602739726023,
+      "loss": 1.336,
+      "step": 900
+    },
+    {
+      "epoch": 1.1844971172325431,
+      "grad_norm": 2.1250977516174316,
+      "learning_rate": 0.0002826986301369863,
+      "loss": 1.349,
+      "step": 925
+    },
+    {
+      "epoch": 1.216527866752082,
+      "grad_norm": 1.6314936876296997,
+      "learning_rate": 0.0002816712328767123,
+      "loss": 1.2656,
+      "step": 950
+    },
+    {
+      "epoch": 1.2485586162716207,
+      "grad_norm": 2.001681089401245,
+      "learning_rate": 0.00028064383561643837,
+      "loss": 1.2556,
+      "step": 975
+    },
+    {
+      "epoch": 1.2805893657911596,
+      "grad_norm": 2.1396918296813965,
+      "learning_rate": 0.00027961643835616436,
+      "loss": 1.1932,
+      "step": 1000
+    },
+    {
+      "epoch": 1.3126201153106982,
+      "grad_norm": 1.9855870008468628,
+      "learning_rate": 0.0002785890410958904,
+      "loss": 1.1991,
+      "step": 1025
+    },
+    {
+      "epoch": 1.344650864830237,
+      "grad_norm": 1.1789072751998901,
+      "learning_rate": 0.0002775616438356164,
+      "loss": 1.1041,
+      "step": 1050
+    },
+    {
+      "epoch": 1.3766816143497758,
+      "grad_norm": 2.304903507232666,
+      "learning_rate": 0.00027653424657534244,
+      "loss": 1.1538,
+      "step": 1075
+    },
+    {
+      "epoch": 1.4087123638693146,
+      "grad_norm": 2.459096670150757,
+      "learning_rate": 0.00027550684931506843,
+      "loss": 1.2379,
+      "step": 1100
+    },
+    {
+      "epoch": 1.4407431133888533,
+      "grad_norm": 1.886155605316162,
+      "learning_rate": 0.00027447945205479453,
+      "loss": 1.1326,
+      "step": 1125
+    },
+    {
+      "epoch": 1.472773862908392,
+      "grad_norm": 1.1992785930633545,
+      "learning_rate": 0.0002734520547945205,
+      "loss": 1.0691,
+      "step": 1150
+    },
+    {
+      "epoch": 1.5048046124279308,
+      "grad_norm": 3.249142646789551,
+      "learning_rate": 0.00027242465753424657,
+      "loss": 1.0511,
+      "step": 1175
+    },
+    {
+      "epoch": 1.5368353619474697,
+      "grad_norm": 3.3806302547454834,
+      "learning_rate": 0.00027139726027397256,
+      "loss": 1.0618,
+      "step": 1200
+    },
+    {
+      "epoch": 1.5368353619474697,
+      "eval_loss": 1.0360716581344604,
+      "eval_runtime": 191.2082,
+      "eval_samples_per_second": 15.648,
+      "eval_steps_per_second": 1.956,
+      "eval_wer": 0.8269799825935596,
+      "step": 1200
+    },
+    {
+      "epoch": 1.5688661114670084,
+      "grad_norm": 2.508125066757202,
+      "learning_rate": 0.0002703698630136986,
+      "loss": 1.0652,
+      "step": 1225
+    },
+    {
+      "epoch": 1.600896860986547,
+      "grad_norm": 1.3717399835586548,
+      "learning_rate": 0.00026934246575342465,
+      "loss": 1.0354,
+      "step": 1250
+    },
+    {
+      "epoch": 1.6329276105060857,
+      "grad_norm": 1.5011590719223022,
+      "learning_rate": 0.00026831506849315064,
+      "loss": 1.0427,
+      "step": 1275
+    },
+    {
+      "epoch": 1.6649583600256246,
+      "grad_norm": 1.6448092460632324,
+      "learning_rate": 0.0002672876712328767,
+      "loss": 1.0498,
+      "step": 1300
+    },
+    {
+      "epoch": 1.6969891095451635,
+      "grad_norm": 1.4456716775894165,
+      "learning_rate": 0.0002662602739726027,
+      "loss": 1.0481,
+      "step": 1325
+    },
+    {
+      "epoch": 1.7290198590647021,
+      "grad_norm": 1.2869809865951538,
+      "learning_rate": 0.0002652328767123288,
+      "loss": 1.0271,
+      "step": 1350
+    },
+    {
+      "epoch": 1.7610506085842408,
+      "grad_norm": 4.315392971038818,
+      "learning_rate": 0.00026420547945205477,
+      "loss": 0.978,
+      "step": 1375
+    },
+    {
+      "epoch": 1.7930813581037797,
+      "grad_norm": 1.3269984722137451,
+      "learning_rate": 0.0002631780821917808,
+      "loss": 0.9891,
+      "step": 1400
+    },
+    {
+      "epoch": 1.8251121076233185,
+      "grad_norm": 1.6529700756072998,
+      "learning_rate": 0.0002621506849315068,
+      "loss": 0.9917,
+      "step": 1425
+    },
+    {
+      "epoch": 1.8571428571428572,
+      "grad_norm": 2.1668319702148438,
+      "learning_rate": 0.00026112328767123285,
+      "loss": 0.9745,
+      "step": 1450
+    },
+    {
+      "epoch": 1.8891736066623959,
+      "grad_norm": 6.553292751312256,
+      "learning_rate": 0.0002600958904109589,
+      "loss": 0.9898,
+      "step": 1475
+    },
+    {
+      "epoch": 1.9212043561819345,
+      "grad_norm": 1.2242108583450317,
+      "learning_rate": 0.0002590684931506849,
+      "loss": 0.9363,
+      "step": 1500
+    },
+    {
+      "epoch": 1.9532351057014734,
+      "grad_norm": 2.026926040649414,
+      "learning_rate": 0.00025804109589041093,
+      "loss": 0.9731,
+      "step": 1525
+    },
+    {
+      "epoch": 1.9852658552210123,
+      "grad_norm": 1.560719609260559,
+      "learning_rate": 0.000257013698630137,
+      "loss": 0.8685,
+      "step": 1550
+    },
+    {
+      "epoch": 2.01665598975016,
+      "grad_norm": 1.0007785558700562,
+      "learning_rate": 0.000255986301369863,
+      "loss": 0.8586,
+      "step": 1575
+    },
+    {
+      "epoch": 2.048686739269699,
+      "grad_norm": 1.0924744606018066,
+      "learning_rate": 0.000254958904109589,
+      "loss": 0.8025,
+      "step": 1600
+    },
+    {
+      "epoch": 2.048686739269699,
+      "eval_loss": 0.8233081102371216,
+      "eval_runtime": 191.163,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.956,
+      "eval_wer": 0.751348999129678,
+      "step": 1600
+    },
+    {
+      "epoch": 2.0807174887892375,
+      "grad_norm": 1.9123071432113647,
+      "learning_rate": 0.00025393150684931506,
+      "loss": 0.8919,
+      "step": 1625
+    },
+    {
+      "epoch": 2.1127482383087766,
+      "grad_norm": 1.7388331890106201,
+      "learning_rate": 0.00025290410958904105,
+      "loss": 0.8667,
+      "step": 1650
+    },
+    {
+      "epoch": 2.144778987828315,
+      "grad_norm": 1.247045874595642,
+      "learning_rate": 0.0002518767123287671,
+      "loss": 0.8716,
+      "step": 1675
+    },
+    {
+      "epoch": 2.176809737347854,
+      "grad_norm": 2.1057279109954834,
+      "learning_rate": 0.00025084931506849314,
+      "loss": 0.7654,
+      "step": 1700
+    },
+    {
+      "epoch": 2.2088404868673925,
+      "grad_norm": 1.0074440240859985,
+      "learning_rate": 0.00024982191780821913,
+      "loss": 0.8732,
+      "step": 1725
+    },
+    {
+      "epoch": 2.2408712363869316,
+      "grad_norm": 2.159853219985962,
+      "learning_rate": 0.0002487945205479452,
+      "loss": 0.7941,
+      "step": 1750
+    },
+    {
+      "epoch": 2.2729019859064703,
+      "grad_norm": 1.8050284385681152,
+      "learning_rate": 0.0002477671232876712,
+      "loss": 0.8496,
+      "step": 1775
+    },
+    {
+      "epoch": 2.304932735426009,
+      "grad_norm": 1.3987536430358887,
+      "learning_rate": 0.00024673972602739727,
+      "loss": 0.8332,
+      "step": 1800
+    },
+    {
+      "epoch": 2.3369634849455476,
+      "grad_norm": 2.5281410217285156,
+      "learning_rate": 0.00024571232876712326,
+      "loss": 0.8135,
+      "step": 1825
+    },
+    {
+      "epoch": 2.3689942344650863,
+      "grad_norm": 1.914908766746521,
+      "learning_rate": 0.0002446849315068493,
+      "loss": 0.8067,
+      "step": 1850
+    },
+    {
+      "epoch": 2.4010249839846254,
+      "grad_norm": 1.1049237251281738,
+      "learning_rate": 0.00024365753424657533,
+      "loss": 0.8027,
+      "step": 1875
+    },
+    {
+      "epoch": 2.433055733504164,
+      "grad_norm": 2.2418999671936035,
+      "learning_rate": 0.00024263013698630134,
+      "loss": 0.7857,
+      "step": 1900
+    },
+    {
+      "epoch": 2.4650864830237027,
+      "grad_norm": 1.4093470573425293,
+      "learning_rate": 0.00024160273972602736,
+      "loss": 0.8265,
+      "step": 1925
+    },
+    {
+      "epoch": 2.4971172325432414,
+      "grad_norm": 1.396606206893921,
+      "learning_rate": 0.0002405753424657534,
+      "loss": 0.7601,
+      "step": 1950
+    },
+    {
+      "epoch": 2.5291479820627805,
+      "grad_norm": 1.2854044437408447,
+      "learning_rate": 0.00023954794520547945,
+      "loss": 0.8408,
+      "step": 1975
+    },
+    {
+      "epoch": 2.561178731582319,
+      "grad_norm": 2.2053070068359375,
+      "learning_rate": 0.00023852054794520547,
+      "loss": 0.7199,
+      "step": 2000
+    },
+    {
+      "epoch": 2.561178731582319,
+      "eval_loss": 0.7817878127098083,
+      "eval_runtime": 192.0652,
+      "eval_samples_per_second": 15.578,
+      "eval_steps_per_second": 1.947,
+      "eval_wer": 0.7203655352480418,
+      "step": 2000
+    },
+    {
+      "epoch": 2.593209481101858,
+      "grad_norm": 1.6103401184082031,
+      "learning_rate": 0.0002374931506849315,
+      "loss": 0.7848,
+      "step": 2025
+    },
+    {
+      "epoch": 2.6252402306213964,
+      "grad_norm": 3.00805401802063,
+      "learning_rate": 0.0002364657534246575,
+      "loss": 0.7485,
+      "step": 2050
+    },
+    {
+      "epoch": 2.657270980140935,
+      "grad_norm": 1.0826023817062378,
+      "learning_rate": 0.00023543835616438353,
+      "loss": 0.8131,
+      "step": 2075
+    },
+    {
+      "epoch": 2.689301729660474,
+      "grad_norm": 2.3294951915740967,
+      "learning_rate": 0.00023441095890410955,
+      "loss": 0.7616,
+      "step": 2100
+    },
+    {
+      "epoch": 2.721332479180013,
+      "grad_norm": 1.232429027557373,
+      "learning_rate": 0.0002333835616438356,
+      "loss": 0.7925,
+      "step": 2125
+    },
+    {
+      "epoch": 2.7533632286995515,
+      "grad_norm": 1.8985693454742432,
+      "learning_rate": 0.00023235616438356164,
+      "loss": 0.7829,
+      "step": 2150
+    },
+    {
+      "epoch": 2.78539397821909,
+      "grad_norm": 1.1546630859375,
+      "learning_rate": 0.00023132876712328765,
+      "loss": 0.8053,
+      "step": 2175
+    },
+    {
+      "epoch": 2.8174247277386293,
+      "grad_norm": 1.2817527055740356,
+      "learning_rate": 0.00023030136986301367,
+      "loss": 0.7452,
+      "step": 2200
+    },
+    {
+      "epoch": 2.849455477258168,
+      "grad_norm": 1.7231945991516113,
+      "learning_rate": 0.00022927397260273972,
+      "loss": 0.7817,
+      "step": 2225
+    },
+    {
+      "epoch": 2.8814862267777066,
+      "grad_norm": 2.686530113220215,
+      "learning_rate": 0.00022824657534246574,
+      "loss": 0.7506,
+      "step": 2250
+    },
+    {
+      "epoch": 2.9135169762972453,
+      "grad_norm": 1.3286162614822388,
+      "learning_rate": 0.00022721917808219176,
+      "loss": 0.7947,
+      "step": 2275
+    },
+    {
+      "epoch": 2.945547725816784,
+      "grad_norm": 1.3969508409500122,
+      "learning_rate": 0.00022619178082191777,
+      "loss": 0.7174,
+      "step": 2300
+    },
+    {
+      "epoch": 2.977578475336323,
+      "grad_norm": 1.835070013999939,
+      "learning_rate": 0.0002251643835616438,
+      "loss": 0.7531,
+      "step": 2325
+    },
+    {
+      "epoch": 3.008968609865471,
+      "grad_norm": 2.8138489723205566,
+      "learning_rate": 0.00022413698630136986,
+      "loss": 0.7638,
+      "step": 2350
+    },
+    {
+      "epoch": 3.0409993593850095,
+      "grad_norm": 1.0872498750686646,
+      "learning_rate": 0.00022310958904109588,
+      "loss": 0.6102,
+      "step": 2375
+    },
+    {
+      "epoch": 3.073030108904548,
+      "grad_norm": 2.0995171070098877,
+      "learning_rate": 0.0002220821917808219,
+      "loss": 0.675,
+      "step": 2400
+    },
+    {
+      "epoch": 3.073030108904548,
+      "eval_loss": 0.8002874255180359,
+      "eval_runtime": 191.1551,
+      "eval_samples_per_second": 15.652,
+      "eval_steps_per_second": 1.957,
+      "eval_wer": 0.7121845082680592,
+      "step": 2400
+    },
+    {
+      "epoch": 3.1050608584240873,
+      "grad_norm": 1.8755576610565186,
+      "learning_rate": 0.00022105479452054792,
+      "loss": 0.5922,
+      "step": 2425
+    },
+    {
+      "epoch": 3.137091607943626,
+      "grad_norm": 0.8880970478057861,
+      "learning_rate": 0.00022002739726027397,
+      "loss": 0.6929,
+      "step": 2450
+    },
+    {
+      "epoch": 3.1691223574631646,
+      "grad_norm": 2.357203722000122,
+      "learning_rate": 0.00021899999999999998,
+      "loss": 0.5997,
+      "step": 2475
+    },
+    {
+      "epoch": 3.2011531069827033,
+      "grad_norm": 0.9104003310203552,
+      "learning_rate": 0.000217972602739726,
+      "loss": 0.728,
+      "step": 2500
+    },
+    {
+      "epoch": 3.233183856502242,
+      "grad_norm": 1.5452452898025513,
+      "learning_rate": 0.00021694520547945202,
+      "loss": 0.5799,
+      "step": 2525
+    },
+    {
+      "epoch": 3.265214606021781,
+      "grad_norm": 0.8508313894271851,
+      "learning_rate": 0.00021591780821917807,
+      "loss": 0.6859,
+      "step": 2550
+    },
+    {
+      "epoch": 3.2972453555413197,
+      "grad_norm": 1.8509936332702637,
+      "learning_rate": 0.0002148904109589041,
+      "loss": 0.6178,
+      "step": 2575
+    },
+    {
+      "epoch": 3.3292761050608584,
+      "grad_norm": 1.355774998664856,
+      "learning_rate": 0.00021386301369863013,
+      "loss": 0.6908,
+      "step": 2600
+    },
+    {
+      "epoch": 3.361306854580397,
+      "grad_norm": 1.322189211845398,
+      "learning_rate": 0.00021283561643835615,
+      "loss": 0.6134,
+      "step": 2625
+    },
+    {
+      "epoch": 3.393337604099936,
+      "grad_norm": 1.606176495552063,
+      "learning_rate": 0.00021180821917808217,
+      "loss": 0.6974,
+      "step": 2650
+    },
+    {
+      "epoch": 3.425368353619475,
+      "grad_norm": 1.7283929586410522,
+      "learning_rate": 0.00021078082191780818,
+      "loss": 0.6069,
+      "step": 2675
+    },
+    {
+      "epoch": 3.4573991031390134,
+      "grad_norm": 1.2916626930236816,
+      "learning_rate": 0.00020975342465753423,
+      "loss": 0.7165,
+      "step": 2700
+    },
+    {
+      "epoch": 3.489429852658552,
+      "grad_norm": 1.8512370586395264,
+      "learning_rate": 0.00020872602739726025,
+      "loss": 0.6287,
+      "step": 2725
+    },
+    {
+      "epoch": 3.5214606021780908,
+      "grad_norm": 3.614025354385376,
+      "learning_rate": 0.0002076986301369863,
+      "loss": 0.6982,
+      "step": 2750
+    },
+    {
+      "epoch": 3.55349135169763,
+      "grad_norm": 1.3554790019989014,
+      "learning_rate": 0.0002066712328767123,
+      "loss": 0.604,
+      "step": 2775
+    },
+    {
+      "epoch": 3.5855221012171685,
+      "grad_norm": 1.638237476348877,
+      "learning_rate": 0.00020564383561643836,
+      "loss": 0.7113,
+      "step": 2800
+    },
+    {
+      "epoch": 3.5855221012171685,
+      "eval_loss": 0.7320713996887207,
+      "eval_runtime": 192.2342,
+      "eval_samples_per_second": 15.564,
+      "eval_steps_per_second": 1.946,
+      "eval_wer": 0.6938207136640557,
+      "step": 2800
+    },
+    {
+      "epoch": 3.617552850736707,
+      "grad_norm": 2.5015342235565186,
+      "learning_rate": 0.00020461643835616438,
+      "loss": 0.6208,
+      "step": 2825
+    },
+    {
+      "epoch": 3.649583600256246,
+      "grad_norm": 1.5744799375534058,
+      "learning_rate": 0.0002035890410958904,
+      "loss": 0.7336,
+      "step": 2850
+    },
+    {
+      "epoch": 3.681614349775785,
+      "grad_norm": 1.980490803718567,
+      "learning_rate": 0.0002025616438356164,
+      "loss": 0.5763,
+      "step": 2875
+    },
+    {
+      "epoch": 3.7136450992953236,
+      "grad_norm": 1.333608627319336,
+      "learning_rate": 0.00020153424657534243,
+      "loss": 0.693,
+      "step": 2900
+    },
+    {
+      "epoch": 3.7456758488148623,
+      "grad_norm": 1.21135675907135,
+      "learning_rate": 0.00020050684931506845,
+      "loss": 0.6162,
+      "step": 2925
+    },
+    {
+      "epoch": 3.777706598334401,
+      "grad_norm": 1.436661958694458,
+      "learning_rate": 0.00019947945205479452,
+      "loss": 0.7126,
+      "step": 2950
+    },
+    {
+      "epoch": 3.8097373478539396,
+      "grad_norm": 1.2120234966278076,
+      "learning_rate": 0.00019845205479452054,
+      "loss": 0.6439,
+      "step": 2975
+    },
+    {
+      "epoch": 3.8417680973734787,
+      "grad_norm": 1.5366668701171875,
+      "learning_rate": 0.00019742465753424656,
+      "loss": 0.6959,
+      "step": 3000
+    },
+    {
+      "epoch": 3.8737988468930173,
+      "grad_norm": 1.171915888786316,
+      "learning_rate": 0.00019639726027397258,
+      "loss": 0.6143,
+      "step": 3025
+    },
+    {
+      "epoch": 3.905829596412556,
+      "grad_norm": 1.1893322467803955,
+      "learning_rate": 0.00019536986301369862,
+      "loss": 0.7201,
+      "step": 3050
+    },
+    {
+      "epoch": 3.9378603459320947,
+      "grad_norm": 2.19003963470459,
+      "learning_rate": 0.00019434246575342464,
+      "loss": 0.6301,
+      "step": 3075
+    },
+    {
+      "epoch": 3.9698910954516338,
+      "grad_norm": 1.0006098747253418,
+      "learning_rate": 0.00019331506849315066,
+      "loss": 0.6753,
+      "step": 3100
+    },
+    {
+      "epoch": 4.001281229980782,
+      "grad_norm": 2.3398594856262207,
+      "learning_rate": 0.00019228767123287668,
+      "loss": 0.6019,
+      "step": 3125
+    },
+    {
+      "epoch": 4.03331197950032,
+      "grad_norm": 1.7329877614974976,
+      "learning_rate": 0.0001912602739726027,
+      "loss": 0.5492,
+      "step": 3150
+    },
+    {
+      "epoch": 4.065342729019859,
+      "grad_norm": 1.0595425367355347,
+      "learning_rate": 0.00019023287671232877,
+      "loss": 0.5996,
+      "step": 3175
+    },
+    {
+      "epoch": 4.097373478539398,
+      "grad_norm": 1.6115587949752808,
+      "learning_rate": 0.0001892054794520548,
+      "loss": 0.5346,
+      "step": 3200
+    },
+    {
+      "epoch": 4.097373478539398,
+      "eval_loss": 0.7688583731651306,
+      "eval_runtime": 192.3934,
+      "eval_samples_per_second": 15.551,
+      "eval_steps_per_second": 1.944,
+      "eval_wer": 0.6896431679721496,
+      "step": 3200
+    },
+    {
+      "epoch": 4.129404228058936,
+      "grad_norm": 1.158002257347107,
+      "learning_rate": 0.0001881780821917808,
+      "loss": 0.569,
+      "step": 3225
+    },
+    {
+      "epoch": 4.161434977578475,
+      "grad_norm": 2.2581615447998047,
+      "learning_rate": 0.00018715068493150682,
+      "loss": 0.4958,
+      "step": 3250
+    },
+    {
+      "epoch": 4.1934657270980145,
+      "grad_norm": 1.4523509740829468,
+      "learning_rate": 0.00018612328767123287,
+      "loss": 0.6063,
+      "step": 3275
+    },
+    {
+      "epoch": 4.225496476617553,
+      "grad_norm": 1.2673031091690063,
+      "learning_rate": 0.0001850958904109589,
+      "loss": 0.4827,
+      "step": 3300
+    },
+    {
+      "epoch": 4.257527226137092,
+      "grad_norm": 2.416383981704712,
+      "learning_rate": 0.0001840684931506849,
+      "loss": 0.6466,
+      "step": 3325
+    },
+    {
+      "epoch": 4.28955797565663,
+      "grad_norm": 1.7506197690963745,
+      "learning_rate": 0.00018304109589041093,
+      "loss": 0.5158,
+      "step": 3350
+    },
+    {
+      "epoch": 4.321588725176169,
+      "grad_norm": 1.171522617340088,
+      "learning_rate": 0.00018201369863013697,
+      "loss": 0.6242,
+      "step": 3375
+    },
+    {
+      "epoch": 4.353619474695708,
+      "grad_norm": 0.8762041330337524,
+      "learning_rate": 0.00018098630136986302,
+      "loss": 0.5173,
+      "step": 3400
+    },
+    {
+      "epoch": 4.385650224215246,
+      "grad_norm": 1.327751636505127,
+      "learning_rate": 0.00017995890410958903,
+      "loss": 0.6182,
+      "step": 3425
+    },
+    {
+      "epoch": 4.417680973734785,
+      "grad_norm": 1.076515555381775,
+      "learning_rate": 0.00017893150684931505,
+      "loss": 0.5229,
+      "step": 3450
+    },
+    {
+      "epoch": 4.449711723254324,
+      "grad_norm": 1.5693820714950562,
+      "learning_rate": 0.00017790410958904107,
+      "loss": 0.5771,
+      "step": 3475
+    },
+    {
+      "epoch": 4.481742472773863,
+      "grad_norm": 1.3674280643463135,
+      "learning_rate": 0.0001768767123287671,
+      "loss": 0.5237,
+      "step": 3500
+    },
+    {
+      "epoch": 4.513773222293402,
+      "grad_norm": 2.232922315597534,
+      "learning_rate": 0.00017584931506849314,
+      "loss": 0.5916,
+      "step": 3525
+    },
+    {
+      "epoch": 4.545803971812941,
+      "grad_norm": 1.1831066608428955,
+      "learning_rate": 0.00017482191780821915,
+      "loss": 0.5244,
+      "step": 3550
+    },
+    {
+      "epoch": 4.577834721332479,
+      "grad_norm": 1.2374058961868286,
+      "learning_rate": 0.0001737945205479452,
+      "loss": 0.5743,
+      "step": 3575
+    },
+    {
+      "epoch": 4.609865470852018,
+      "grad_norm": 1.7176926136016846,
+      "learning_rate": 0.00017276712328767122,
+      "loss": 0.5107,
+      "step": 3600
+    },
+    {
+      "epoch": 4.609865470852018,
+      "eval_loss": 0.783900797367096,
+      "eval_runtime": 191.9719,
+      "eval_samples_per_second": 15.586,
+      "eval_steps_per_second": 1.948,
+      "eval_wer": 0.678503046127067,
+      "step": 3600
+    },
+    {
+      "epoch": 4.641896220371557,
+      "grad_norm": 1.3690687417984009,
+      "learning_rate": 0.00017173972602739726,
+      "loss": 0.5845,
+      "step": 3625
+    },
+    {
+      "epoch": 4.673926969891095,
+      "grad_norm": 1.3373069763183594,
+      "learning_rate": 0.00017071232876712328,
+      "loss": 0.5001,
+      "step": 3650
+    },
+    {
+      "epoch": 4.705957719410634,
+      "grad_norm": 1.5431394577026367,
+      "learning_rate": 0.0001696849315068493,
+      "loss": 0.5728,
+      "step": 3675
+    },
+    {
+      "epoch": 4.737988468930173,
+      "grad_norm": 1.4463599920272827,
+      "learning_rate": 0.00016865753424657532,
+      "loss": 0.4824,
+      "step": 3700
+    },
+    {
+      "epoch": 4.770019218449712,
+      "grad_norm": 1.7865560054779053,
+      "learning_rate": 0.00016763013698630134,
+      "loss": 0.5931,
+      "step": 3725
+    },
+    {
+      "epoch": 4.802049967969251,
+      "grad_norm": 1.0858443975448608,
+      "learning_rate": 0.00016660273972602736,
+      "loss": 0.5395,
+      "step": 3750
+    },
+    {
+      "epoch": 4.834080717488789,
+      "grad_norm": 1.9803308248519897,
+      "learning_rate": 0.00016557534246575343,
+      "loss": 0.5776,
+      "step": 3775
+    },
+    {
+      "epoch": 4.866111467008328,
+      "grad_norm": 3.4345428943634033,
+      "learning_rate": 0.00016454794520547945,
+      "loss": 0.4938,
+      "step": 3800
+    },
+    {
+      "epoch": 4.898142216527867,
+      "grad_norm": 0.9586917757987976,
+      "learning_rate": 0.00016352054794520546,
+      "loss": 0.5707,
+      "step": 3825
+    },
+    {
+      "epoch": 4.930172966047405,
+      "grad_norm": 1.524976372718811,
+      "learning_rate": 0.00016249315068493148,
+      "loss": 0.5169,
+      "step": 3850
+    },
+    {
+      "epoch": 4.962203715566944,
+      "grad_norm": 1.1566359996795654,
+      "learning_rate": 0.00016146575342465753,
+      "loss": 0.6073,
+      "step": 3875
+    },
+    {
+      "epoch": 4.994234465086483,
+      "grad_norm": 1.876478672027588,
+      "learning_rate": 0.00016043835616438355,
+      "loss": 0.523,
+      "step": 3900
+    },
+    {
+      "epoch": 5.025624599615631,
+      "grad_norm": 0.982686460018158,
+      "learning_rate": 0.00015941095890410957,
+      "loss": 0.4881,
+      "step": 3925
+    },
+    {
+      "epoch": 5.05765534913517,
+      "grad_norm": 1.5838254690170288,
+      "learning_rate": 0.00015838356164383558,
+      "loss": 0.4756,
+      "step": 3950
+    },
+    {
+      "epoch": 5.089686098654709,
+      "grad_norm": 2.1962034702301025,
+      "learning_rate": 0.00015735616438356166,
+      "loss": 0.5028,
+      "step": 3975
+    },
+    {
+      "epoch": 5.121716848174247,
+      "grad_norm": 4.335826396942139,
+      "learning_rate": 0.00015632876712328767,
+      "loss": 0.5199,
+      "step": 4000
+    },
+    {
+      "epoch": 5.121716848174247,
+      "eval_loss": 0.8298487663269043,
+      "eval_runtime": 192.8428,
+      "eval_samples_per_second": 15.515,
+      "eval_steps_per_second": 1.939,
+      "eval_wer": 0.6859007832898172,
+      "step": 4000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 7800,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 400,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.6408903738445339e+19,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-4000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f00530c3459cf16b6bd514450f69227d5903a814b8357f77a0b5d0080723e59d
+size 5304

config.json ADDED Viewed

	@@ -0,0 +1,116 @@

+{
+  "_name_or_path": "jonatasgrosman/wav2vec2-large-xlsr-53-arabic",
+  "activation_dropout": 0.05,
+  "adapter_attn_dim": null,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 256,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": true,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.0,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.1,
+  "mask_channel_length": 10,
+  "mask_channel_min_space": 1,
+  "mask_channel_other": 0.0,
+  "mask_channel_prob": 0.0,
+  "mask_channel_selection": "static",
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_min_space": 1,
+  "mask_time_other": 0.0,
+  "mask_time_prob": 0.05,
+  "mask_time_selection": "static",
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 24,
+  "num_negatives": 100,
+  "output_hidden_size": 1024,
+  "pad_token_id": 55,
+  "proj_codevector_dim": 256,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 58,
+  "xvector_output_dim": 512
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e5f20b9274cca7dadb1e9cf07691826097d13ca0d8808dc60f2bc11604c120fa
+size 1262045280

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "return_attention_mask": true,
+  "sampling_rate": 16000
+}

runs/Feb18_22-23-26_893f794f09cc/events.out.tfevents.1739918097.893f794f09cc.31.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b3b4538e8f1ee37bb10a3613d2d249df67da13bd4184e793cfb700888b7f29a2
+size 44921

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": "[PAD]",
+  "unk_token": "[UNK]"
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,48 @@

+{
+  "added_tokens_decoder": {
+    "54": {
+      "content": "[UNK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "55": {
+      "content": "[PAD]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "56": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "57": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "replace_word_delimiter_char": " ",
+  "target_lang": null,
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "unk_token": "[UNK]",
+  "word_delimiter_token": "|"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f00530c3459cf16b6bd514450f69227d5903a814b8357f77a0b5d0080723e59d
+size 5304

vocab.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "[PAD]": 55,
+  "[UNK]": 54,
+  "|": 46,
+  "ء": 6,
+  "آ": 27,
+  "أ": 15,
+  "ؤ": 30,
+  "إ": 24,
+  "ئ": 36,
+  "ا": 9,
+  "ب": 13,
+  "ة": 19,
+  "ت": 2,
+  "ث": 1,
+  "ج": 26,
+  "ح": 33,
+  "خ": 5,
+  "د": 10,
+  "ذ": 37,
+  "ر": 53,
+  "ز": 39,
+  "س": 11,
+  "ش": 7,
+  "ص": 28,
+  "ض": 49,
+  "ط": 40,
+  "ظ": 22,
+  "ع": 44,
+  "غ": 38,
+  "ـ": 47,
+  "ف": 25,
+  "ق": 21,
+  "ك": 50,
+  "ل": 34,
+  "م": 12,
+  "ن": 41,
+  "ه": 4,
+  "و": 17,
+  "ى": 45,
+  "ي": 51,
+  "ً": 29,
+  "ٌ": 20,
+  "ٍ": 16,
+  "َ": 8,
+  "ُ": 23,
+  "ِ": 35,
+  "ّ": 43,
+  "ْ": 52,
+  "ٰ": 32,
+  "چ": 31,
+  "ک": 14,
+  "ی": 3,
+  "ۖ": 42,
+  "ۗ": 18,
+  "ۚ": 48,
+  "ۛ": 0
+}