diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7dbaed370958b5fc1a24a8c07fe26b1b7f462d9c --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +--- +tags: +- text-to-speech +- speecht5 +- mabama # Make sure no empty tags exist +library_name: transformers +license: mit +--- \ No newline at end of file diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cd5b477a9075c49d99de65622db37bb06a251985 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,4 @@ +{ + "": 80, + "": 79 +} diff --git a/checkpoint-1000/added_tokens.json b/checkpoint-1000/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cd5b477a9075c49d99de65622db37bb06a251985 --- /dev/null +++ b/checkpoint-1000/added_tokens.json @@ -0,0 +1,4 @@ +{ + "": 80, + "": 79 +} diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..11c8ec6e9cb5a360feb11650e24ba1adc08e8d0e --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,91 @@ +{ + "activation_dropout": 0.1, + "apply_spec_augment": true, + "architectures": [ + "SpeechT5ForTextToSpeech" + ], + "attention_dropout": 0.1, + "bos_token_id": 0, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.1, + "decoder_layers": 6, + "decoder_start_token_id": 2, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.1, + "encoder_layers": 12, + "encoder_max_relative_position": 160, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "guided_attention_loss_num_heads": 2, + "guided_attention_loss_scale": 10.0, + "guided_attention_loss_sigma": 0.4, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "is_encoder_decoder": true, + "layer_norm_eps": 1e-05, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": null, + "max_speech_positions": 1876, + "max_text_positions": 600, + "model_type": "speecht5", + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_mel_bins": 80, + "pad_token_id": 1, + "positional_dropout": 0.1, + "reduction_factor": 2, + "scale_embedding": false, + "speaker_embedding_dim": 512, + "speech_decoder_postnet_dropout": 0.5, + "speech_decoder_postnet_kernel": 5, + "speech_decoder_postnet_layers": 5, + "speech_decoder_postnet_units": 256, + "speech_decoder_prenet_dropout": 0.5, + "speech_decoder_prenet_layers": 2, + "speech_decoder_prenet_units": 256, + "torch_dtype": "float32", + "transformers_version": "4.52.0.dev0", + "use_cache": false, + "use_guided_attention_loss": true, + "vocab_size": 81 +} diff --git a/checkpoint-1000/generation_config.json b/checkpoint-1000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3041cc103eccf911a925012f1ad8fd1d12cce149 --- /dev/null +++ b/checkpoint-1000/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "eos_token_id": 2, + "max_length": 1876, + 
"pad_token_id": 1, + "transformers_version": "4.52.0.dev0" +} diff --git a/checkpoint-1000/model.safetensors b/checkpoint-1000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5540e86f5478d7907f8e2b229391b344c5694ec0 --- /dev/null +++ b/checkpoint-1000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87514d400a13c6eefdeb2f89abd1795e621c2344f96e159ce2aeba3d0ce85944 +size 577789320 diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..26e2ef12b961da2cd0404a8f151e97f3b5dea730 --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2699a889db8dbb0ae670281bc558951bffc765ae88e6bb0bb5222ac12288814b +size 1155772233 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7d6748f8e4525d7fe8c1e3995fc1adb59c705b37 --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82d177efef76b1d9db7c817f74c58d37c483a0042c96999443934a8052be41aa +size 14244 diff --git a/checkpoint-1000/scaler.pt b/checkpoint-1000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..99fd9b9f6c56133927938af3ea5ee182baf431ae --- /dev/null +++ b/checkpoint-1000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27eb6d31126283f601b217f22a8971040a00a73abf0a2e26bfcb5064cd0afa48 +size 988 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8565d90824cdbfd2ca3a73025f401c07d39317db --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5101d8c1f86d6f48167e50b1164b9ba363ab76694ff2d5c1e326e3d5f94ecaef +size 1064 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..4ee24ec69861cfc94abbe2c8c934aa0744aa623c --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1,13 @@ +{ + "bos_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": "" +} diff --git a/checkpoint-1000/spm_char.model b/checkpoint-1000/spm_char.model new file mode 100644 index 0000000000000000000000000000000000000000..8fb73691942626fa75df80b61aab0e9b9340d8e2 --- /dev/null +++ b/checkpoint-1000/spm_char.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560 +size 238473 diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e88d44ba3be31ac8f53461ae7c1b02b4c5c830ab --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1,64 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "79": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "80": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 600, + "normalize": false, + "pad_token": "", + "processor_class": "SpeechT5Processor", + "sp_model_kwargs": {}, + "tokenizer_class": "SpeechT5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..561078f38d40883d12c484952530e59a49e8a5d1 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,322 @@ +{ + "best_global_step": 1000, + "best_metric": 0.5154594779014587, + "best_model_checkpoint": "./speecht5_tts_mabama/checkpoint-1000", + "epoch": 125.0, + "eval_steps": 1000, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 3.1333333333333333, + "grad_norm": 13.092179298400879, + "learning_rate": 4.2000000000000006e-07, + "loss": 1.0978, + "step": 25 + }, + { + "epoch": 6.266666666666667, + "grad_norm": 13.538804054260254, + "learning_rate": 9.200000000000001e-07, + "loss": 1.0057, + "step": 50 + }, + { + "epoch": 9.4, + "grad_norm": 3.8923733234405518, + "learning_rate": 1.42e-06, + "loss": 0.8155, + "step": 75 + }, + { + "epoch": 12.533333333333333, + "grad_norm": 2.569746732711792, + "learning_rate": 1.9200000000000003e-06, + "loss": 0.7921, + "step": 100 + }, + { + "epoch": 15.666666666666666, + "grad_norm": 2.390493631362915, + "learning_rate": 2.42e-06, + "loss": 0.7531, + "step": 125 + }, + { + "epoch": 18.8, + "grad_norm": 2.7168779373168945, + "learning_rate": 2.92e-06, + "loss": 0.7393, + "step": 150 + }, + { + "epoch": 21.933333333333334, + "grad_norm": 10.27633285522461, + "learning_rate": 3.4200000000000007e-06, + "loss": 0.7292, + "step": 175 + }, + { + "epoch": 25.0, + "grad_norm": 5.6921000480651855, + "learning_rate": 3.920000000000001e-06, + "loss": 0.6642, + "step": 200 + }, + { + "epoch": 28.133333333333333, + "grad_norm": 2.6206777095794678, + "learning_rate": 4.42e-06, + "loss": 0.6555, + "step": 225 + }, + { + "epoch": 31.266666666666666, + "grad_norm": 1.9396028518676758, + "learning_rate": 4.92e-06, + "loss": 0.6484, + "step": 250 + }, + { + "epoch": 34.4, + "grad_norm": 3.44437575340271, + "learning_rate": 5.420000000000001e-06, + "loss": 0.6414, + "step": 275 + }, + { + "epoch": 37.53333333333333, + "grad_norm": 2.729497194290161, + "learning_rate": 5.92e-06, + "loss": 0.6323, + "step": 300 + }, + { + "epoch": 40.666666666666664, + "grad_norm": 2.3852877616882324, + "learning_rate": 6.42e-06, + "loss": 0.6073, + "step": 325 + }, + { + "epoch": 43.8, + "grad_norm": 4.4287109375, + "learning_rate": 6.92e-06, + "loss": 0.6034, + "step": 350 + }, + { + "epoch": 46.93333333333333, + "grad_norm": 2.1653966903686523, + "learning_rate": 7.420000000000001e-06, + "loss": 0.5865, + "step": 375 + }, + { + "epoch": 50.0, + "grad_norm": 2.8120265007019043, + "learning_rate": 7.92e-06, + "loss": 0.5556, + "step": 400 + }, + { + "epoch": 53.13333333333333, + "grad_norm": 
2.0973806381225586, + "learning_rate": 8.42e-06, + "loss": 0.5416, + "step": 425 + }, + { + "epoch": 56.266666666666666, + "grad_norm": 2.6723616123199463, + "learning_rate": 8.920000000000001e-06, + "loss": 0.5407, + "step": 450 + }, + { + "epoch": 59.4, + "grad_norm": 2.1810383796691895, + "learning_rate": 9.42e-06, + "loss": 0.5174, + "step": 475 + }, + { + "epoch": 62.53333333333333, + "grad_norm": 3.464071750640869, + "learning_rate": 9.920000000000002e-06, + "loss": 0.5327, + "step": 500 + }, + { + "epoch": 65.66666666666667, + "grad_norm": 3.6148977279663086, + "learning_rate": 9.940000000000001e-06, + "loss": 0.5141, + "step": 525 + }, + { + "epoch": 68.8, + "grad_norm": 2.5631027221679688, + "learning_rate": 9.86857142857143e-06, + "loss": 0.5246, + "step": 550 + }, + { + "epoch": 71.93333333333334, + "grad_norm": 2.058468818664551, + "learning_rate": 9.797142857142858e-06, + "loss": 0.5065, + "step": 575 + }, + { + "epoch": 75.0, + "grad_norm": 1.7559466361999512, + "learning_rate": 9.725714285714287e-06, + "loss": 0.4871, + "step": 600 + }, + { + "epoch": 78.13333333333334, + "grad_norm": 2.653345823287964, + "learning_rate": 9.654285714285716e-06, + "loss": 0.4941, + "step": 625 + }, + { + "epoch": 81.26666666666667, + "grad_norm": 2.612226724624634, + "learning_rate": 9.582857142857143e-06, + "loss": 0.4796, + "step": 650 + }, + { + "epoch": 84.4, + "grad_norm": 1.7446099519729614, + "learning_rate": 9.511428571428572e-06, + "loss": 0.487, + "step": 675 + }, + { + "epoch": 87.53333333333333, + "grad_norm": 2.627315044403076, + "learning_rate": 9.440000000000001e-06, + "loss": 0.4731, + "step": 700 + }, + { + "epoch": 90.66666666666667, + "grad_norm": 2.4315383434295654, + "learning_rate": 9.368571428571428e-06, + "loss": 0.4812, + "step": 725 + }, + { + "epoch": 93.8, + "grad_norm": 2.4056336879730225, + "learning_rate": 9.297142857142857e-06, + "loss": 0.468, + "step": 750 + }, + { + "epoch": 96.93333333333334, + "grad_norm": 2.153116464614868, + "learning_rate": 9.225714285714286e-06, + "loss": 0.4829, + "step": 775 + }, + { + "epoch": 100.0, + "grad_norm": 2.9421756267547607, + "learning_rate": 9.154285714285715e-06, + "loss": 0.4555, + "step": 800 + }, + { + "epoch": 103.13333333333334, + "grad_norm": 1.6771883964538574, + "learning_rate": 9.082857142857143e-06, + "loss": 0.462, + "step": 825 + }, + { + "epoch": 106.26666666666667, + "grad_norm": 2.9711899757385254, + "learning_rate": 9.011428571428572e-06, + "loss": 0.471, + "step": 850 + }, + { + "epoch": 109.4, + "grad_norm": 1.922980546951294, + "learning_rate": 8.94e-06, + "loss": 0.4673, + "step": 875 + }, + { + "epoch": 112.53333333333333, + "grad_norm": 2.49945068359375, + "learning_rate": 8.86857142857143e-06, + "loss": 0.4611, + "step": 900 + }, + { + "epoch": 115.66666666666667, + "grad_norm": 2.646510362625122, + "learning_rate": 8.797142857142857e-06, + "loss": 0.4574, + "step": 925 + }, + { + "epoch": 118.8, + "grad_norm": 1.7943354845046997, + "learning_rate": 8.725714285714286e-06, + "loss": 0.4658, + "step": 950 + }, + { + "epoch": 121.93333333333334, + "grad_norm": 2.171827793121338, + "learning_rate": 8.654285714285715e-06, + "loss": 0.4561, + "step": 975 + }, + { + "epoch": 125.0, + "grad_norm": 7.516489505767822, + "learning_rate": 8.582857142857144e-06, + "loss": 0.4472, + "step": 1000 + }, + { + "epoch": 125.0, + "eval_loss": 0.5154594779014587, + "eval_runtime": 0.7837, + "eval_samples_per_second": 33.175, + "eval_steps_per_second": 5.104, + "step": 1000 + } + ], + "logging_steps": 25, + 
"max_steps": 4000, + "num_input_tokens_seen": 0, + "num_train_epochs": 572, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2568713479659360.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a61b2643a9f565814c2aa62a171605debf82a9c0 --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e653cd3b83482a0939ee4c8a207df9a996f44ce9dd82197c4ab6cde60cf2bb +size 5432 diff --git a/checkpoint-2000/added_tokens.json b/checkpoint-2000/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cd5b477a9075c49d99de65622db37bb06a251985 --- /dev/null +++ b/checkpoint-2000/added_tokens.json @@ -0,0 +1,4 @@ +{ + "": 80, + "": 79 +} diff --git a/checkpoint-2000/config.json b/checkpoint-2000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..11c8ec6e9cb5a360feb11650e24ba1adc08e8d0e --- /dev/null +++ b/checkpoint-2000/config.json @@ -0,0 +1,91 @@ +{ + "activation_dropout": 0.1, + "apply_spec_augment": true, + "architectures": [ + "SpeechT5ForTextToSpeech" + ], + "attention_dropout": 0.1, + "bos_token_id": 0, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.1, + "decoder_layers": 6, + "decoder_start_token_id": 2, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.1, + "encoder_layers": 12, + "encoder_max_relative_position": 160, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "guided_attention_loss_num_heads": 2, + "guided_attention_loss_scale": 10.0, + "guided_attention_loss_sigma": 0.4, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "is_encoder_decoder": true, + "layer_norm_eps": 1e-05, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": null, + "max_speech_positions": 1876, + "max_text_positions": 600, + "model_type": "speecht5", + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_mel_bins": 80, + "pad_token_id": 1, + "positional_dropout": 0.1, + "reduction_factor": 2, + "scale_embedding": false, + "speaker_embedding_dim": 512, + "speech_decoder_postnet_dropout": 0.5, + "speech_decoder_postnet_kernel": 5, + "speech_decoder_postnet_layers": 5, + "speech_decoder_postnet_units": 256, + "speech_decoder_prenet_dropout": 0.5, + "speech_decoder_prenet_layers": 2, + "speech_decoder_prenet_units": 256, + "torch_dtype": "float32", + "transformers_version": "4.52.0.dev0", + "use_cache": false, + "use_guided_attention_loss": true, + "vocab_size": 81 +} diff --git a/checkpoint-2000/generation_config.json b/checkpoint-2000/generation_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..3041cc103eccf911a925012f1ad8fd1d12cce149 --- /dev/null +++ b/checkpoint-2000/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "eos_token_id": 2, + "max_length": 1876, + "pad_token_id": 1, + "transformers_version": "4.52.0.dev0" +} diff --git a/checkpoint-2000/model.safetensors b/checkpoint-2000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e0b6396d9f590a58a3fe32f7a2a77f0587b4a8a --- /dev/null +++ b/checkpoint-2000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abac69d52746eff2a8ac4fea48c076a031effd3d774eaa79c34c25289b78a9ad +size 577789320 diff --git a/checkpoint-2000/optimizer.pt b/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..43314c8f7174d70adfb383a4571110603aabb196 --- /dev/null +++ b/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df818e2b4cf58851158e10b3d57754c198be51c0b852b9cd4b587b629a205640 +size 1155772233 diff --git a/checkpoint-2000/rng_state.pth b/checkpoint-2000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7fd9a1224c43c6ea63f9c6313bab015aa0c29295 --- /dev/null +++ b/checkpoint-2000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b766bf7909addbb49e8f135f2f8aa3b6e99cb053e36395d8560f93e71c2776e7 +size 14244 diff --git a/checkpoint-2000/scaler.pt b/checkpoint-2000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..177496ba5922b3811ae20b8354e55ee6d4aaeff8 --- /dev/null +++ b/checkpoint-2000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49bc19d5712fad43d5cef95c2e01c73bd75bdb71e4c16fa8781d626d978f5452 +size 988 diff --git a/checkpoint-2000/scheduler.pt b/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a6460a89462994303545a8f3997e58e1490360f --- /dev/null +++ b/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b80a609c64a12b4db2f38941ea479b9a30f9351b7aac74f4956e8686dc338317 +size 1064 diff --git a/checkpoint-2000/special_tokens_map.json b/checkpoint-2000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..4ee24ec69861cfc94abbe2c8c934aa0744aa623c --- /dev/null +++ b/checkpoint-2000/special_tokens_map.json @@ -0,0 +1,13 @@ +{ + "bos_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": "" +} diff --git a/checkpoint-2000/spm_char.model b/checkpoint-2000/spm_char.model new file mode 100644 index 0000000000000000000000000000000000000000..8fb73691942626fa75df80b61aab0e9b9340d8e2 --- /dev/null +++ b/checkpoint-2000/spm_char.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560 +size 238473 diff --git a/checkpoint-2000/tokenizer_config.json b/checkpoint-2000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e88d44ba3be31ac8f53461ae7c1b02b4c5c830ab --- /dev/null +++ b/checkpoint-2000/tokenizer_config.json @@ -0,0 +1,64 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "79": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "80": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 600, + "normalize": false, + "pad_token": "", + "processor_class": "SpeechT5Processor", + "sp_model_kwargs": {}, + "tokenizer_class": "SpeechT5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-2000/trainer_state.json b/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d3857b0a031ed9222d8530018fd26b3025f4b10e --- /dev/null +++ b/checkpoint-2000/trainer_state.json @@ -0,0 +1,610 @@ +{ + "best_global_step": 2000, + "best_metric": 0.4833647906780243, + "best_model_checkpoint": "./speecht5_tts_mabama/checkpoint-2000", + "epoch": 250.0, + "eval_steps": 1000, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 3.1333333333333333, + "grad_norm": 13.092179298400879, + "learning_rate": 4.2000000000000006e-07, + "loss": 1.0978, + "step": 25 + }, + { + "epoch": 6.266666666666667, + "grad_norm": 13.538804054260254, + "learning_rate": 9.200000000000001e-07, + "loss": 1.0057, + "step": 50 + }, + { + "epoch": 9.4, + "grad_norm": 3.8923733234405518, + "learning_rate": 1.42e-06, + "loss": 0.8155, + "step": 75 + }, + { + "epoch": 12.533333333333333, + "grad_norm": 2.569746732711792, + "learning_rate": 1.9200000000000003e-06, + "loss": 0.7921, + "step": 100 + }, + { + "epoch": 15.666666666666666, + "grad_norm": 2.390493631362915, + "learning_rate": 2.42e-06, + "loss": 0.7531, + "step": 125 + }, + { + "epoch": 18.8, + "grad_norm": 2.7168779373168945, + "learning_rate": 2.92e-06, + "loss": 0.7393, + "step": 150 + }, + { + "epoch": 21.933333333333334, + "grad_norm": 10.27633285522461, + "learning_rate": 3.4200000000000007e-06, + "loss": 0.7292, + "step": 175 + }, + { + "epoch": 25.0, + "grad_norm": 5.6921000480651855, + "learning_rate": 3.920000000000001e-06, + "loss": 0.6642, + "step": 200 + }, + { + "epoch": 28.133333333333333, + "grad_norm": 2.6206777095794678, + "learning_rate": 4.42e-06, + "loss": 0.6555, + "step": 225 + }, + { + "epoch": 31.266666666666666, + "grad_norm": 1.9396028518676758, + "learning_rate": 4.92e-06, + "loss": 0.6484, + "step": 250 + }, + { + "epoch": 34.4, + "grad_norm": 3.44437575340271, + "learning_rate": 5.420000000000001e-06, + "loss": 0.6414, + "step": 275 + }, + { + "epoch": 37.53333333333333, + "grad_norm": 2.729497194290161, + "learning_rate": 5.92e-06, + "loss": 0.6323, + "step": 300 + }, + { + "epoch": 40.666666666666664, + "grad_norm": 2.3852877616882324, + "learning_rate": 6.42e-06, + "loss": 0.6073, + "step": 325 + }, + { + "epoch": 43.8, + "grad_norm": 4.4287109375, + "learning_rate": 6.92e-06, + "loss": 0.6034, + "step": 350 + }, + { + "epoch": 46.93333333333333, + 
"grad_norm": 2.1653966903686523, + "learning_rate": 7.420000000000001e-06, + "loss": 0.5865, + "step": 375 + }, + { + "epoch": 50.0, + "grad_norm": 2.8120265007019043, + "learning_rate": 7.92e-06, + "loss": 0.5556, + "step": 400 + }, + { + "epoch": 53.13333333333333, + "grad_norm": 2.0973806381225586, + "learning_rate": 8.42e-06, + "loss": 0.5416, + "step": 425 + }, + { + "epoch": 56.266666666666666, + "grad_norm": 2.6723616123199463, + "learning_rate": 8.920000000000001e-06, + "loss": 0.5407, + "step": 450 + }, + { + "epoch": 59.4, + "grad_norm": 2.1810383796691895, + "learning_rate": 9.42e-06, + "loss": 0.5174, + "step": 475 + }, + { + "epoch": 62.53333333333333, + "grad_norm": 3.464071750640869, + "learning_rate": 9.920000000000002e-06, + "loss": 0.5327, + "step": 500 + }, + { + "epoch": 65.66666666666667, + "grad_norm": 3.6148977279663086, + "learning_rate": 9.940000000000001e-06, + "loss": 0.5141, + "step": 525 + }, + { + "epoch": 68.8, + "grad_norm": 2.5631027221679688, + "learning_rate": 9.86857142857143e-06, + "loss": 0.5246, + "step": 550 + }, + { + "epoch": 71.93333333333334, + "grad_norm": 2.058468818664551, + "learning_rate": 9.797142857142858e-06, + "loss": 0.5065, + "step": 575 + }, + { + "epoch": 75.0, + "grad_norm": 1.7559466361999512, + "learning_rate": 9.725714285714287e-06, + "loss": 0.4871, + "step": 600 + }, + { + "epoch": 78.13333333333334, + "grad_norm": 2.653345823287964, + "learning_rate": 9.654285714285716e-06, + "loss": 0.4941, + "step": 625 + }, + { + "epoch": 81.26666666666667, + "grad_norm": 2.612226724624634, + "learning_rate": 9.582857142857143e-06, + "loss": 0.4796, + "step": 650 + }, + { + "epoch": 84.4, + "grad_norm": 1.7446099519729614, + "learning_rate": 9.511428571428572e-06, + "loss": 0.487, + "step": 675 + }, + { + "epoch": 87.53333333333333, + "grad_norm": 2.627315044403076, + "learning_rate": 9.440000000000001e-06, + "loss": 0.4731, + "step": 700 + }, + { + "epoch": 90.66666666666667, + "grad_norm": 2.4315383434295654, + "learning_rate": 9.368571428571428e-06, + "loss": 0.4812, + "step": 725 + }, + { + "epoch": 93.8, + "grad_norm": 2.4056336879730225, + "learning_rate": 9.297142857142857e-06, + "loss": 0.468, + "step": 750 + }, + { + "epoch": 96.93333333333334, + "grad_norm": 2.153116464614868, + "learning_rate": 9.225714285714286e-06, + "loss": 0.4829, + "step": 775 + }, + { + "epoch": 100.0, + "grad_norm": 2.9421756267547607, + "learning_rate": 9.154285714285715e-06, + "loss": 0.4555, + "step": 800 + }, + { + "epoch": 103.13333333333334, + "grad_norm": 1.6771883964538574, + "learning_rate": 9.082857142857143e-06, + "loss": 0.462, + "step": 825 + }, + { + "epoch": 106.26666666666667, + "grad_norm": 2.9711899757385254, + "learning_rate": 9.011428571428572e-06, + "loss": 0.471, + "step": 850 + }, + { + "epoch": 109.4, + "grad_norm": 1.922980546951294, + "learning_rate": 8.94e-06, + "loss": 0.4673, + "step": 875 + }, + { + "epoch": 112.53333333333333, + "grad_norm": 2.49945068359375, + "learning_rate": 8.86857142857143e-06, + "loss": 0.4611, + "step": 900 + }, + { + "epoch": 115.66666666666667, + "grad_norm": 2.646510362625122, + "learning_rate": 8.797142857142857e-06, + "loss": 0.4574, + "step": 925 + }, + { + "epoch": 118.8, + "grad_norm": 1.7943354845046997, + "learning_rate": 8.725714285714286e-06, + "loss": 0.4658, + "step": 950 + }, + { + "epoch": 121.93333333333334, + "grad_norm": 2.171827793121338, + "learning_rate": 8.654285714285715e-06, + "loss": 0.4561, + "step": 975 + }, + { + "epoch": 125.0, + "grad_norm": 7.516489505767822, + 
"learning_rate": 8.582857142857144e-06, + "loss": 0.4472, + "step": 1000 + }, + { + "epoch": 125.0, + "eval_loss": 0.5154594779014587, + "eval_runtime": 0.7837, + "eval_samples_per_second": 33.175, + "eval_steps_per_second": 5.104, + "step": 1000 + }, + { + "epoch": 128.13333333333333, + "grad_norm": 2.5167343616485596, + "learning_rate": 8.511428571428571e-06, + "loss": 0.457, + "step": 1025 + }, + { + "epoch": 131.26666666666668, + "grad_norm": 3.3089983463287354, + "learning_rate": 8.44e-06, + "loss": 0.4456, + "step": 1050 + }, + { + "epoch": 134.4, + "grad_norm": 2.778348445892334, + "learning_rate": 8.36857142857143e-06, + "loss": 0.4612, + "step": 1075 + }, + { + "epoch": 137.53333333333333, + "grad_norm": 2.529778480529785, + "learning_rate": 8.297142857142859e-06, + "loss": 0.4429, + "step": 1100 + }, + { + "epoch": 140.66666666666666, + "grad_norm": 1.76685631275177, + "learning_rate": 8.225714285714288e-06, + "loss": 0.4399, + "step": 1125 + }, + { + "epoch": 143.8, + "grad_norm": 1.8449666500091553, + "learning_rate": 8.154285714285715e-06, + "loss": 0.4329, + "step": 1150 + }, + { + "epoch": 146.93333333333334, + "grad_norm": 1.9097468852996826, + "learning_rate": 8.082857142857144e-06, + "loss": 0.4527, + "step": 1175 + }, + { + "epoch": 150.0, + "grad_norm": 3.892838716506958, + "learning_rate": 8.011428571428573e-06, + "loss": 0.4448, + "step": 1200 + }, + { + "epoch": 153.13333333333333, + "grad_norm": 2.1518826484680176, + "learning_rate": 7.94e-06, + "loss": 0.4412, + "step": 1225 + }, + { + "epoch": 156.26666666666668, + "grad_norm": 1.5322662591934204, + "learning_rate": 7.86857142857143e-06, + "loss": 0.4388, + "step": 1250 + }, + { + "epoch": 159.4, + "grad_norm": 1.4961107969284058, + "learning_rate": 7.797142857142858e-06, + "loss": 0.4363, + "step": 1275 + }, + { + "epoch": 162.53333333333333, + "grad_norm": 1.8992841243743896, + "learning_rate": 7.725714285714286e-06, + "loss": 0.4474, + "step": 1300 + }, + { + "epoch": 165.66666666666666, + "grad_norm": 1.5015554428100586, + "learning_rate": 7.654285714285715e-06, + "loss": 0.4327, + "step": 1325 + }, + { + "epoch": 168.8, + "grad_norm": 2.0730693340301514, + "learning_rate": 7.5828571428571444e-06, + "loss": 0.4348, + "step": 1350 + }, + { + "epoch": 171.93333333333334, + "grad_norm": 2.0838747024536133, + "learning_rate": 7.511428571428572e-06, + "loss": 0.4393, + "step": 1375 + }, + { + "epoch": 175.0, + "grad_norm": 4.3804030418396, + "learning_rate": 7.440000000000001e-06, + "loss": 0.4386, + "step": 1400 + }, + { + "epoch": 178.13333333333333, + "grad_norm": 1.8927189111709595, + "learning_rate": 7.36857142857143e-06, + "loss": 0.4318, + "step": 1425 + }, + { + "epoch": 181.26666666666668, + "grad_norm": 1.5456620454788208, + "learning_rate": 7.297142857142858e-06, + "loss": 0.4336, + "step": 1450 + }, + { + "epoch": 184.4, + "grad_norm": 2.722612142562866, + "learning_rate": 7.225714285714286e-06, + "loss": 0.4281, + "step": 1475 + }, + { + "epoch": 187.53333333333333, + "grad_norm": 1.9484314918518066, + "learning_rate": 7.154285714285715e-06, + "loss": 0.4312, + "step": 1500 + }, + { + "epoch": 190.66666666666666, + "grad_norm": 2.101043224334717, + "learning_rate": 7.082857142857143e-06, + "loss": 0.427, + "step": 1525 + }, + { + "epoch": 193.8, + "grad_norm": 1.9785490036010742, + "learning_rate": 7.011428571428572e-06, + "loss": 0.4298, + "step": 1550 + }, + { + "epoch": 196.93333333333334, + "grad_norm": 2.319054126739502, + "learning_rate": 6.9400000000000005e-06, + "loss": 0.4376, + "step": 1575 + 
}, + { + "epoch": 200.0, + "grad_norm": 1.3612741231918335, + "learning_rate": 6.868571428571429e-06, + "loss": 0.4217, + "step": 1600 + }, + { + "epoch": 203.13333333333333, + "grad_norm": 2.128363847732544, + "learning_rate": 6.797142857142858e-06, + "loss": 0.4217, + "step": 1625 + }, + { + "epoch": 206.26666666666668, + "grad_norm": 1.7985234260559082, + "learning_rate": 6.725714285714287e-06, + "loss": 0.4147, + "step": 1650 + }, + { + "epoch": 209.4, + "grad_norm": 1.3478573560714722, + "learning_rate": 6.654285714285716e-06, + "loss": 0.4357, + "step": 1675 + }, + { + "epoch": 212.53333333333333, + "grad_norm": 1.5389248132705688, + "learning_rate": 6.582857142857143e-06, + "loss": 0.419, + "step": 1700 + }, + { + "epoch": 215.66666666666666, + "grad_norm": 1.9558783769607544, + "learning_rate": 6.511428571428572e-06, + "loss": 0.4289, + "step": 1725 + }, + { + "epoch": 218.8, + "grad_norm": 1.756585955619812, + "learning_rate": 6.440000000000001e-06, + "loss": 0.4168, + "step": 1750 + }, + { + "epoch": 221.93333333333334, + "grad_norm": 1.8744903802871704, + "learning_rate": 6.368571428571429e-06, + "loss": 0.4296, + "step": 1775 + }, + { + "epoch": 225.0, + "grad_norm": 1.133415699005127, + "learning_rate": 6.297142857142857e-06, + "loss": 0.4162, + "step": 1800 + }, + { + "epoch": 228.13333333333333, + "grad_norm": 2.819840908050537, + "learning_rate": 6.225714285714286e-06, + "loss": 0.4275, + "step": 1825 + }, + { + "epoch": 231.26666666666668, + "grad_norm": 1.5150210857391357, + "learning_rate": 6.1542857142857145e-06, + "loss": 0.4244, + "step": 1850 + }, + { + "epoch": 234.4, + "grad_norm": 2.184819459915161, + "learning_rate": 6.0828571428571435e-06, + "loss": 0.4282, + "step": 1875 + }, + { + "epoch": 237.53333333333333, + "grad_norm": 3.293454170227051, + "learning_rate": 6.011428571428572e-06, + "loss": 0.4215, + "step": 1900 + }, + { + "epoch": 240.66666666666666, + "grad_norm": 1.210433006286621, + "learning_rate": 5.94e-06, + "loss": 0.4103, + "step": 1925 + }, + { + "epoch": 243.8, + "grad_norm": 2.5027923583984375, + "learning_rate": 5.868571428571429e-06, + "loss": 0.4186, + "step": 1950 + }, + { + "epoch": 246.93333333333334, + "grad_norm": 1.9649789333343506, + "learning_rate": 5.797142857142858e-06, + "loss": 0.427, + "step": 1975 + }, + { + "epoch": 250.0, + "grad_norm": 5.899420261383057, + "learning_rate": 5.725714285714287e-06, + "loss": 0.4113, + "step": 2000 + }, + { + "epoch": 250.0, + "eval_loss": 0.4833647906780243, + "eval_runtime": 0.7095, + "eval_samples_per_second": 36.646, + "eval_steps_per_second": 5.638, + "step": 2000 + } + ], + "logging_steps": 25, + "max_steps": 4000, + "num_input_tokens_seen": 0, + "num_train_epochs": 572, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5140893067410672.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2000/training_args.bin b/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a61b2643a9f565814c2aa62a171605debf82a9c0 --- /dev/null +++ b/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e653cd3b83482a0939ee4c8a207df9a996f44ce9dd82197c4ab6cde60cf2bb +size 5432 diff --git a/checkpoint-3000/added_tokens.json 
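
The trainer_state.json files above record the schedule this run followed: loss logged every 25 steps, evaluation and checkpointing every 1000 steps, a train batch size of 16, a peak learning rate of about 1e-5 reached near step 500 and decayed linearly toward step 4000, and AMP gradient-scaling state in scaler.pt. As a rough sketch only, the Seq2SeqTrainingArguments below would be consistent with those logged values; every field is inferred from trainer_state.json rather than read from the binary training_args.bin, so treat it as a plausible reconstruction, not the actual configuration.

```python
from transformers import Seq2SeqTrainingArguments

# Hypothetical reconstruction: values inferred from trainer_state.json,
# not decoded from training_args.bin.
training_args = Seq2SeqTrainingArguments(
    output_dir="./speecht5_tts_mabama",  # matches the best_model_checkpoint paths
    per_device_train_batch_size=16,      # "train_batch_size": 16
    learning_rate=1e-5,                  # peak LR seen in the log (~9.94e-06)
    warmup_steps=500,                    # LR ramps linearly until roughly step 500
    max_steps=4000,                      # "max_steps": 4000
    fp16=True,                           # scaler.pt implies mixed-precision training
    eval_strategy="steps",
    eval_steps=1000,                     # "eval_steps": 1000
    save_steps=1000,                     # "save_steps": 1000
    logging_steps=25,                    # "logging_steps": 25
    load_best_model_at_end=True,         # best_metric / best_model_checkpoint are tracked
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    label_names=["labels"],
)
```
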
b/checkpoint-3000/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cd5b477a9075c49d99de65622db37bb06a251985 --- /dev/null +++ b/checkpoint-3000/added_tokens.json @@ -0,0 +1,4 @@ +{ + "": 80, + "": 79 +} diff --git a/checkpoint-3000/config.json b/checkpoint-3000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..11c8ec6e9cb5a360feb11650e24ba1adc08e8d0e --- /dev/null +++ b/checkpoint-3000/config.json @@ -0,0 +1,91 @@ +{ + "activation_dropout": 0.1, + "apply_spec_augment": true, + "architectures": [ + "SpeechT5ForTextToSpeech" + ], + "attention_dropout": 0.1, + "bos_token_id": 0, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.1, + "decoder_layers": 6, + "decoder_start_token_id": 2, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.1, + "encoder_layers": 12, + "encoder_max_relative_position": 160, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "guided_attention_loss_num_heads": 2, + "guided_attention_loss_scale": 10.0, + "guided_attention_loss_sigma": 0.4, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "is_encoder_decoder": true, + "layer_norm_eps": 1e-05, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": null, + "max_speech_positions": 1876, + "max_text_positions": 600, + "model_type": "speecht5", + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_mel_bins": 80, + "pad_token_id": 1, + "positional_dropout": 0.1, + "reduction_factor": 2, + "scale_embedding": false, + "speaker_embedding_dim": 512, + "speech_decoder_postnet_dropout": 0.5, + "speech_decoder_postnet_kernel": 5, + "speech_decoder_postnet_layers": 5, + "speech_decoder_postnet_units": 256, + "speech_decoder_prenet_dropout": 0.5, + "speech_decoder_prenet_layers": 2, + "speech_decoder_prenet_units": 256, + "torch_dtype": "float32", + "transformers_version": "4.52.0.dev0", + "use_cache": false, + "use_guided_attention_loss": true, + "vocab_size": 81 +} diff --git a/checkpoint-3000/generation_config.json b/checkpoint-3000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3041cc103eccf911a925012f1ad8fd1d12cce149 --- /dev/null +++ b/checkpoint-3000/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "eos_token_id": 2, + "max_length": 1876, + "pad_token_id": 1, + "transformers_version": "4.52.0.dev0" +} diff --git a/checkpoint-3000/model.safetensors b/checkpoint-3000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0fcae15e7cc654d100ee8a6038bf337da110444 --- /dev/null +++ b/checkpoint-3000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d78d191d64b057ef7708236443ccd41bb24d44484f04b36d4fd46df31daa1c6 +size 577789320 diff --git a/checkpoint-3000/optimizer.pt b/checkpoint-3000/optimizer.pt new file mode 100644 index 
0000000000000000000000000000000000000000..7b5339aa4aa73f7f4bfc775204fbe55dfa8c694b --- /dev/null +++ b/checkpoint-3000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abb1e34922901fa49e90b622d421fcc022123b3db879e5c48cdc697dd3a9c2d3 +size 1155772233 diff --git a/checkpoint-3000/rng_state.pth b/checkpoint-3000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e799596e250f2e298a2124c7a1ad86b87b5eb19d --- /dev/null +++ b/checkpoint-3000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a258daad5ac5df9273072647bd5fccfa416cdadb91b7707278c61cc1145a5964 +size 14244 diff --git a/checkpoint-3000/scaler.pt b/checkpoint-3000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a144ce0190defeeb9f7d69e33a30cba839bee5f --- /dev/null +++ b/checkpoint-3000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0c9142a3b98e645e9dc3ffae8c602fb70b74046fea7664e6d081ebb3d0bbb58 +size 988 diff --git a/checkpoint-3000/scheduler.pt b/checkpoint-3000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..00802b7d7ba610674c2662a6888f78aa35c14408 --- /dev/null +++ b/checkpoint-3000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff56efc76c16a3b9a712527179ae61c8d6dfccc7e3a53f8c421d6329adacfbb +size 1064 diff --git a/checkpoint-3000/special_tokens_map.json b/checkpoint-3000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..4ee24ec69861cfc94abbe2c8c934aa0744aa623c --- /dev/null +++ b/checkpoint-3000/special_tokens_map.json @@ -0,0 +1,13 @@ +{ + "bos_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": "" +} diff --git a/checkpoint-3000/spm_char.model b/checkpoint-3000/spm_char.model new file mode 100644 index 0000000000000000000000000000000000000000..8fb73691942626fa75df80b61aab0e9b9340d8e2 --- /dev/null +++ b/checkpoint-3000/spm_char.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560 +size 238473 diff --git a/checkpoint-3000/tokenizer_config.json b/checkpoint-3000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e88d44ba3be31ac8f53461ae7c1b02b4c5c830ab --- /dev/null +++ b/checkpoint-3000/tokenizer_config.json @@ -0,0 +1,64 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "79": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "80": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + 
"model_max_length": 600, + "normalize": false, + "pad_token": "", + "processor_class": "SpeechT5Processor", + "sp_model_kwargs": {}, + "tokenizer_class": "SpeechT5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-3000/trainer_state.json b/checkpoint-3000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..32f7d0ece3153553bd2f22a87cbc990f249809ce --- /dev/null +++ b/checkpoint-3000/trainer_state.json @@ -0,0 +1,898 @@ +{ + "best_global_step": 3000, + "best_metric": 0.46799278259277344, + "best_model_checkpoint": "./speecht5_tts_mabama/checkpoint-3000", + "epoch": 375.0, + "eval_steps": 1000, + "global_step": 3000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 3.1333333333333333, + "grad_norm": 13.092179298400879, + "learning_rate": 4.2000000000000006e-07, + "loss": 1.0978, + "step": 25 + }, + { + "epoch": 6.266666666666667, + "grad_norm": 13.538804054260254, + "learning_rate": 9.200000000000001e-07, + "loss": 1.0057, + "step": 50 + }, + { + "epoch": 9.4, + "grad_norm": 3.8923733234405518, + "learning_rate": 1.42e-06, + "loss": 0.8155, + "step": 75 + }, + { + "epoch": 12.533333333333333, + "grad_norm": 2.569746732711792, + "learning_rate": 1.9200000000000003e-06, + "loss": 0.7921, + "step": 100 + }, + { + "epoch": 15.666666666666666, + "grad_norm": 2.390493631362915, + "learning_rate": 2.42e-06, + "loss": 0.7531, + "step": 125 + }, + { + "epoch": 18.8, + "grad_norm": 2.7168779373168945, + "learning_rate": 2.92e-06, + "loss": 0.7393, + "step": 150 + }, + { + "epoch": 21.933333333333334, + "grad_norm": 10.27633285522461, + "learning_rate": 3.4200000000000007e-06, + "loss": 0.7292, + "step": 175 + }, + { + "epoch": 25.0, + "grad_norm": 5.6921000480651855, + "learning_rate": 3.920000000000001e-06, + "loss": 0.6642, + "step": 200 + }, + { + "epoch": 28.133333333333333, + "grad_norm": 2.6206777095794678, + "learning_rate": 4.42e-06, + "loss": 0.6555, + "step": 225 + }, + { + "epoch": 31.266666666666666, + "grad_norm": 1.9396028518676758, + "learning_rate": 4.92e-06, + "loss": 0.6484, + "step": 250 + }, + { + "epoch": 34.4, + "grad_norm": 3.44437575340271, + "learning_rate": 5.420000000000001e-06, + "loss": 0.6414, + "step": 275 + }, + { + "epoch": 37.53333333333333, + "grad_norm": 2.729497194290161, + "learning_rate": 5.92e-06, + "loss": 0.6323, + "step": 300 + }, + { + "epoch": 40.666666666666664, + "grad_norm": 2.3852877616882324, + "learning_rate": 6.42e-06, + "loss": 0.6073, + "step": 325 + }, + { + "epoch": 43.8, + "grad_norm": 4.4287109375, + "learning_rate": 6.92e-06, + "loss": 0.6034, + "step": 350 + }, + { + "epoch": 46.93333333333333, + "grad_norm": 2.1653966903686523, + "learning_rate": 7.420000000000001e-06, + "loss": 0.5865, + "step": 375 + }, + { + "epoch": 50.0, + "grad_norm": 2.8120265007019043, + "learning_rate": 7.92e-06, + "loss": 0.5556, + "step": 400 + }, + { + "epoch": 53.13333333333333, + "grad_norm": 2.0973806381225586, + "learning_rate": 8.42e-06, + "loss": 0.5416, + "step": 425 + }, + { + "epoch": 56.266666666666666, + "grad_norm": 2.6723616123199463, + "learning_rate": 8.920000000000001e-06, + "loss": 0.5407, + "step": 450 + }, + { + "epoch": 59.4, + "grad_norm": 2.1810383796691895, + "learning_rate": 9.42e-06, + "loss": 0.5174, + "step": 475 + }, + { + "epoch": 62.53333333333333, + "grad_norm": 3.464071750640869, + "learning_rate": 9.920000000000002e-06, + "loss": 0.5327, + "step": 500 + }, + { + "epoch": 65.66666666666667, + "grad_norm": 
3.6148977279663086, + "learning_rate": 9.940000000000001e-06, + "loss": 0.5141, + "step": 525 + }, + { + "epoch": 68.8, + "grad_norm": 2.5631027221679688, + "learning_rate": 9.86857142857143e-06, + "loss": 0.5246, + "step": 550 + }, + { + "epoch": 71.93333333333334, + "grad_norm": 2.058468818664551, + "learning_rate": 9.797142857142858e-06, + "loss": 0.5065, + "step": 575 + }, + { + "epoch": 75.0, + "grad_norm": 1.7559466361999512, + "learning_rate": 9.725714285714287e-06, + "loss": 0.4871, + "step": 600 + }, + { + "epoch": 78.13333333333334, + "grad_norm": 2.653345823287964, + "learning_rate": 9.654285714285716e-06, + "loss": 0.4941, + "step": 625 + }, + { + "epoch": 81.26666666666667, + "grad_norm": 2.612226724624634, + "learning_rate": 9.582857142857143e-06, + "loss": 0.4796, + "step": 650 + }, + { + "epoch": 84.4, + "grad_norm": 1.7446099519729614, + "learning_rate": 9.511428571428572e-06, + "loss": 0.487, + "step": 675 + }, + { + "epoch": 87.53333333333333, + "grad_norm": 2.627315044403076, + "learning_rate": 9.440000000000001e-06, + "loss": 0.4731, + "step": 700 + }, + { + "epoch": 90.66666666666667, + "grad_norm": 2.4315383434295654, + "learning_rate": 9.368571428571428e-06, + "loss": 0.4812, + "step": 725 + }, + { + "epoch": 93.8, + "grad_norm": 2.4056336879730225, + "learning_rate": 9.297142857142857e-06, + "loss": 0.468, + "step": 750 + }, + { + "epoch": 96.93333333333334, + "grad_norm": 2.153116464614868, + "learning_rate": 9.225714285714286e-06, + "loss": 0.4829, + "step": 775 + }, + { + "epoch": 100.0, + "grad_norm": 2.9421756267547607, + "learning_rate": 9.154285714285715e-06, + "loss": 0.4555, + "step": 800 + }, + { + "epoch": 103.13333333333334, + "grad_norm": 1.6771883964538574, + "learning_rate": 9.082857142857143e-06, + "loss": 0.462, + "step": 825 + }, + { + "epoch": 106.26666666666667, + "grad_norm": 2.9711899757385254, + "learning_rate": 9.011428571428572e-06, + "loss": 0.471, + "step": 850 + }, + { + "epoch": 109.4, + "grad_norm": 1.922980546951294, + "learning_rate": 8.94e-06, + "loss": 0.4673, + "step": 875 + }, + { + "epoch": 112.53333333333333, + "grad_norm": 2.49945068359375, + "learning_rate": 8.86857142857143e-06, + "loss": 0.4611, + "step": 900 + }, + { + "epoch": 115.66666666666667, + "grad_norm": 2.646510362625122, + "learning_rate": 8.797142857142857e-06, + "loss": 0.4574, + "step": 925 + }, + { + "epoch": 118.8, + "grad_norm": 1.7943354845046997, + "learning_rate": 8.725714285714286e-06, + "loss": 0.4658, + "step": 950 + }, + { + "epoch": 121.93333333333334, + "grad_norm": 2.171827793121338, + "learning_rate": 8.654285714285715e-06, + "loss": 0.4561, + "step": 975 + }, + { + "epoch": 125.0, + "grad_norm": 7.516489505767822, + "learning_rate": 8.582857142857144e-06, + "loss": 0.4472, + "step": 1000 + }, + { + "epoch": 125.0, + "eval_loss": 0.5154594779014587, + "eval_runtime": 0.7837, + "eval_samples_per_second": 33.175, + "eval_steps_per_second": 5.104, + "step": 1000 + }, + { + "epoch": 128.13333333333333, + "grad_norm": 2.5167343616485596, + "learning_rate": 8.511428571428571e-06, + "loss": 0.457, + "step": 1025 + }, + { + "epoch": 131.26666666666668, + "grad_norm": 3.3089983463287354, + "learning_rate": 8.44e-06, + "loss": 0.4456, + "step": 1050 + }, + { + "epoch": 134.4, + "grad_norm": 2.778348445892334, + "learning_rate": 8.36857142857143e-06, + "loss": 0.4612, + "step": 1075 + }, + { + "epoch": 137.53333333333333, + "grad_norm": 2.529778480529785, + "learning_rate": 8.297142857142859e-06, + "loss": 0.4429, + "step": 1100 + }, + { + "epoch": 
140.66666666666666, + "grad_norm": 1.76685631275177, + "learning_rate": 8.225714285714288e-06, + "loss": 0.4399, + "step": 1125 + }, + { + "epoch": 143.8, + "grad_norm": 1.8449666500091553, + "learning_rate": 8.154285714285715e-06, + "loss": 0.4329, + "step": 1150 + }, + { + "epoch": 146.93333333333334, + "grad_norm": 1.9097468852996826, + "learning_rate": 8.082857142857144e-06, + "loss": 0.4527, + "step": 1175 + }, + { + "epoch": 150.0, + "grad_norm": 3.892838716506958, + "learning_rate": 8.011428571428573e-06, + "loss": 0.4448, + "step": 1200 + }, + { + "epoch": 153.13333333333333, + "grad_norm": 2.1518826484680176, + "learning_rate": 7.94e-06, + "loss": 0.4412, + "step": 1225 + }, + { + "epoch": 156.26666666666668, + "grad_norm": 1.5322662591934204, + "learning_rate": 7.86857142857143e-06, + "loss": 0.4388, + "step": 1250 + }, + { + "epoch": 159.4, + "grad_norm": 1.4961107969284058, + "learning_rate": 7.797142857142858e-06, + "loss": 0.4363, + "step": 1275 + }, + { + "epoch": 162.53333333333333, + "grad_norm": 1.8992841243743896, + "learning_rate": 7.725714285714286e-06, + "loss": 0.4474, + "step": 1300 + }, + { + "epoch": 165.66666666666666, + "grad_norm": 1.5015554428100586, + "learning_rate": 7.654285714285715e-06, + "loss": 0.4327, + "step": 1325 + }, + { + "epoch": 168.8, + "grad_norm": 2.0730693340301514, + "learning_rate": 7.5828571428571444e-06, + "loss": 0.4348, + "step": 1350 + }, + { + "epoch": 171.93333333333334, + "grad_norm": 2.0838747024536133, + "learning_rate": 7.511428571428572e-06, + "loss": 0.4393, + "step": 1375 + }, + { + "epoch": 175.0, + "grad_norm": 4.3804030418396, + "learning_rate": 7.440000000000001e-06, + "loss": 0.4386, + "step": 1400 + }, + { + "epoch": 178.13333333333333, + "grad_norm": 1.8927189111709595, + "learning_rate": 7.36857142857143e-06, + "loss": 0.4318, + "step": 1425 + }, + { + "epoch": 181.26666666666668, + "grad_norm": 1.5456620454788208, + "learning_rate": 7.297142857142858e-06, + "loss": 0.4336, + "step": 1450 + }, + { + "epoch": 184.4, + "grad_norm": 2.722612142562866, + "learning_rate": 7.225714285714286e-06, + "loss": 0.4281, + "step": 1475 + }, + { + "epoch": 187.53333333333333, + "grad_norm": 1.9484314918518066, + "learning_rate": 7.154285714285715e-06, + "loss": 0.4312, + "step": 1500 + }, + { + "epoch": 190.66666666666666, + "grad_norm": 2.101043224334717, + "learning_rate": 7.082857142857143e-06, + "loss": 0.427, + "step": 1525 + }, + { + "epoch": 193.8, + "grad_norm": 1.9785490036010742, + "learning_rate": 7.011428571428572e-06, + "loss": 0.4298, + "step": 1550 + }, + { + "epoch": 196.93333333333334, + "grad_norm": 2.319054126739502, + "learning_rate": 6.9400000000000005e-06, + "loss": 0.4376, + "step": 1575 + }, + { + "epoch": 200.0, + "grad_norm": 1.3612741231918335, + "learning_rate": 6.868571428571429e-06, + "loss": 0.4217, + "step": 1600 + }, + { + "epoch": 203.13333333333333, + "grad_norm": 2.128363847732544, + "learning_rate": 6.797142857142858e-06, + "loss": 0.4217, + "step": 1625 + }, + { + "epoch": 206.26666666666668, + "grad_norm": 1.7985234260559082, + "learning_rate": 6.725714285714287e-06, + "loss": 0.4147, + "step": 1650 + }, + { + "epoch": 209.4, + "grad_norm": 1.3478573560714722, + "learning_rate": 6.654285714285716e-06, + "loss": 0.4357, + "step": 1675 + }, + { + "epoch": 212.53333333333333, + "grad_norm": 1.5389248132705688, + "learning_rate": 6.582857142857143e-06, + "loss": 0.419, + "step": 1700 + }, + { + "epoch": 215.66666666666666, + "grad_norm": 1.9558783769607544, + "learning_rate": 6.511428571428572e-06, 
+ "loss": 0.4289, + "step": 1725 + }, + { + "epoch": 218.8, + "grad_norm": 1.756585955619812, + "learning_rate": 6.440000000000001e-06, + "loss": 0.4168, + "step": 1750 + }, + { + "epoch": 221.93333333333334, + "grad_norm": 1.8744903802871704, + "learning_rate": 6.368571428571429e-06, + "loss": 0.4296, + "step": 1775 + }, + { + "epoch": 225.0, + "grad_norm": 1.133415699005127, + "learning_rate": 6.297142857142857e-06, + "loss": 0.4162, + "step": 1800 + }, + { + "epoch": 228.13333333333333, + "grad_norm": 2.819840908050537, + "learning_rate": 6.225714285714286e-06, + "loss": 0.4275, + "step": 1825 + }, + { + "epoch": 231.26666666666668, + "grad_norm": 1.5150210857391357, + "learning_rate": 6.1542857142857145e-06, + "loss": 0.4244, + "step": 1850 + }, + { + "epoch": 234.4, + "grad_norm": 2.184819459915161, + "learning_rate": 6.0828571428571435e-06, + "loss": 0.4282, + "step": 1875 + }, + { + "epoch": 237.53333333333333, + "grad_norm": 3.293454170227051, + "learning_rate": 6.011428571428572e-06, + "loss": 0.4215, + "step": 1900 + }, + { + "epoch": 240.66666666666666, + "grad_norm": 1.210433006286621, + "learning_rate": 5.94e-06, + "loss": 0.4103, + "step": 1925 + }, + { + "epoch": 243.8, + "grad_norm": 2.5027923583984375, + "learning_rate": 5.868571428571429e-06, + "loss": 0.4186, + "step": 1950 + }, + { + "epoch": 246.93333333333334, + "grad_norm": 1.9649789333343506, + "learning_rate": 5.797142857142858e-06, + "loss": 0.427, + "step": 1975 + }, + { + "epoch": 250.0, + "grad_norm": 5.899420261383057, + "learning_rate": 5.725714285714287e-06, + "loss": 0.4113, + "step": 2000 + }, + { + "epoch": 250.0, + "eval_loss": 0.4833647906780243, + "eval_runtime": 0.7095, + "eval_samples_per_second": 36.646, + "eval_steps_per_second": 5.638, + "step": 2000 + }, + { + "epoch": 253.13333333333333, + "grad_norm": 2.0845134258270264, + "learning_rate": 5.654285714285714e-06, + "loss": 0.4168, + "step": 2025 + }, + { + "epoch": 256.26666666666665, + "grad_norm": 1.3729593753814697, + "learning_rate": 5.582857142857143e-06, + "loss": 0.4099, + "step": 2050 + }, + { + "epoch": 259.4, + "grad_norm": 1.8317629098892212, + "learning_rate": 5.511428571428572e-06, + "loss": 0.4136, + "step": 2075 + }, + { + "epoch": 262.53333333333336, + "grad_norm": 1.6238123178482056, + "learning_rate": 5.4400000000000004e-06, + "loss": 0.4204, + "step": 2100 + }, + { + "epoch": 265.6666666666667, + "grad_norm": 1.6968961954116821, + "learning_rate": 5.368571428571429e-06, + "loss": 0.4119, + "step": 2125 + }, + { + "epoch": 268.8, + "grad_norm": 2.1868855953216553, + "learning_rate": 5.297142857142858e-06, + "loss": 0.4114, + "step": 2150 + }, + { + "epoch": 271.93333333333334, + "grad_norm": 1.3070896863937378, + "learning_rate": 5.225714285714286e-06, + "loss": 0.4108, + "step": 2175 + }, + { + "epoch": 275.0, + "grad_norm": 1.977940559387207, + "learning_rate": 5.154285714285715e-06, + "loss": 0.4045, + "step": 2200 + }, + { + "epoch": 278.1333333333333, + "grad_norm": 1.6485978364944458, + "learning_rate": 5.082857142857144e-06, + "loss": 0.4119, + "step": 2225 + }, + { + "epoch": 281.26666666666665, + "grad_norm": 1.9459550380706787, + "learning_rate": 5.011428571428571e-06, + "loss": 0.411, + "step": 2250 + }, + { + "epoch": 284.4, + "grad_norm": 1.5531017780303955, + "learning_rate": 4.94e-06, + "loss": 0.4083, + "step": 2275 + }, + { + "epoch": 287.53333333333336, + "grad_norm": 1.232640027999878, + "learning_rate": 4.868571428571429e-06, + "loss": 0.4121, + "step": 2300 + }, + { + "epoch": 290.6666666666667, + 
"grad_norm": 7.107569217681885, + "learning_rate": 4.800000000000001e-06, + "loss": 0.4013, + "step": 2325 + }, + { + "epoch": 293.8, + "grad_norm": 1.387934684753418, + "learning_rate": 4.728571428571429e-06, + "loss": 0.4135, + "step": 2350 + }, + { + "epoch": 296.93333333333334, + "grad_norm": 1.8122384548187256, + "learning_rate": 4.657142857142857e-06, + "loss": 0.4025, + "step": 2375 + }, + { + "epoch": 300.0, + "grad_norm": 3.2206528186798096, + "learning_rate": 4.585714285714286e-06, + "loss": 0.4055, + "step": 2400 + }, + { + "epoch": 303.1333333333333, + "grad_norm": 1.6222842931747437, + "learning_rate": 4.514285714285714e-06, + "loss": 0.4125, + "step": 2425 + }, + { + "epoch": 306.26666666666665, + "grad_norm": 1.4375584125518799, + "learning_rate": 4.442857142857143e-06, + "loss": 0.4033, + "step": 2450 + }, + { + "epoch": 309.4, + "grad_norm": 1.173034906387329, + "learning_rate": 4.371428571428572e-06, + "loss": 0.4081, + "step": 2475 + }, + { + "epoch": 312.53333333333336, + "grad_norm": 1.9508713483810425, + "learning_rate": 4.3e-06, + "loss": 0.4126, + "step": 2500 + }, + { + "epoch": 315.6666666666667, + "grad_norm": 1.6111533641815186, + "learning_rate": 4.228571428571429e-06, + "loss": 0.3956, + "step": 2525 + }, + { + "epoch": 318.8, + "grad_norm": 2.0711958408355713, + "learning_rate": 4.1571428571428575e-06, + "loss": 0.4079, + "step": 2550 + }, + { + "epoch": 321.93333333333334, + "grad_norm": 2.312619924545288, + "learning_rate": 4.0857142857142865e-06, + "loss": 0.4172, + "step": 2575 + }, + { + "epoch": 325.0, + "grad_norm": 8.329635620117188, + "learning_rate": 4.014285714285715e-06, + "loss": 0.3956, + "step": 2600 + }, + { + "epoch": 328.1333333333333, + "grad_norm": 6.655773639678955, + "learning_rate": 3.942857142857143e-06, + "loss": 0.3998, + "step": 2625 + }, + { + "epoch": 331.26666666666665, + "grad_norm": 1.7531079053878784, + "learning_rate": 3.871428571428572e-06, + "loss": 0.4023, + "step": 2650 + }, + { + "epoch": 334.4, + "grad_norm": 2.5502614974975586, + "learning_rate": 3.8000000000000005e-06, + "loss": 0.4026, + "step": 2675 + }, + { + "epoch": 337.53333333333336, + "grad_norm": 1.471871256828308, + "learning_rate": 3.7285714285714286e-06, + "loss": 0.3981, + "step": 2700 + }, + { + "epoch": 340.6666666666667, + "grad_norm": 2.094290018081665, + "learning_rate": 3.6571428571428576e-06, + "loss": 0.4006, + "step": 2725 + }, + { + "epoch": 343.8, + "grad_norm": 1.3232810497283936, + "learning_rate": 3.5857142857142862e-06, + "loss": 0.4013, + "step": 2750 + }, + { + "epoch": 346.93333333333334, + "grad_norm": 1.5902683734893799, + "learning_rate": 3.5142857142857144e-06, + "loss": 0.4042, + "step": 2775 + }, + { + "epoch": 350.0, + "grad_norm": 5.186419486999512, + "learning_rate": 3.4428571428571434e-06, + "loss": 0.3843, + "step": 2800 + }, + { + "epoch": 353.1333333333333, + "grad_norm": 2.3405115604400635, + "learning_rate": 3.3714285714285716e-06, + "loss": 0.409, + "step": 2825 + }, + { + "epoch": 356.26666666666665, + "grad_norm": 1.1804980039596558, + "learning_rate": 3.3000000000000006e-06, + "loss": 0.405, + "step": 2850 + }, + { + "epoch": 359.4, + "grad_norm": 1.596712589263916, + "learning_rate": 3.2285714285714288e-06, + "loss": 0.4098, + "step": 2875 + }, + { + "epoch": 362.53333333333336, + "grad_norm": 1.9429064989089966, + "learning_rate": 3.1571428571428573e-06, + "loss": 0.413, + "step": 2900 + }, + { + "epoch": 365.6666666666667, + "grad_norm": 1.3636008501052856, + "learning_rate": 3.085714285714286e-06, + "loss": 0.394, 
+ "step": 2925 + }, + { + "epoch": 368.8, + "grad_norm": 1.2349225282669067, + "learning_rate": 3.0142857142857145e-06, + "loss": 0.3964, + "step": 2950 + }, + { + "epoch": 371.93333333333334, + "grad_norm": 1.3793219327926636, + "learning_rate": 2.9428571428571427e-06, + "loss": 0.3958, + "step": 2975 + }, + { + "epoch": 375.0, + "grad_norm": 7.785330772399902, + "learning_rate": 2.8714285714285717e-06, + "loss": 0.3906, + "step": 3000 + }, + { + "epoch": 375.0, + "eval_loss": 0.46799278259277344, + "eval_runtime": 0.7043, + "eval_samples_per_second": 36.916, + "eval_steps_per_second": 5.679, + "step": 3000 + } + ], + "logging_steps": 25, + "max_steps": 4000, + "num_input_tokens_seen": 0, + "num_train_epochs": 572, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7721257243235184.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3000/training_args.bin b/checkpoint-3000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a61b2643a9f565814c2aa62a171605debf82a9c0 --- /dev/null +++ b/checkpoint-3000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e653cd3b83482a0939ee4c8a207df9a996f44ce9dd82197c4ab6cde60cf2bb +size 5432 diff --git a/checkpoint-4000/added_tokens.json b/checkpoint-4000/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cd5b477a9075c49d99de65622db37bb06a251985 --- /dev/null +++ b/checkpoint-4000/added_tokens.json @@ -0,0 +1,4 @@ +{ + "": 80, + "": 79 +} diff --git a/checkpoint-4000/config.json b/checkpoint-4000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..11c8ec6e9cb5a360feb11650e24ba1adc08e8d0e --- /dev/null +++ b/checkpoint-4000/config.json @@ -0,0 +1,91 @@ +{ + "activation_dropout": 0.1, + "apply_spec_augment": true, + "architectures": [ + "SpeechT5ForTextToSpeech" + ], + "attention_dropout": 0.1, + "bos_token_id": 0, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.1, + "decoder_layers": 6, + "decoder_start_token_id": 2, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.1, + "encoder_layers": 12, + "encoder_max_relative_position": 160, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "guided_attention_loss_num_heads": 2, + "guided_attention_loss_scale": 10.0, + "guided_attention_loss_sigma": 0.4, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "is_encoder_decoder": true, + "layer_norm_eps": 1e-05, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": null, + "max_speech_positions": 1876, + "max_text_positions": 600, + "model_type": "speecht5", + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_mel_bins": 80, + "pad_token_id": 1, + "positional_dropout": 0.1, + "reduction_factor": 
2, + "scale_embedding": false, + "speaker_embedding_dim": 512, + "speech_decoder_postnet_dropout": 0.5, + "speech_decoder_postnet_kernel": 5, + "speech_decoder_postnet_layers": 5, + "speech_decoder_postnet_units": 256, + "speech_decoder_prenet_dropout": 0.5, + "speech_decoder_prenet_layers": 2, + "speech_decoder_prenet_units": 256, + "torch_dtype": "float32", + "transformers_version": "4.52.0.dev0", + "use_cache": false, + "use_guided_attention_loss": true, + "vocab_size": 81 +} diff --git a/checkpoint-4000/generation_config.json b/checkpoint-4000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3041cc103eccf911a925012f1ad8fd1d12cce149 --- /dev/null +++ b/checkpoint-4000/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "eos_token_id": 2, + "max_length": 1876, + "pad_token_id": 1, + "transformers_version": "4.52.0.dev0" +} diff --git a/checkpoint-4000/model.safetensors b/checkpoint-4000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..866c230062873c112e9b94191d9412b3e69f198f --- /dev/null +++ b/checkpoint-4000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055e59911edf63563dfc4431a4301a869cc74effda4ba5c905ace376e831bd5d +size 577789320 diff --git a/checkpoint-4000/optimizer.pt b/checkpoint-4000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3e45897b3d4795ce914d6ce27c6183b74bb9008 --- /dev/null +++ b/checkpoint-4000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e74b50ecc65d621a0c3fd3c8d0516e0345843175eb2a55af482c69d69da162e7 +size 1155772233 diff --git a/checkpoint-4000/rng_state.pth b/checkpoint-4000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e4d29a09b3287c78394eee4069e702e1fe5b8391 --- /dev/null +++ b/checkpoint-4000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75ff95056306e75aaca85257572ca65ded44b3fc874ae842724682d1ad4067c2 +size 14244 diff --git a/checkpoint-4000/scaler.pt b/checkpoint-4000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9abe9a00c2b45b15cd85258f1807eefeb3a51ead --- /dev/null +++ b/checkpoint-4000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:030f559b5aedef78935dd7632eb67ec4527791e9aca3eb758b902243f597abd2 +size 988 diff --git a/checkpoint-4000/scheduler.pt b/checkpoint-4000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bbf6940bd360ad866b84117412902669203f8f21 --- /dev/null +++ b/checkpoint-4000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7efbf80617c96c78286826ce59d9a12c86da62d7631874b3d6364a8e993ada60 +size 1064 diff --git a/checkpoint-4000/special_tokens_map.json b/checkpoint-4000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..4ee24ec69861cfc94abbe2c8c934aa0744aa623c --- /dev/null +++ b/checkpoint-4000/special_tokens_map.json @@ -0,0 +1,13 @@ +{ + "bos_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": "" +} diff --git a/checkpoint-4000/spm_char.model b/checkpoint-4000/spm_char.model new file mode 100644 index 0000000000000000000000000000000000000000..8fb73691942626fa75df80b61aab0e9b9340d8e2 --- 
/dev/null +++ b/checkpoint-4000/spm_char.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560 +size 238473 diff --git a/checkpoint-4000/tokenizer_config.json b/checkpoint-4000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e88d44ba3be31ac8f53461ae7c1b02b4c5c830ab --- /dev/null +++ b/checkpoint-4000/tokenizer_config.json @@ -0,0 +1,64 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "79": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "80": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 600, + "normalize": false, + "pad_token": "", + "processor_class": "SpeechT5Processor", + "sp_model_kwargs": {}, + "tokenizer_class": "SpeechT5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-4000/trainer_state.json b/checkpoint-4000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..92418265f5d30b89bb0f9c5fee970afb2c02d295 --- /dev/null +++ b/checkpoint-4000/trainer_state.json @@ -0,0 +1,1186 @@ +{ + "best_global_step": 4000, + "best_metric": 0.4673193097114563, + "best_model_checkpoint": "./speecht5_tts_mabama/checkpoint-4000", + "epoch": 500.0, + "eval_steps": 1000, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 3.1333333333333333, + "grad_norm": 13.092179298400879, + "learning_rate": 4.2000000000000006e-07, + "loss": 1.0978, + "step": 25 + }, + { + "epoch": 6.266666666666667, + "grad_norm": 13.538804054260254, + "learning_rate": 9.200000000000001e-07, + "loss": 1.0057, + "step": 50 + }, + { + "epoch": 9.4, + "grad_norm": 3.8923733234405518, + "learning_rate": 1.42e-06, + "loss": 0.8155, + "step": 75 + }, + { + "epoch": 12.533333333333333, + "grad_norm": 2.569746732711792, + "learning_rate": 1.9200000000000003e-06, + "loss": 0.7921, + "step": 100 + }, + { + "epoch": 15.666666666666666, + "grad_norm": 2.390493631362915, + "learning_rate": 2.42e-06, + "loss": 0.7531, + "step": 125 + }, + { + "epoch": 18.8, + "grad_norm": 2.7168779373168945, + "learning_rate": 2.92e-06, + "loss": 0.7393, + "step": 150 + }, + { + "epoch": 21.933333333333334, + "grad_norm": 10.27633285522461, + "learning_rate": 3.4200000000000007e-06, + "loss": 0.7292, + "step": 175 + }, + { + "epoch": 25.0, + "grad_norm": 5.6921000480651855, + "learning_rate": 3.920000000000001e-06, + "loss": 0.6642, + "step": 200 + }, + { + "epoch": 28.133333333333333, + "grad_norm": 2.6206777095794678, + "learning_rate": 4.42e-06, + "loss": 0.6555, + "step": 225 + }, + { + "epoch": 31.266666666666666, + "grad_norm": 1.9396028518676758, + 
"learning_rate": 4.92e-06, + "loss": 0.6484, + "step": 250 + }, + { + "epoch": 34.4, + "grad_norm": 3.44437575340271, + "learning_rate": 5.420000000000001e-06, + "loss": 0.6414, + "step": 275 + }, + { + "epoch": 37.53333333333333, + "grad_norm": 2.729497194290161, + "learning_rate": 5.92e-06, + "loss": 0.6323, + "step": 300 + }, + { + "epoch": 40.666666666666664, + "grad_norm": 2.3852877616882324, + "learning_rate": 6.42e-06, + "loss": 0.6073, + "step": 325 + }, + { + "epoch": 43.8, + "grad_norm": 4.4287109375, + "learning_rate": 6.92e-06, + "loss": 0.6034, + "step": 350 + }, + { + "epoch": 46.93333333333333, + "grad_norm": 2.1653966903686523, + "learning_rate": 7.420000000000001e-06, + "loss": 0.5865, + "step": 375 + }, + { + "epoch": 50.0, + "grad_norm": 2.8120265007019043, + "learning_rate": 7.92e-06, + "loss": 0.5556, + "step": 400 + }, + { + "epoch": 53.13333333333333, + "grad_norm": 2.0973806381225586, + "learning_rate": 8.42e-06, + "loss": 0.5416, + "step": 425 + }, + { + "epoch": 56.266666666666666, + "grad_norm": 2.6723616123199463, + "learning_rate": 8.920000000000001e-06, + "loss": 0.5407, + "step": 450 + }, + { + "epoch": 59.4, + "grad_norm": 2.1810383796691895, + "learning_rate": 9.42e-06, + "loss": 0.5174, + "step": 475 + }, + { + "epoch": 62.53333333333333, + "grad_norm": 3.464071750640869, + "learning_rate": 9.920000000000002e-06, + "loss": 0.5327, + "step": 500 + }, + { + "epoch": 65.66666666666667, + "grad_norm": 3.6148977279663086, + "learning_rate": 9.940000000000001e-06, + "loss": 0.5141, + "step": 525 + }, + { + "epoch": 68.8, + "grad_norm": 2.5631027221679688, + "learning_rate": 9.86857142857143e-06, + "loss": 0.5246, + "step": 550 + }, + { + "epoch": 71.93333333333334, + "grad_norm": 2.058468818664551, + "learning_rate": 9.797142857142858e-06, + "loss": 0.5065, + "step": 575 + }, + { + "epoch": 75.0, + "grad_norm": 1.7559466361999512, + "learning_rate": 9.725714285714287e-06, + "loss": 0.4871, + "step": 600 + }, + { + "epoch": 78.13333333333334, + "grad_norm": 2.653345823287964, + "learning_rate": 9.654285714285716e-06, + "loss": 0.4941, + "step": 625 + }, + { + "epoch": 81.26666666666667, + "grad_norm": 2.612226724624634, + "learning_rate": 9.582857142857143e-06, + "loss": 0.4796, + "step": 650 + }, + { + "epoch": 84.4, + "grad_norm": 1.7446099519729614, + "learning_rate": 9.511428571428572e-06, + "loss": 0.487, + "step": 675 + }, + { + "epoch": 87.53333333333333, + "grad_norm": 2.627315044403076, + "learning_rate": 9.440000000000001e-06, + "loss": 0.4731, + "step": 700 + }, + { + "epoch": 90.66666666666667, + "grad_norm": 2.4315383434295654, + "learning_rate": 9.368571428571428e-06, + "loss": 0.4812, + "step": 725 + }, + { + "epoch": 93.8, + "grad_norm": 2.4056336879730225, + "learning_rate": 9.297142857142857e-06, + "loss": 0.468, + "step": 750 + }, + { + "epoch": 96.93333333333334, + "grad_norm": 2.153116464614868, + "learning_rate": 9.225714285714286e-06, + "loss": 0.4829, + "step": 775 + }, + { + "epoch": 100.0, + "grad_norm": 2.9421756267547607, + "learning_rate": 9.154285714285715e-06, + "loss": 0.4555, + "step": 800 + }, + { + "epoch": 103.13333333333334, + "grad_norm": 1.6771883964538574, + "learning_rate": 9.082857142857143e-06, + "loss": 0.462, + "step": 825 + }, + { + "epoch": 106.26666666666667, + "grad_norm": 2.9711899757385254, + "learning_rate": 9.011428571428572e-06, + "loss": 0.471, + "step": 850 + }, + { + "epoch": 109.4, + "grad_norm": 1.922980546951294, + "learning_rate": 8.94e-06, + "loss": 0.4673, + "step": 875 + }, + { + "epoch": 
112.53333333333333, + "grad_norm": 2.49945068359375, + "learning_rate": 8.86857142857143e-06, + "loss": 0.4611, + "step": 900 + }, + { + "epoch": 115.66666666666667, + "grad_norm": 2.646510362625122, + "learning_rate": 8.797142857142857e-06, + "loss": 0.4574, + "step": 925 + }, + { + "epoch": 118.8, + "grad_norm": 1.7943354845046997, + "learning_rate": 8.725714285714286e-06, + "loss": 0.4658, + "step": 950 + }, + { + "epoch": 121.93333333333334, + "grad_norm": 2.171827793121338, + "learning_rate": 8.654285714285715e-06, + "loss": 0.4561, + "step": 975 + }, + { + "epoch": 125.0, + "grad_norm": 7.516489505767822, + "learning_rate": 8.582857142857144e-06, + "loss": 0.4472, + "step": 1000 + }, + { + "epoch": 125.0, + "eval_loss": 0.5154594779014587, + "eval_runtime": 0.7837, + "eval_samples_per_second": 33.175, + "eval_steps_per_second": 5.104, + "step": 1000 + }, + { + "epoch": 128.13333333333333, + "grad_norm": 2.5167343616485596, + "learning_rate": 8.511428571428571e-06, + "loss": 0.457, + "step": 1025 + }, + { + "epoch": 131.26666666666668, + "grad_norm": 3.3089983463287354, + "learning_rate": 8.44e-06, + "loss": 0.4456, + "step": 1050 + }, + { + "epoch": 134.4, + "grad_norm": 2.778348445892334, + "learning_rate": 8.36857142857143e-06, + "loss": 0.4612, + "step": 1075 + }, + { + "epoch": 137.53333333333333, + "grad_norm": 2.529778480529785, + "learning_rate": 8.297142857142859e-06, + "loss": 0.4429, + "step": 1100 + }, + { + "epoch": 140.66666666666666, + "grad_norm": 1.76685631275177, + "learning_rate": 8.225714285714288e-06, + "loss": 0.4399, + "step": 1125 + }, + { + "epoch": 143.8, + "grad_norm": 1.8449666500091553, + "learning_rate": 8.154285714285715e-06, + "loss": 0.4329, + "step": 1150 + }, + { + "epoch": 146.93333333333334, + "grad_norm": 1.9097468852996826, + "learning_rate": 8.082857142857144e-06, + "loss": 0.4527, + "step": 1175 + }, + { + "epoch": 150.0, + "grad_norm": 3.892838716506958, + "learning_rate": 8.011428571428573e-06, + "loss": 0.4448, + "step": 1200 + }, + { + "epoch": 153.13333333333333, + "grad_norm": 2.1518826484680176, + "learning_rate": 7.94e-06, + "loss": 0.4412, + "step": 1225 + }, + { + "epoch": 156.26666666666668, + "grad_norm": 1.5322662591934204, + "learning_rate": 7.86857142857143e-06, + "loss": 0.4388, + "step": 1250 + }, + { + "epoch": 159.4, + "grad_norm": 1.4961107969284058, + "learning_rate": 7.797142857142858e-06, + "loss": 0.4363, + "step": 1275 + }, + { + "epoch": 162.53333333333333, + "grad_norm": 1.8992841243743896, + "learning_rate": 7.725714285714286e-06, + "loss": 0.4474, + "step": 1300 + }, + { + "epoch": 165.66666666666666, + "grad_norm": 1.5015554428100586, + "learning_rate": 7.654285714285715e-06, + "loss": 0.4327, + "step": 1325 + }, + { + "epoch": 168.8, + "grad_norm": 2.0730693340301514, + "learning_rate": 7.5828571428571444e-06, + "loss": 0.4348, + "step": 1350 + }, + { + "epoch": 171.93333333333334, + "grad_norm": 2.0838747024536133, + "learning_rate": 7.511428571428572e-06, + "loss": 0.4393, + "step": 1375 + }, + { + "epoch": 175.0, + "grad_norm": 4.3804030418396, + "learning_rate": 7.440000000000001e-06, + "loss": 0.4386, + "step": 1400 + }, + { + "epoch": 178.13333333333333, + "grad_norm": 1.8927189111709595, + "learning_rate": 7.36857142857143e-06, + "loss": 0.4318, + "step": 1425 + }, + { + "epoch": 181.26666666666668, + "grad_norm": 1.5456620454788208, + "learning_rate": 7.297142857142858e-06, + "loss": 0.4336, + "step": 1450 + }, + { + "epoch": 184.4, + "grad_norm": 2.722612142562866, + "learning_rate": 
7.225714285714286e-06, + "loss": 0.4281, + "step": 1475 + }, + { + "epoch": 187.53333333333333, + "grad_norm": 1.9484314918518066, + "learning_rate": 7.154285714285715e-06, + "loss": 0.4312, + "step": 1500 + }, + { + "epoch": 190.66666666666666, + "grad_norm": 2.101043224334717, + "learning_rate": 7.082857142857143e-06, + "loss": 0.427, + "step": 1525 + }, + { + "epoch": 193.8, + "grad_norm": 1.9785490036010742, + "learning_rate": 7.011428571428572e-06, + "loss": 0.4298, + "step": 1550 + }, + { + "epoch": 196.93333333333334, + "grad_norm": 2.319054126739502, + "learning_rate": 6.9400000000000005e-06, + "loss": 0.4376, + "step": 1575 + }, + { + "epoch": 200.0, + "grad_norm": 1.3612741231918335, + "learning_rate": 6.868571428571429e-06, + "loss": 0.4217, + "step": 1600 + }, + { + "epoch": 203.13333333333333, + "grad_norm": 2.128363847732544, + "learning_rate": 6.797142857142858e-06, + "loss": 0.4217, + "step": 1625 + }, + { + "epoch": 206.26666666666668, + "grad_norm": 1.7985234260559082, + "learning_rate": 6.725714285714287e-06, + "loss": 0.4147, + "step": 1650 + }, + { + "epoch": 209.4, + "grad_norm": 1.3478573560714722, + "learning_rate": 6.654285714285716e-06, + "loss": 0.4357, + "step": 1675 + }, + { + "epoch": 212.53333333333333, + "grad_norm": 1.5389248132705688, + "learning_rate": 6.582857142857143e-06, + "loss": 0.419, + "step": 1700 + }, + { + "epoch": 215.66666666666666, + "grad_norm": 1.9558783769607544, + "learning_rate": 6.511428571428572e-06, + "loss": 0.4289, + "step": 1725 + }, + { + "epoch": 218.8, + "grad_norm": 1.756585955619812, + "learning_rate": 6.440000000000001e-06, + "loss": 0.4168, + "step": 1750 + }, + { + "epoch": 221.93333333333334, + "grad_norm": 1.8744903802871704, + "learning_rate": 6.368571428571429e-06, + "loss": 0.4296, + "step": 1775 + }, + { + "epoch": 225.0, + "grad_norm": 1.133415699005127, + "learning_rate": 6.297142857142857e-06, + "loss": 0.4162, + "step": 1800 + }, + { + "epoch": 228.13333333333333, + "grad_norm": 2.819840908050537, + "learning_rate": 6.225714285714286e-06, + "loss": 0.4275, + "step": 1825 + }, + { + "epoch": 231.26666666666668, + "grad_norm": 1.5150210857391357, + "learning_rate": 6.1542857142857145e-06, + "loss": 0.4244, + "step": 1850 + }, + { + "epoch": 234.4, + "grad_norm": 2.184819459915161, + "learning_rate": 6.0828571428571435e-06, + "loss": 0.4282, + "step": 1875 + }, + { + "epoch": 237.53333333333333, + "grad_norm": 3.293454170227051, + "learning_rate": 6.011428571428572e-06, + "loss": 0.4215, + "step": 1900 + }, + { + "epoch": 240.66666666666666, + "grad_norm": 1.210433006286621, + "learning_rate": 5.94e-06, + "loss": 0.4103, + "step": 1925 + }, + { + "epoch": 243.8, + "grad_norm": 2.5027923583984375, + "learning_rate": 5.868571428571429e-06, + "loss": 0.4186, + "step": 1950 + }, + { + "epoch": 246.93333333333334, + "grad_norm": 1.9649789333343506, + "learning_rate": 5.797142857142858e-06, + "loss": 0.427, + "step": 1975 + }, + { + "epoch": 250.0, + "grad_norm": 5.899420261383057, + "learning_rate": 5.725714285714287e-06, + "loss": 0.4113, + "step": 2000 + }, + { + "epoch": 250.0, + "eval_loss": 0.4833647906780243, + "eval_runtime": 0.7095, + "eval_samples_per_second": 36.646, + "eval_steps_per_second": 5.638, + "step": 2000 + }, + { + "epoch": 253.13333333333333, + "grad_norm": 2.0845134258270264, + "learning_rate": 5.654285714285714e-06, + "loss": 0.4168, + "step": 2025 + }, + { + "epoch": 256.26666666666665, + "grad_norm": 1.3729593753814697, + "learning_rate": 5.582857142857143e-06, + "loss": 0.4099, + "step": 2050 + 
}, + { + "epoch": 259.4, + "grad_norm": 1.8317629098892212, + "learning_rate": 5.511428571428572e-06, + "loss": 0.4136, + "step": 2075 + }, + { + "epoch": 262.53333333333336, + "grad_norm": 1.6238123178482056, + "learning_rate": 5.4400000000000004e-06, + "loss": 0.4204, + "step": 2100 + }, + { + "epoch": 265.6666666666667, + "grad_norm": 1.6968961954116821, + "learning_rate": 5.368571428571429e-06, + "loss": 0.4119, + "step": 2125 + }, + { + "epoch": 268.8, + "grad_norm": 2.1868855953216553, + "learning_rate": 5.297142857142858e-06, + "loss": 0.4114, + "step": 2150 + }, + { + "epoch": 271.93333333333334, + "grad_norm": 1.3070896863937378, + "learning_rate": 5.225714285714286e-06, + "loss": 0.4108, + "step": 2175 + }, + { + "epoch": 275.0, + "grad_norm": 1.977940559387207, + "learning_rate": 5.154285714285715e-06, + "loss": 0.4045, + "step": 2200 + }, + { + "epoch": 278.1333333333333, + "grad_norm": 1.6485978364944458, + "learning_rate": 5.082857142857144e-06, + "loss": 0.4119, + "step": 2225 + }, + { + "epoch": 281.26666666666665, + "grad_norm": 1.9459550380706787, + "learning_rate": 5.011428571428571e-06, + "loss": 0.411, + "step": 2250 + }, + { + "epoch": 284.4, + "grad_norm": 1.5531017780303955, + "learning_rate": 4.94e-06, + "loss": 0.4083, + "step": 2275 + }, + { + "epoch": 287.53333333333336, + "grad_norm": 1.232640027999878, + "learning_rate": 4.868571428571429e-06, + "loss": 0.4121, + "step": 2300 + }, + { + "epoch": 290.6666666666667, + "grad_norm": 7.107569217681885, + "learning_rate": 4.800000000000001e-06, + "loss": 0.4013, + "step": 2325 + }, + { + "epoch": 293.8, + "grad_norm": 1.387934684753418, + "learning_rate": 4.728571428571429e-06, + "loss": 0.4135, + "step": 2350 + }, + { + "epoch": 296.93333333333334, + "grad_norm": 1.8122384548187256, + "learning_rate": 4.657142857142857e-06, + "loss": 0.4025, + "step": 2375 + }, + { + "epoch": 300.0, + "grad_norm": 3.2206528186798096, + "learning_rate": 4.585714285714286e-06, + "loss": 0.4055, + "step": 2400 + }, + { + "epoch": 303.1333333333333, + "grad_norm": 1.6222842931747437, + "learning_rate": 4.514285714285714e-06, + "loss": 0.4125, + "step": 2425 + }, + { + "epoch": 306.26666666666665, + "grad_norm": 1.4375584125518799, + "learning_rate": 4.442857142857143e-06, + "loss": 0.4033, + "step": 2450 + }, + { + "epoch": 309.4, + "grad_norm": 1.173034906387329, + "learning_rate": 4.371428571428572e-06, + "loss": 0.4081, + "step": 2475 + }, + { + "epoch": 312.53333333333336, + "grad_norm": 1.9508713483810425, + "learning_rate": 4.3e-06, + "loss": 0.4126, + "step": 2500 + }, + { + "epoch": 315.6666666666667, + "grad_norm": 1.6111533641815186, + "learning_rate": 4.228571428571429e-06, + "loss": 0.3956, + "step": 2525 + }, + { + "epoch": 318.8, + "grad_norm": 2.0711958408355713, + "learning_rate": 4.1571428571428575e-06, + "loss": 0.4079, + "step": 2550 + }, + { + "epoch": 321.93333333333334, + "grad_norm": 2.312619924545288, + "learning_rate": 4.0857142857142865e-06, + "loss": 0.4172, + "step": 2575 + }, + { + "epoch": 325.0, + "grad_norm": 8.329635620117188, + "learning_rate": 4.014285714285715e-06, + "loss": 0.3956, + "step": 2600 + }, + { + "epoch": 328.1333333333333, + "grad_norm": 6.655773639678955, + "learning_rate": 3.942857142857143e-06, + "loss": 0.3998, + "step": 2625 + }, + { + "epoch": 331.26666666666665, + "grad_norm": 1.7531079053878784, + "learning_rate": 3.871428571428572e-06, + "loss": 0.4023, + "step": 2650 + }, + { + "epoch": 334.4, + "grad_norm": 2.5502614974975586, + "learning_rate": 3.8000000000000005e-06, + 
"loss": 0.4026, + "step": 2675 + }, + { + "epoch": 337.53333333333336, + "grad_norm": 1.471871256828308, + "learning_rate": 3.7285714285714286e-06, + "loss": 0.3981, + "step": 2700 + }, + { + "epoch": 340.6666666666667, + "grad_norm": 2.094290018081665, + "learning_rate": 3.6571428571428576e-06, + "loss": 0.4006, + "step": 2725 + }, + { + "epoch": 343.8, + "grad_norm": 1.3232810497283936, + "learning_rate": 3.5857142857142862e-06, + "loss": 0.4013, + "step": 2750 + }, + { + "epoch": 346.93333333333334, + "grad_norm": 1.5902683734893799, + "learning_rate": 3.5142857142857144e-06, + "loss": 0.4042, + "step": 2775 + }, + { + "epoch": 350.0, + "grad_norm": 5.186419486999512, + "learning_rate": 3.4428571428571434e-06, + "loss": 0.3843, + "step": 2800 + }, + { + "epoch": 353.1333333333333, + "grad_norm": 2.3405115604400635, + "learning_rate": 3.3714285714285716e-06, + "loss": 0.409, + "step": 2825 + }, + { + "epoch": 356.26666666666665, + "grad_norm": 1.1804980039596558, + "learning_rate": 3.3000000000000006e-06, + "loss": 0.405, + "step": 2850 + }, + { + "epoch": 359.4, + "grad_norm": 1.596712589263916, + "learning_rate": 3.2285714285714288e-06, + "loss": 0.4098, + "step": 2875 + }, + { + "epoch": 362.53333333333336, + "grad_norm": 1.9429064989089966, + "learning_rate": 3.1571428571428573e-06, + "loss": 0.413, + "step": 2900 + }, + { + "epoch": 365.6666666666667, + "grad_norm": 1.3636008501052856, + "learning_rate": 3.085714285714286e-06, + "loss": 0.394, + "step": 2925 + }, + { + "epoch": 368.8, + "grad_norm": 1.2349225282669067, + "learning_rate": 3.0142857142857145e-06, + "loss": 0.3964, + "step": 2950 + }, + { + "epoch": 371.93333333333334, + "grad_norm": 1.3793219327926636, + "learning_rate": 2.9428571428571427e-06, + "loss": 0.3958, + "step": 2975 + }, + { + "epoch": 375.0, + "grad_norm": 7.785330772399902, + "learning_rate": 2.8714285714285717e-06, + "loss": 0.3906, + "step": 3000 + }, + { + "epoch": 375.0, + "eval_loss": 0.46799278259277344, + "eval_runtime": 0.7043, + "eval_samples_per_second": 36.916, + "eval_steps_per_second": 5.679, + "step": 3000 + }, + { + "epoch": 378.1333333333333, + "grad_norm": 1.4927845001220703, + "learning_rate": 2.8000000000000003e-06, + "loss": 0.4, + "step": 3025 + }, + { + "epoch": 381.26666666666665, + "grad_norm": 1.2291827201843262, + "learning_rate": 2.728571428571429e-06, + "loss": 0.3989, + "step": 3050 + }, + { + "epoch": 384.4, + "grad_norm": 1.579060673713684, + "learning_rate": 2.6571428571428575e-06, + "loss": 0.3999, + "step": 3075 + }, + { + "epoch": 387.53333333333336, + "grad_norm": 1.6574435234069824, + "learning_rate": 2.5857142857142856e-06, + "loss": 0.388, + "step": 3100 + }, + { + "epoch": 390.6666666666667, + "grad_norm": 1.3004297018051147, + "learning_rate": 2.5142857142857147e-06, + "loss": 0.3921, + "step": 3125 + }, + { + "epoch": 393.8, + "grad_norm": 1.097733497619629, + "learning_rate": 2.442857142857143e-06, + "loss": 0.389, + "step": 3150 + }, + { + "epoch": 396.93333333333334, + "grad_norm": 2.4902069568634033, + "learning_rate": 2.371428571428572e-06, + "loss": 0.3969, + "step": 3175 + }, + { + "epoch": 400.0, + "grad_norm": 2.962905168533325, + "learning_rate": 2.3000000000000004e-06, + "loss": 0.3915, + "step": 3200 + }, + { + "epoch": 403.1333333333333, + "grad_norm": 1.7403258085250854, + "learning_rate": 2.228571428571429e-06, + "loss": 0.3917, + "step": 3225 + }, + { + "epoch": 406.26666666666665, + "grad_norm": 1.8885612487792969, + "learning_rate": 2.157142857142857e-06, + "loss": 0.3994, + "step": 3250 + }, + { 
+ "epoch": 409.4, + "grad_norm": 1.08004891872406, + "learning_rate": 2.0857142857142858e-06, + "loss": 0.3957, + "step": 3275 + }, + { + "epoch": 412.53333333333336, + "grad_norm": 1.0919499397277832, + "learning_rate": 2.0142857142857144e-06, + "loss": 0.4011, + "step": 3300 + }, + { + "epoch": 415.6666666666667, + "grad_norm": 1.176896095275879, + "learning_rate": 1.942857142857143e-06, + "loss": 0.4051, + "step": 3325 + }, + { + "epoch": 418.8, + "grad_norm": 1.5768754482269287, + "learning_rate": 1.8714285714285715e-06, + "loss": 0.3922, + "step": 3350 + }, + { + "epoch": 421.93333333333334, + "grad_norm": 1.541318416595459, + "learning_rate": 1.8000000000000001e-06, + "loss": 0.3921, + "step": 3375 + }, + { + "epoch": 425.0, + "grad_norm": 2.7317168712615967, + "learning_rate": 1.7285714285714287e-06, + "loss": 0.3867, + "step": 3400 + }, + { + "epoch": 428.1333333333333, + "grad_norm": 1.5212819576263428, + "learning_rate": 1.657142857142857e-06, + "loss": 0.3994, + "step": 3425 + }, + { + "epoch": 431.26666666666665, + "grad_norm": 1.6700373888015747, + "learning_rate": 1.5857142857142857e-06, + "loss": 0.404, + "step": 3450 + }, + { + "epoch": 434.4, + "grad_norm": 1.2883983850479126, + "learning_rate": 1.5142857142857145e-06, + "loss": 0.383, + "step": 3475 + }, + { + "epoch": 437.53333333333336, + "grad_norm": 1.3779336214065552, + "learning_rate": 1.442857142857143e-06, + "loss": 0.3913, + "step": 3500 + }, + { + "epoch": 440.6666666666667, + "grad_norm": 3.994981050491333, + "learning_rate": 1.3714285714285717e-06, + "loss": 0.3936, + "step": 3525 + }, + { + "epoch": 443.8, + "grad_norm": 1.1143438816070557, + "learning_rate": 1.3e-06, + "loss": 0.389, + "step": 3550 + }, + { + "epoch": 446.93333333333334, + "grad_norm": 1.7732421159744263, + "learning_rate": 1.2285714285714286e-06, + "loss": 0.3996, + "step": 3575 + }, + { + "epoch": 450.0, + "grad_norm": 2.198235273361206, + "learning_rate": 1.1571428571428572e-06, + "loss": 0.383, + "step": 3600 + }, + { + "epoch": 453.1333333333333, + "grad_norm": 1.0842177867889404, + "learning_rate": 1.0857142857142858e-06, + "loss": 0.3959, + "step": 3625 + }, + { + "epoch": 456.26666666666665, + "grad_norm": 1.633346676826477, + "learning_rate": 1.0142857142857144e-06, + "loss": 0.3954, + "step": 3650 + }, + { + "epoch": 459.4, + "grad_norm": 1.3174960613250732, + "learning_rate": 9.42857142857143e-07, + "loss": 0.3921, + "step": 3675 + }, + { + "epoch": 462.53333333333336, + "grad_norm": 5.679307460784912, + "learning_rate": 8.714285714285716e-07, + "loss": 0.3967, + "step": 3700 + }, + { + "epoch": 465.6666666666667, + "grad_norm": 1.3262453079223633, + "learning_rate": 8.000000000000001e-07, + "loss": 0.389, + "step": 3725 + }, + { + "epoch": 468.8, + "grad_norm": 2.5061025619506836, + "learning_rate": 7.285714285714287e-07, + "loss": 0.4017, + "step": 3750 + }, + { + "epoch": 471.93333333333334, + "grad_norm": 9.512356758117676, + "learning_rate": 6.571428571428571e-07, + "loss": 0.3969, + "step": 3775 + }, + { + "epoch": 475.0, + "grad_norm": 2.8925583362579346, + "learning_rate": 5.857142857142857e-07, + "loss": 0.3851, + "step": 3800 + }, + { + "epoch": 478.1333333333333, + "grad_norm": 1.8175698518753052, + "learning_rate": 5.142857142857143e-07, + "loss": 0.3964, + "step": 3825 + }, + { + "epoch": 481.26666666666665, + "grad_norm": 1.5850640535354614, + "learning_rate": 4.4285714285714286e-07, + "loss": 0.3932, + "step": 3850 + }, + { + "epoch": 484.4, + "grad_norm": 1.6796472072601318, + "learning_rate": 
3.7142857142857145e-07, + "loss": 0.3965, + "step": 3875 + }, + { + "epoch": 487.53333333333336, + "grad_norm": 1.5245673656463623, + "learning_rate": 3.0000000000000004e-07, + "loss": 0.3889, + "step": 3900 + }, + { + "epoch": 490.6666666666667, + "grad_norm": 1.3968061208724976, + "learning_rate": 2.285714285714286e-07, + "loss": 0.3939, + "step": 3925 + }, + { + "epoch": 493.8, + "grad_norm": 1.4190447330474854, + "learning_rate": 1.5714285714285717e-07, + "loss": 0.3888, + "step": 3950 + }, + { + "epoch": 496.93333333333334, + "grad_norm": 1.2263931035995483, + "learning_rate": 8.571428571428573e-08, + "loss": 0.399, + "step": 3975 + }, + { + "epoch": 500.0, + "grad_norm": 7.713298797607422, + "learning_rate": 1.4285714285714288e-08, + "loss": 0.3902, + "step": 4000 + }, + { + "epoch": 500.0, + "eval_loss": 0.4673193097114563, + "eval_runtime": 0.7262, + "eval_samples_per_second": 35.803, + "eval_steps_per_second": 5.508, + "step": 4000 + } + ], + "logging_steps": 25, + "max_steps": 4000, + "num_input_tokens_seen": 0, + "num_train_epochs": 572, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.029284269051896e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4000/training_args.bin b/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a61b2643a9f565814c2aa62a171605debf82a9c0 --- /dev/null +++ b/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e653cd3b83482a0939ee4c8a207df9a996f44ce9dd82197c4ab6cde60cf2bb +size 5432 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..11c8ec6e9cb5a360feb11650e24ba1adc08e8d0e --- /dev/null +++ b/config.json @@ -0,0 +1,91 @@ +{ + "activation_dropout": 0.1, + "apply_spec_augment": true, + "architectures": [ + "SpeechT5ForTextToSpeech" + ], + "attention_dropout": 0.1, + "bos_token_id": 0, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.1, + "decoder_layers": 6, + "decoder_start_token_id": 2, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.1, + "encoder_layers": 12, + "encoder_max_relative_position": 160, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "guided_attention_loss_num_heads": 2, + "guided_attention_loss_scale": 10.0, + "guided_attention_loss_sigma": 0.4, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "is_encoder_decoder": true, + "layer_norm_eps": 1e-05, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": null, + "max_speech_positions": 1876, + "max_text_positions": 600, + "model_type": "speecht5", + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_mel_bins": 80, + "pad_token_id": 1, + "positional_dropout": 0.1, + 
"reduction_factor": 2, + "scale_embedding": false, + "speaker_embedding_dim": 512, + "speech_decoder_postnet_dropout": 0.5, + "speech_decoder_postnet_kernel": 5, + "speech_decoder_postnet_layers": 5, + "speech_decoder_postnet_units": 256, + "speech_decoder_prenet_dropout": 0.5, + "speech_decoder_prenet_layers": 2, + "speech_decoder_prenet_units": 256, + "torch_dtype": "float32", + "transformers_version": "4.52.0.dev0", + "use_cache": false, + "use_guided_attention_loss": true, + "vocab_size": 81 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3041cc103eccf911a925012f1ad8fd1d12cce149 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "eos_token_id": 2, + "max_length": 1876, + "pad_token_id": 1, + "transformers_version": "4.52.0.dev0" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..866c230062873c112e9b94191d9412b3e69f198f --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055e59911edf63563dfc4431a4301a869cc74effda4ba5c905ace376e831bd5d +size 577789320 diff --git a/runs/Apr21_05-30-54_9a6e619cb7d1/events.out.tfevents.1745213478.9a6e619cb7d1.455.0 b/runs/Apr21_05-30-54_9a6e619cb7d1/events.out.tfevents.1745213478.9a6e619cb7d1.455.0 new file mode 100644 index 0000000000000000000000000000000000000000..a8684a224ed518b4f1779c1a558537cb1728267d --- /dev/null +++ b/runs/Apr21_05-30-54_9a6e619cb7d1/events.out.tfevents.1745213478.9a6e619cb7d1.455.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8077436aa583fddfe2ef4a048c8ca21d368355fa2c74a1e17aba4c226d4dbd03 +size 41743 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..4ee24ec69861cfc94abbe2c8c934aa0744aa623c --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,13 @@ +{ + "bos_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": "" +} diff --git a/spm_char.model b/spm_char.model new file mode 100644 index 0000000000000000000000000000000000000000..8fb73691942626fa75df80b61aab0e9b9340d8e2 --- /dev/null +++ b/spm_char.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560 +size 238473 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e88d44ba3be31ac8f53461ae7c1b02b4c5c830ab --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,64 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "79": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "80": { + "content": "", + 
"lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 600, + "normalize": false, + "pad_token": "", + "processor_class": "SpeechT5Processor", + "sp_model_kwargs": {}, + "tokenizer_class": "SpeechT5Tokenizer", + "unk_token": "" +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a61b2643a9f565814c2aa62a171605debf82a9c0 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e653cd3b83482a0939ee4c8a207df9a996f44ce9dd82197c4ab6cde60cf2bb +size 5432