omarViga committed on Apr 26

Commit

79c0a9c

verified ·

1 Parent(s): dda1623

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

README.md +8 -0
added_tokens.json +4 -0
checkpoint-1000/added_tokens.json +4 -0
checkpoint-1000/config.json +91 -0
checkpoint-1000/generation_config.json +9 -0
checkpoint-1000/model.safetensors +3 -0
checkpoint-1000/optimizer.pt +3 -0
checkpoint-1000/rng_state.pth +3 -0
checkpoint-1000/scaler.pt +3 -0
checkpoint-1000/scheduler.pt +3 -0
checkpoint-1000/special_tokens_map.json +13 -0
checkpoint-1000/spm_char.model +3 -0
checkpoint-1000/tokenizer_config.json +64 -0
checkpoint-1000/trainer_state.json +322 -0
checkpoint-1000/training_args.bin +3 -0
checkpoint-2000/added_tokens.json +4 -0
checkpoint-2000/config.json +91 -0
checkpoint-2000/generation_config.json +9 -0
checkpoint-2000/model.safetensors +3 -0
checkpoint-2000/optimizer.pt +3 -0
checkpoint-2000/rng_state.pth +3 -0
checkpoint-2000/scaler.pt +3 -0
checkpoint-2000/scheduler.pt +3 -0
checkpoint-2000/special_tokens_map.json +13 -0
checkpoint-2000/spm_char.model +3 -0
checkpoint-2000/tokenizer_config.json +64 -0
checkpoint-2000/trainer_state.json +610 -0
checkpoint-2000/training_args.bin +3 -0
checkpoint-3000/added_tokens.json +4 -0
checkpoint-3000/config.json +91 -0
checkpoint-3000/generation_config.json +9 -0
checkpoint-3000/model.safetensors +3 -0
checkpoint-3000/optimizer.pt +3 -0
checkpoint-3000/rng_state.pth +3 -0
checkpoint-3000/scaler.pt +3 -0
checkpoint-3000/scheduler.pt +3 -0
checkpoint-3000/special_tokens_map.json +13 -0
checkpoint-3000/spm_char.model +3 -0
checkpoint-3000/tokenizer_config.json +64 -0
checkpoint-3000/trainer_state.json +898 -0
checkpoint-3000/training_args.bin +3 -0
checkpoint-4000/added_tokens.json +4 -0
checkpoint-4000/config.json +91 -0
checkpoint-4000/generation_config.json +9 -0
checkpoint-4000/model.safetensors +3 -0
checkpoint-4000/optimizer.pt +3 -0
checkpoint-4000/rng_state.pth +3 -0
checkpoint-4000/scaler.pt +3 -0
checkpoint-4000/scheduler.pt +3 -0
checkpoint-4000/special_tokens_map.json +13 -0

README.md ADDED Viewed

	@@ -0,0 +1,8 @@

+---
+tags:
+- text-to-speech
+- speecht5
+- mabama  # custom tag; every entry in this tags list must be non-empty
+library_name: transformers
+license: mit
+---

added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "<ctc_blank>": 80,
+  "<mask>": 79
+}

checkpoint-1000/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "<ctc_blank>": 80,
+  "<mask>": 79
+}

checkpoint-1000/config.json ADDED Viewed

	@@ -0,0 +1,91 @@

+{
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "SpeechT5ForTextToSpeech"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.1,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 2,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.1,
+  "encoder_layers": 12,
+  "encoder_max_relative_position": 160,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
+  "guided_attention_loss_num_heads": 2,
+  "guided_attention_loss_scale": 10.0,
+  "guided_attention_loss_sigma": 0.4,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "is_encoder_decoder": true,
+  "layer_norm_eps": 1e-05,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": null,
+  "max_speech_positions": 1876,
+  "max_text_positions": 600,
+  "model_type": "speecht5",
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_mel_bins": 80,
+  "pad_token_id": 1,
+  "positional_dropout": 0.1,
+  "reduction_factor": 2,
+  "scale_embedding": false,
+  "speaker_embedding_dim": 512,
+  "speech_decoder_postnet_dropout": 0.5,
+  "speech_decoder_postnet_kernel": 5,
+  "speech_decoder_postnet_layers": 5,
+  "speech_decoder_postnet_units": 256,
+  "speech_decoder_prenet_dropout": 0.5,
+  "speech_decoder_prenet_layers": 2,
+  "speech_decoder_prenet_units": 256,
+  "torch_dtype": "float32",
+  "transformers_version": "4.52.0.dev0",
+  "use_cache": false,
+  "use_guided_attention_loss": true,
+  "vocab_size": 81
+}

checkpoint-1000/generation_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 2,
+  "max_length": 1876,
+  "pad_token_id": 1,
+  "transformers_version": "4.52.0.dev0"
+}

checkpoint-1000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:87514d400a13c6eefdeb2f89abd1795e621c2344f96e159ce2aeba3d0ce85944
+size 577789320

checkpoint-1000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2699a889db8dbb0ae670281bc558951bffc765ae88e6bb0bb5222ac12288814b
+size 1155772233

checkpoint-1000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:82d177efef76b1d9db7c817f74c58d37c483a0042c96999443934a8052be41aa
+size 14244

checkpoint-1000/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:27eb6d31126283f601b217f22a8971040a00a73abf0a2e26bfcb5064cd0afa48
+size 988

checkpoint-1000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5101d8c1f86d6f48167e50b1164b9ba363ab76694ff2d5c1e326e3d5f94ecaef
+size 1064

checkpoint-1000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

checkpoint-1000/spm_char.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560
+size 238473

checkpoint-1000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "79": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "80": {
+      "content": "<ctc_blank>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 600,
+  "normalize": false,
+  "pad_token": "<pad>",
+  "processor_class": "SpeechT5Processor",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "SpeechT5Tokenizer",
+  "unk_token": "<unk>"
+}

checkpoint-1000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,322 @@

+{
+  "best_global_step": 1000,
+  "best_metric": 0.5154594779014587,
+  "best_model_checkpoint": "./speecht5_tts_mabama/checkpoint-1000",
+  "epoch": 125.0,
+  "eval_steps": 1000,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 3.1333333333333333,
+      "grad_norm": 13.092179298400879,
+      "learning_rate": 4.2000000000000006e-07,
+      "loss": 1.0978,
+      "step": 25
+    },
+    {
+      "epoch": 6.266666666666667,
+      "grad_norm": 13.538804054260254,
+      "learning_rate": 9.200000000000001e-07,
+      "loss": 1.0057,
+      "step": 50
+    },
+    {
+      "epoch": 9.4,
+      "grad_norm": 3.8923733234405518,
+      "learning_rate": 1.42e-06,
+      "loss": 0.8155,
+      "step": 75
+    },
+    {
+      "epoch": 12.533333333333333,
+      "grad_norm": 2.569746732711792,
+      "learning_rate": 1.9200000000000003e-06,
+      "loss": 0.7921,
+      "step": 100
+    },
+    {
+      "epoch": 15.666666666666666,
+      "grad_norm": 2.390493631362915,
+      "learning_rate": 2.42e-06,
+      "loss": 0.7531,
+      "step": 125
+    },
+    {
+      "epoch": 18.8,
+      "grad_norm": 2.7168779373168945,
+      "learning_rate": 2.92e-06,
+      "loss": 0.7393,
+      "step": 150
+    },
+    {
+      "epoch": 21.933333333333334,
+      "grad_norm": 10.27633285522461,
+      "learning_rate": 3.4200000000000007e-06,
+      "loss": 0.7292,
+      "step": 175
+    },
+    {
+      "epoch": 25.0,
+      "grad_norm": 5.6921000480651855,
+      "learning_rate": 3.920000000000001e-06,
+      "loss": 0.6642,
+      "step": 200
+    },
+    {
+      "epoch": 28.133333333333333,
+      "grad_norm": 2.6206777095794678,
+      "learning_rate": 4.42e-06,
+      "loss": 0.6555,
+      "step": 225
+    },
+    {
+      "epoch": 31.266666666666666,
+      "grad_norm": 1.9396028518676758,
+      "learning_rate": 4.92e-06,
+      "loss": 0.6484,
+      "step": 250
+    },
+    {
+      "epoch": 34.4,
+      "grad_norm": 3.44437575340271,
+      "learning_rate": 5.420000000000001e-06,
+      "loss": 0.6414,
+      "step": 275
+    },
+    {
+      "epoch": 37.53333333333333,
+      "grad_norm": 2.729497194290161,
+      "learning_rate": 5.92e-06,
+      "loss": 0.6323,
+      "step": 300
+    },
+    {
+      "epoch": 40.666666666666664,
+      "grad_norm": 2.3852877616882324,
+      "learning_rate": 6.42e-06,
+      "loss": 0.6073,
+      "step": 325
+    },
+    {
+      "epoch": 43.8,
+      "grad_norm": 4.4287109375,
+      "learning_rate": 6.92e-06,
+      "loss": 0.6034,
+      "step": 350
+    },
+    {
+      "epoch": 46.93333333333333,
+      "grad_norm": 2.1653966903686523,
+      "learning_rate": 7.420000000000001e-06,
+      "loss": 0.5865,
+      "step": 375
+    },
+    {
+      "epoch": 50.0,
+      "grad_norm": 2.8120265007019043,
+      "learning_rate": 7.92e-06,
+      "loss": 0.5556,
+      "step": 400
+    },
+    {
+      "epoch": 53.13333333333333,
+      "grad_norm": 2.0973806381225586,
+      "learning_rate": 8.42e-06,
+      "loss": 0.5416,
+      "step": 425
+    },
+    {
+      "epoch": 56.266666666666666,
+      "grad_norm": 2.6723616123199463,
+      "learning_rate": 8.920000000000001e-06,
+      "loss": 0.5407,
+      "step": 450
+    },
+    {
+      "epoch": 59.4,
+      "grad_norm": 2.1810383796691895,
+      "learning_rate": 9.42e-06,
+      "loss": 0.5174,
+      "step": 475
+    },
+    {
+      "epoch": 62.53333333333333,
+      "grad_norm": 3.464071750640869,
+      "learning_rate": 9.920000000000002e-06,
+      "loss": 0.5327,
+      "step": 500
+    },
+    {
+      "epoch": 65.66666666666667,
+      "grad_norm": 3.6148977279663086,
+      "learning_rate": 9.940000000000001e-06,
+      "loss": 0.5141,
+      "step": 525
+    },
+    {
+      "epoch": 68.8,
+      "grad_norm": 2.5631027221679688,
+      "learning_rate": 9.86857142857143e-06,
+      "loss": 0.5246,
+      "step": 550
+    },
+    {
+      "epoch": 71.93333333333334,
+      "grad_norm": 2.058468818664551,
+      "learning_rate": 9.797142857142858e-06,
+      "loss": 0.5065,
+      "step": 575
+    },
+    {
+      "epoch": 75.0,
+      "grad_norm": 1.7559466361999512,
+      "learning_rate": 9.725714285714287e-06,
+      "loss": 0.4871,
+      "step": 600
+    },
+    {
+      "epoch": 78.13333333333334,
+      "grad_norm": 2.653345823287964,
+      "learning_rate": 9.654285714285716e-06,
+      "loss": 0.4941,
+      "step": 625
+    },
+    {
+      "epoch": 81.26666666666667,
+      "grad_norm": 2.612226724624634,
+      "learning_rate": 9.582857142857143e-06,
+      "loss": 0.4796,
+      "step": 650
+    },
+    {
+      "epoch": 84.4,
+      "grad_norm": 1.7446099519729614,
+      "learning_rate": 9.511428571428572e-06,
+      "loss": 0.487,
+      "step": 675
+    },
+    {
+      "epoch": 87.53333333333333,
+      "grad_norm": 2.627315044403076,
+      "learning_rate": 9.440000000000001e-06,
+      "loss": 0.4731,
+      "step": 700
+    },
+    {
+      "epoch": 90.66666666666667,
+      "grad_norm": 2.4315383434295654,
+      "learning_rate": 9.368571428571428e-06,
+      "loss": 0.4812,
+      "step": 725
+    },
+    {
+      "epoch": 93.8,
+      "grad_norm": 2.4056336879730225,
+      "learning_rate": 9.297142857142857e-06,
+      "loss": 0.468,
+      "step": 750
+    },
+    {
+      "epoch": 96.93333333333334,
+      "grad_norm": 2.153116464614868,
+      "learning_rate": 9.225714285714286e-06,
+      "loss": 0.4829,
+      "step": 775
+    },
+    {
+      "epoch": 100.0,
+      "grad_norm": 2.9421756267547607,
+      "learning_rate": 9.154285714285715e-06,
+      "loss": 0.4555,
+      "step": 800
+    },
+    {
+      "epoch": 103.13333333333334,
+      "grad_norm": 1.6771883964538574,
+      "learning_rate": 9.082857142857143e-06,
+      "loss": 0.462,
+      "step": 825
+    },
+    {
+      "epoch": 106.26666666666667,
+      "grad_norm": 2.9711899757385254,
+      "learning_rate": 9.011428571428572e-06,
+      "loss": 0.471,
+      "step": 850
+    },
+    {
+      "epoch": 109.4,
+      "grad_norm": 1.922980546951294,
+      "learning_rate": 8.94e-06,
+      "loss": 0.4673,
+      "step": 875
+    },
+    {
+      "epoch": 112.53333333333333,
+      "grad_norm": 2.49945068359375,
+      "learning_rate": 8.86857142857143e-06,
+      "loss": 0.4611,
+      "step": 900
+    },
+    {
+      "epoch": 115.66666666666667,
+      "grad_norm": 2.646510362625122,
+      "learning_rate": 8.797142857142857e-06,
+      "loss": 0.4574,
+      "step": 925
+    },
+    {
+      "epoch": 118.8,
+      "grad_norm": 1.7943354845046997,
+      "learning_rate": 8.725714285714286e-06,
+      "loss": 0.4658,
+      "step": 950
+    },
+    {
+      "epoch": 121.93333333333334,
+      "grad_norm": 2.171827793121338,
+      "learning_rate": 8.654285714285715e-06,
+      "loss": 0.4561,
+      "step": 975
+    },
+    {
+      "epoch": 125.0,
+      "grad_norm": 7.516489505767822,
+      "learning_rate": 8.582857142857144e-06,
+      "loss": 0.4472,
+      "step": 1000
+    },
+    {
+      "epoch": 125.0,
+      "eval_loss": 0.5154594779014587,
+      "eval_runtime": 0.7837,
+      "eval_samples_per_second": 33.175,
+      "eval_steps_per_second": 5.104,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 4000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 572,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2568713479659360.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:07e653cd3b83482a0939ee4c8a207df9a996f44ce9dd82197c4ab6cde60cf2bb
+size 5432

checkpoint-2000/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "<ctc_blank>": 80,
+  "<mask>": 79
+}

checkpoint-2000/config.json ADDED Viewed

	@@ -0,0 +1,91 @@

+{
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "SpeechT5ForTextToSpeech"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.1,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 2,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.1,
+  "encoder_layers": 12,
+  "encoder_max_relative_position": 160,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
+  "guided_attention_loss_num_heads": 2,
+  "guided_attention_loss_scale": 10.0,
+  "guided_attention_loss_sigma": 0.4,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "is_encoder_decoder": true,
+  "layer_norm_eps": 1e-05,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": null,
+  "max_speech_positions": 1876,
+  "max_text_positions": 600,
+  "model_type": "speecht5",
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_mel_bins": 80,
+  "pad_token_id": 1,
+  "positional_dropout": 0.1,
+  "reduction_factor": 2,
+  "scale_embedding": false,
+  "speaker_embedding_dim": 512,
+  "speech_decoder_postnet_dropout": 0.5,
+  "speech_decoder_postnet_kernel": 5,
+  "speech_decoder_postnet_layers": 5,
+  "speech_decoder_postnet_units": 256,
+  "speech_decoder_prenet_dropout": 0.5,
+  "speech_decoder_prenet_layers": 2,
+  "speech_decoder_prenet_units": 256,
+  "torch_dtype": "float32",
+  "transformers_version": "4.52.0.dev0",
+  "use_cache": false,
+  "use_guided_attention_loss": true,
+  "vocab_size": 81
+}

checkpoint-2000/generation_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 2,
+  "max_length": 1876,
+  "pad_token_id": 1,
+  "transformers_version": "4.52.0.dev0"
+}

checkpoint-2000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:abac69d52746eff2a8ac4fea48c076a031effd3d774eaa79c34c25289b78a9ad
+size 577789320

checkpoint-2000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df818e2b4cf58851158e10b3d57754c198be51c0b852b9cd4b587b629a205640
+size 1155772233

checkpoint-2000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b766bf7909addbb49e8f135f2f8aa3b6e99cb053e36395d8560f93e71c2776e7
+size 14244

checkpoint-2000/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49bc19d5712fad43d5cef95c2e01c73bd75bdb71e4c16fa8781d626d978f5452
+size 988

checkpoint-2000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b80a609c64a12b4db2f38941ea479b9a30f9351b7aac74f4956e8686dc338317
+size 1064

checkpoint-2000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

checkpoint-2000/spm_char.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560
+size 238473

checkpoint-2000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "79": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "80": {
+      "content": "<ctc_blank>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 600,
+  "normalize": false,
+  "pad_token": "<pad>",
+  "processor_class": "SpeechT5Processor",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "SpeechT5Tokenizer",
+  "unk_token": "<unk>"
+}

checkpoint-2000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,610 @@

+{
+  "best_global_step": 2000,
+  "best_metric": 0.4833647906780243,
+  "best_model_checkpoint": "./speecht5_tts_mabama/checkpoint-2000",
+  "epoch": 250.0,
+  "eval_steps": 1000,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 3.1333333333333333,
+      "grad_norm": 13.092179298400879,
+      "learning_rate": 4.2000000000000006e-07,
+      "loss": 1.0978,
+      "step": 25
+    },
+    {
+      "epoch": 6.266666666666667,
+      "grad_norm": 13.538804054260254,
+      "learning_rate": 9.200000000000001e-07,
+      "loss": 1.0057,
+      "step": 50
+    },
+    {
+      "epoch": 9.4,
+      "grad_norm": 3.8923733234405518,
+      "learning_rate": 1.42e-06,
+      "loss": 0.8155,
+      "step": 75
+    },
+    {
+      "epoch": 12.533333333333333,
+      "grad_norm": 2.569746732711792,
+      "learning_rate": 1.9200000000000003e-06,
+      "loss": 0.7921,
+      "step": 100
+    },
+    {
+      "epoch": 15.666666666666666,
+      "grad_norm": 2.390493631362915,
+      "learning_rate": 2.42e-06,
+      "loss": 0.7531,
+      "step": 125
+    },
+    {
+      "epoch": 18.8,
+      "grad_norm": 2.7168779373168945,
+      "learning_rate": 2.92e-06,
+      "loss": 0.7393,
+      "step": 150
+    },
+    {
+      "epoch": 21.933333333333334,
+      "grad_norm": 10.27633285522461,
+      "learning_rate": 3.4200000000000007e-06,
+      "loss": 0.7292,
+      "step": 175
+    },
+    {
+      "epoch": 25.0,
+      "grad_norm": 5.6921000480651855,
+      "learning_rate": 3.920000000000001e-06,
+      "loss": 0.6642,
+      "step": 200
+    },
+    {
+      "epoch": 28.133333333333333,
+      "grad_norm": 2.6206777095794678,
+      "learning_rate": 4.42e-06,
+      "loss": 0.6555,
+      "step": 225
+    },
+    {
+      "epoch": 31.266666666666666,
+      "grad_norm": 1.9396028518676758,
+      "learning_rate": 4.92e-06,
+      "loss": 0.6484,
+      "step": 250
+    },
+    {
+      "epoch": 34.4,
+      "grad_norm": 3.44437575340271,
+      "learning_rate": 5.420000000000001e-06,
+      "loss": 0.6414,
+      "step": 275
+    },
+    {
+      "epoch": 37.53333333333333,
+      "grad_norm": 2.729497194290161,
+      "learning_rate": 5.92e-06,
+      "loss": 0.6323,
+      "step": 300
+    },
+    {
+      "epoch": 40.666666666666664,
+      "grad_norm": 2.3852877616882324,
+      "learning_rate": 6.42e-06,
+      "loss": 0.6073,
+      "step": 325
+    },
+    {
+      "epoch": 43.8,
+      "grad_norm": 4.4287109375,
+      "learning_rate": 6.92e-06,
+      "loss": 0.6034,
+      "step": 350
+    },
+    {
+      "epoch": 46.93333333333333,
+      "grad_norm": 2.1653966903686523,
+      "learning_rate": 7.420000000000001e-06,
+      "loss": 0.5865,
+      "step": 375
+    },
+    {
+      "epoch": 50.0,
+      "grad_norm": 2.8120265007019043,
+      "learning_rate": 7.92e-06,
+      "loss": 0.5556,
+      "step": 400
+    },
+    {
+      "epoch": 53.13333333333333,
+      "grad_norm": 2.0973806381225586,
+      "learning_rate": 8.42e-06,
+      "loss": 0.5416,
+      "step": 425
+    },
+    {
+      "epoch": 56.266666666666666,
+      "grad_norm": 2.6723616123199463,
+      "learning_rate": 8.920000000000001e-06,
+      "loss": 0.5407,
+      "step": 450
+    },
+    {
+      "epoch": 59.4,
+      "grad_norm": 2.1810383796691895,
+      "learning_rate": 9.42e-06,
+      "loss": 0.5174,
+      "step": 475
+    },
+    {
+      "epoch": 62.53333333333333,
+      "grad_norm": 3.464071750640869,
+      "learning_rate": 9.920000000000002e-06,
+      "loss": 0.5327,
+      "step": 500
+    },
+    {
+      "epoch": 65.66666666666667,
+      "grad_norm": 3.6148977279663086,
+      "learning_rate": 9.940000000000001e-06,
+      "loss": 0.5141,
+      "step": 525
+    },
+    {
+      "epoch": 68.8,
+      "grad_norm": 2.5631027221679688,
+      "learning_rate": 9.86857142857143e-06,
+      "loss": 0.5246,
+      "step": 550
+    },
+    {
+      "epoch": 71.93333333333334,
+      "grad_norm": 2.058468818664551,
+      "learning_rate": 9.797142857142858e-06,
+      "loss": 0.5065,
+      "step": 575
+    },
+    {
+      "epoch": 75.0,
+      "grad_norm": 1.7559466361999512,
+      "learning_rate": 9.725714285714287e-06,
+      "loss": 0.4871,
+      "step": 600
+    },
+    {
+      "epoch": 78.13333333333334,
+      "grad_norm": 2.653345823287964,
+      "learning_rate": 9.654285714285716e-06,
+      "loss": 0.4941,
+      "step": 625
+    },
+    {
+      "epoch": 81.26666666666667,
+      "grad_norm": 2.612226724624634,
+      "learning_rate": 9.582857142857143e-06,
+      "loss": 0.4796,
+      "step": 650
+    },
+    {
+      "epoch": 84.4,
+      "grad_norm": 1.7446099519729614,
+      "learning_rate": 9.511428571428572e-06,
+      "loss": 0.487,
+      "step": 675
+    },
+    {
+      "epoch": 87.53333333333333,
+      "grad_norm": 2.627315044403076,
+      "learning_rate": 9.440000000000001e-06,
+      "loss": 0.4731,
+      "step": 700
+    },
+    {
+      "epoch": 90.66666666666667,
+      "grad_norm": 2.4315383434295654,
+      "learning_rate": 9.368571428571428e-06,
+      "loss": 0.4812,
+      "step": 725
+    },
+    {
+      "epoch": 93.8,
+      "grad_norm": 2.4056336879730225,
+      "learning_rate": 9.297142857142857e-06,
+      "loss": 0.468,
+      "step": 750
+    },
+    {
+      "epoch": 96.93333333333334,
+      "grad_norm": 2.153116464614868,
+      "learning_rate": 9.225714285714286e-06,
+      "loss": 0.4829,
+      "step": 775
+    },
+    {
+      "epoch": 100.0,
+      "grad_norm": 2.9421756267547607,
+      "learning_rate": 9.154285714285715e-06,
+      "loss": 0.4555,
+      "step": 800
+    },
+    {
+      "epoch": 103.13333333333334,
+      "grad_norm": 1.6771883964538574,
+      "learning_rate": 9.082857142857143e-06,
+      "loss": 0.462,
+      "step": 825
+    },
+    {
+      "epoch": 106.26666666666667,
+      "grad_norm": 2.9711899757385254,
+      "learning_rate": 9.011428571428572e-06,
+      "loss": 0.471,
+      "step": 850
+    },
+    {
+      "epoch": 109.4,
+      "grad_norm": 1.922980546951294,
+      "learning_rate": 8.94e-06,
+      "loss": 0.4673,
+      "step": 875
+    },
+    {
+      "epoch": 112.53333333333333,
+      "grad_norm": 2.49945068359375,
+      "learning_rate": 8.86857142857143e-06,
+      "loss": 0.4611,
+      "step": 900
+    },
+    {
+      "epoch": 115.66666666666667,
+      "grad_norm": 2.646510362625122,
+      "learning_rate": 8.797142857142857e-06,
+      "loss": 0.4574,
+      "step": 925
+    },
+    {
+      "epoch": 118.8,
+      "grad_norm": 1.7943354845046997,
+      "learning_rate": 8.725714285714286e-06,
+      "loss": 0.4658,
+      "step": 950
+    },
+    {
+      "epoch": 121.93333333333334,
+      "grad_norm": 2.171827793121338,
+      "learning_rate": 8.654285714285715e-06,
+      "loss": 0.4561,
+      "step": 975
+    },
+    {
+      "epoch": 125.0,
+      "grad_norm": 7.516489505767822,
+      "learning_rate": 8.582857142857144e-06,
+      "loss": 0.4472,
+      "step": 1000
+    },
+    {
+      "epoch": 125.0,
+      "eval_loss": 0.5154594779014587,
+      "eval_runtime": 0.7837,
+      "eval_samples_per_second": 33.175,
+      "eval_steps_per_second": 5.104,
+      "step": 1000
+    },
+    {
+      "epoch": 128.13333333333333,
+      "grad_norm": 2.5167343616485596,
+      "learning_rate": 8.511428571428571e-06,
+      "loss": 0.457,
+      "step": 1025
+    },
+    {
+      "epoch": 131.26666666666668,
+      "grad_norm": 3.3089983463287354,
+      "learning_rate": 8.44e-06,
+      "loss": 0.4456,
+      "step": 1050
+    },
+    {
+      "epoch": 134.4,
+      "grad_norm": 2.778348445892334,
+      "learning_rate": 8.36857142857143e-06,
+      "loss": 0.4612,
+      "step": 1075
+    },
+    {
+      "epoch": 137.53333333333333,
+      "grad_norm": 2.529778480529785,
+      "learning_rate": 8.297142857142859e-06,
+      "loss": 0.4429,
+      "step": 1100
+    },
+    {
+      "epoch": 140.66666666666666,
+      "grad_norm": 1.76685631275177,
+      "learning_rate": 8.225714285714288e-06,
+      "loss": 0.4399,
+      "step": 1125
+    },
+    {
+      "epoch": 143.8,
+      "grad_norm": 1.8449666500091553,
+      "learning_rate": 8.154285714285715e-06,
+      "loss": 0.4329,
+      "step": 1150
+    },
+    {
+      "epoch": 146.93333333333334,
+      "grad_norm": 1.9097468852996826,
+      "learning_rate": 8.082857142857144e-06,
+      "loss": 0.4527,
+      "step": 1175
+    },
+    {
+      "epoch": 150.0,
+      "grad_norm": 3.892838716506958,
+      "learning_rate": 8.011428571428573e-06,
+      "loss": 0.4448,
+      "step": 1200
+    },
+    {
+      "epoch": 153.13333333333333,
+      "grad_norm": 2.1518826484680176,
+      "learning_rate": 7.94e-06,
+      "loss": 0.4412,
+      "step": 1225
+    },
+    {
+      "epoch": 156.26666666666668,
+      "grad_norm": 1.5322662591934204,
+      "learning_rate": 7.86857142857143e-06,
+      "loss": 0.4388,
+      "step": 1250
+    },
+    {
+      "epoch": 159.4,
+      "grad_norm": 1.4961107969284058,
+      "learning_rate": 7.797142857142858e-06,
+      "loss": 0.4363,
+      "step": 1275
+    },
+    {
+      "epoch": 162.53333333333333,
+      "grad_norm": 1.8992841243743896,
+      "learning_rate": 7.725714285714286e-06,
+      "loss": 0.4474,
+      "step": 1300
+    },
+    {
+      "epoch": 165.66666666666666,
+      "grad_norm": 1.5015554428100586,
+      "learning_rate": 7.654285714285715e-06,
+      "loss": 0.4327,
+      "step": 1325
+    },
+    {
+      "epoch": 168.8,
+      "grad_norm": 2.0730693340301514,
+      "learning_rate": 7.5828571428571444e-06,
+      "loss": 0.4348,
+      "step": 1350
+    },
+    {
+      "epoch": 171.93333333333334,
+      "grad_norm": 2.0838747024536133,
+      "learning_rate": 7.511428571428572e-06,
+      "loss": 0.4393,
+      "step": 1375
+    },
+    {
+      "epoch": 175.0,
+      "grad_norm": 4.3804030418396,
+      "learning_rate": 7.440000000000001e-06,
+      "loss": 0.4386,
+      "step": 1400
+    },
+    {
+      "epoch": 178.13333333333333,
+      "grad_norm": 1.8927189111709595,
+      "learning_rate": 7.36857142857143e-06,
+      "loss": 0.4318,
+      "step": 1425
+    },
+    {
+      "epoch": 181.26666666666668,
+      "grad_norm": 1.5456620454788208,
+      "learning_rate": 7.297142857142858e-06,
+      "loss": 0.4336,
+      "step": 1450
+    },
+    {
+      "epoch": 184.4,
+      "grad_norm": 2.722612142562866,
+      "learning_rate": 7.225714285714286e-06,
+      "loss": 0.4281,
+      "step": 1475
+    },
+    {
+      "epoch": 187.53333333333333,
+      "grad_norm": 1.9484314918518066,
+      "learning_rate": 7.154285714285715e-06,
+      "loss": 0.4312,
+      "step": 1500
+    },
+    {
+      "epoch": 190.66666666666666,
+      "grad_norm": 2.101043224334717,
+      "learning_rate": 7.082857142857143e-06,
+      "loss": 0.427,
+      "step": 1525
+    },
+    {
+      "epoch": 193.8,
+      "grad_norm": 1.9785490036010742,
+      "learning_rate": 7.011428571428572e-06,
+      "loss": 0.4298,
+      "step": 1550
+    },
+    {
+      "epoch": 196.93333333333334,
+      "grad_norm": 2.319054126739502,
+      "learning_rate": 6.9400000000000005e-06,
+      "loss": 0.4376,
+      "step": 1575
+    },
+    {
+      "epoch": 200.0,
+      "grad_norm": 1.3612741231918335,
+      "learning_rate": 6.868571428571429e-06,
+      "loss": 0.4217,
+      "step": 1600
+    },
+    {
+      "epoch": 203.13333333333333,
+      "grad_norm": 2.128363847732544,
+      "learning_rate": 6.797142857142858e-06,
+      "loss": 0.4217,
+      "step": 1625
+    },
+    {
+      "epoch": 206.26666666666668,
+      "grad_norm": 1.7985234260559082,
+      "learning_rate": 6.725714285714287e-06,
+      "loss": 0.4147,
+      "step": 1650
+    },
+    {
+      "epoch": 209.4,
+      "grad_norm": 1.3478573560714722,
+      "learning_rate": 6.654285714285716e-06,
+      "loss": 0.4357,
+      "step": 1675
+    },
+    {
+      "epoch": 212.53333333333333,
+      "grad_norm": 1.5389248132705688,
+      "learning_rate": 6.582857142857143e-06,
+      "loss": 0.419,
+      "step": 1700
+    },
+    {
+      "epoch": 215.66666666666666,
+      "grad_norm": 1.9558783769607544,
+      "learning_rate": 6.511428571428572e-06,
+      "loss": 0.4289,
+      "step": 1725
+    },
+    {
+      "epoch": 218.8,
+      "grad_norm": 1.756585955619812,
+      "learning_rate": 6.440000000000001e-06,
+      "loss": 0.4168,
+      "step": 1750
+    },
+    {
+      "epoch": 221.93333333333334,
+      "grad_norm": 1.8744903802871704,
+      "learning_rate": 6.368571428571429e-06,
+      "loss": 0.4296,
+      "step": 1775
+    },
+    {
+      "epoch": 225.0,
+      "grad_norm": 1.133415699005127,
+      "learning_rate": 6.297142857142857e-06,
+      "loss": 0.4162,
+      "step": 1800
+    },
+    {
+      "epoch": 228.13333333333333,
+      "grad_norm": 2.819840908050537,
+      "learning_rate": 6.225714285714286e-06,
+      "loss": 0.4275,
+      "step": 1825
+    },
+    {
+      "epoch": 231.26666666666668,
+      "grad_norm": 1.5150210857391357,
+      "learning_rate": 6.1542857142857145e-06,
+      "loss": 0.4244,
+      "step": 1850
+    },
+    {
+      "epoch": 234.4,
+      "grad_norm": 2.184819459915161,
+      "learning_rate": 6.0828571428571435e-06,
+      "loss": 0.4282,
+      "step": 1875
+    },
+    {
+      "epoch": 237.53333333333333,
+      "grad_norm": 3.293454170227051,
+      "learning_rate": 6.011428571428572e-06,
+      "loss": 0.4215,
+      "step": 1900
+    },
+    {
+      "epoch": 240.66666666666666,
+      "grad_norm": 1.210433006286621,
+      "learning_rate": 5.94e-06,
+      "loss": 0.4103,
+      "step": 1925
+    },
+    {
+      "epoch": 243.8,
+      "grad_norm": 2.5027923583984375,
+      "learning_rate": 5.868571428571429e-06,
+      "loss": 0.4186,
+      "step": 1950
+    },
+    {
+      "epoch": 246.93333333333334,
+      "grad_norm": 1.9649789333343506,
+      "learning_rate": 5.797142857142858e-06,
+      "loss": 0.427,
+      "step": 1975
+    },
+    {
+      "epoch": 250.0,
+      "grad_norm": 5.899420261383057,
+      "learning_rate": 5.725714285714287e-06,
+      "loss": 0.4113,
+      "step": 2000
+    },
+    {
+      "epoch": 250.0,
+      "eval_loss": 0.4833647906780243,
+      "eval_runtime": 0.7095,
+      "eval_samples_per_second": 36.646,
+      "eval_steps_per_second": 5.638,
+      "step": 2000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 4000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 572,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5140893067410672.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-2000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:07e653cd3b83482a0939ee4c8a207df9a996f44ce9dd82197c4ab6cde60cf2bb
+size 5432

checkpoint-3000/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "<ctc_blank>": 80,
+  "<mask>": 79
+}

checkpoint-3000/config.json ADDED Viewed

	@@ -0,0 +1,91 @@

+{
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "SpeechT5ForTextToSpeech"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.1,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 2,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.1,
+  "encoder_layers": 12,
+  "encoder_max_relative_position": 160,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
+  "guided_attention_loss_num_heads": 2,
+  "guided_attention_loss_scale": 10.0,
+  "guided_attention_loss_sigma": 0.4,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "is_encoder_decoder": true,
+  "layer_norm_eps": 1e-05,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": null,
+  "max_speech_positions": 1876,
+  "max_text_positions": 600,
+  "model_type": "speecht5",
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_mel_bins": 80,
+  "pad_token_id": 1,
+  "positional_dropout": 0.1,
+  "reduction_factor": 2,
+  "scale_embedding": false,
+  "speaker_embedding_dim": 512,
+  "speech_decoder_postnet_dropout": 0.5,
+  "speech_decoder_postnet_kernel": 5,
+  "speech_decoder_postnet_layers": 5,
+  "speech_decoder_postnet_units": 256,
+  "speech_decoder_prenet_dropout": 0.5,
+  "speech_decoder_prenet_layers": 2,
+  "speech_decoder_prenet_units": 256,
+  "torch_dtype": "float32",
+  "transformers_version": "4.52.0.dev0",
+  "use_cache": false,
+  "use_guided_attention_loss": true,
+  "vocab_size": 81
+}

checkpoint-3000/generation_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 2,
+  "max_length": 1876,
+  "pad_token_id": 1,
+  "transformers_version": "4.52.0.dev0"
+}

checkpoint-3000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d78d191d64b057ef7708236443ccd41bb24d44484f04b36d4fd46df31daa1c6
+size 577789320

checkpoint-3000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:abb1e34922901fa49e90b622d421fcc022123b3db879e5c48cdc697dd3a9c2d3
+size 1155772233

checkpoint-3000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a258daad5ac5df9273072647bd5fccfa416cdadb91b7707278c61cc1145a5964
+size 14244

checkpoint-3000/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0c9142a3b98e645e9dc3ffae8c602fb70b74046fea7664e6d081ebb3d0bbb58
+size 988

checkpoint-3000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ff56efc76c16a3b9a712527179ae61c8d6dfccc7e3a53f8c421d6329adacfbb
+size 1064

checkpoint-3000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

checkpoint-3000/spm_char.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560
+size 238473

checkpoint-3000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "79": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "80": {
+      "content": "<ctc_blank>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 600,
+  "normalize": false,
+  "pad_token": "<pad>",
+  "processor_class": "SpeechT5Processor",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "SpeechT5Tokenizer",
+  "unk_token": "<unk>"
+}

checkpoint-3000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,898 @@

+{
+  "best_global_step": 3000,
+  "best_metric": 0.46799278259277344,
+  "best_model_checkpoint": "./speecht5_tts_mabama/checkpoint-3000",
+  "epoch": 375.0,
+  "eval_steps": 1000,
+  "global_step": 3000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 3.1333333333333333,
+      "grad_norm": 13.092179298400879,
+      "learning_rate": 4.2000000000000006e-07,
+      "loss": 1.0978,
+      "step": 25
+    },
+    {
+      "epoch": 6.266666666666667,
+      "grad_norm": 13.538804054260254,
+      "learning_rate": 9.200000000000001e-07,
+      "loss": 1.0057,
+      "step": 50
+    },
+    {
+      "epoch": 9.4,
+      "grad_norm": 3.8923733234405518,
+      "learning_rate": 1.42e-06,
+      "loss": 0.8155,
+      "step": 75
+    },
+    {
+      "epoch": 12.533333333333333,
+      "grad_norm": 2.569746732711792,
+      "learning_rate": 1.9200000000000003e-06,
+      "loss": 0.7921,
+      "step": 100
+    },
+    {
+      "epoch": 15.666666666666666,
+      "grad_norm": 2.390493631362915,
+      "learning_rate": 2.42e-06,
+      "loss": 0.7531,
+      "step": 125
+    },
+    {
+      "epoch": 18.8,
+      "grad_norm": 2.7168779373168945,
+      "learning_rate": 2.92e-06,
+      "loss": 0.7393,
+      "step": 150
+    },
+    {
+      "epoch": 21.933333333333334,
+      "grad_norm": 10.27633285522461,
+      "learning_rate": 3.4200000000000007e-06,
+      "loss": 0.7292,
+      "step": 175
+    },
+    {
+      "epoch": 25.0,
+      "grad_norm": 5.6921000480651855,
+      "learning_rate": 3.920000000000001e-06,
+      "loss": 0.6642,
+      "step": 200
+    },
+    {
+      "epoch": 28.133333333333333,
+      "grad_norm": 2.6206777095794678,
+      "learning_rate": 4.42e-06,
+      "loss": 0.6555,
+      "step": 225
+    },
+    {
+      "epoch": 31.266666666666666,
+      "grad_norm": 1.9396028518676758,
+      "learning_rate": 4.92e-06,
+      "loss": 0.6484,
+      "step": 250
+    },
+    {
+      "epoch": 34.4,
+      "grad_norm": 3.44437575340271,
+      "learning_rate": 5.420000000000001e-06,
+      "loss": 0.6414,
+      "step": 275
+    },
+    {
+      "epoch": 37.53333333333333,
+      "grad_norm": 2.729497194290161,
+      "learning_rate": 5.92e-06,
+      "loss": 0.6323,
+      "step": 300
+    },
+    {
+      "epoch": 40.666666666666664,
+      "grad_norm": 2.3852877616882324,
+      "learning_rate": 6.42e-06,
+      "loss": 0.6073,
+      "step": 325
+    },
+    {
+      "epoch": 43.8,
+      "grad_norm": 4.4287109375,
+      "learning_rate": 6.92e-06,
+      "loss": 0.6034,
+      "step": 350
+    },
+    {
+      "epoch": 46.93333333333333,
+      "grad_norm": 2.1653966903686523,
+      "learning_rate": 7.420000000000001e-06,
+      "loss": 0.5865,
+      "step": 375
+    },
+    {
+      "epoch": 50.0,
+      "grad_norm": 2.8120265007019043,
+      "learning_rate": 7.92e-06,
+      "loss": 0.5556,
+      "step": 400
+    },
+    {
+      "epoch": 53.13333333333333,
+      "grad_norm": 2.0973806381225586,
+      "learning_rate": 8.42e-06,
+      "loss": 0.5416,
+      "step": 425
+    },
+    {
+      "epoch": 56.266666666666666,
+      "grad_norm": 2.6723616123199463,
+      "learning_rate": 8.920000000000001e-06,
+      "loss": 0.5407,
+      "step": 450
+    },
+    {
+      "epoch": 59.4,
+      "grad_norm": 2.1810383796691895,
+      "learning_rate": 9.42e-06,
+      "loss": 0.5174,
+      "step": 475
+    },
+    {
+      "epoch": 62.53333333333333,
+      "grad_norm": 3.464071750640869,
+      "learning_rate": 9.920000000000002e-06,
+      "loss": 0.5327,
+      "step": 500
+    },
+    {
+      "epoch": 65.66666666666667,
+      "grad_norm": 3.6148977279663086,
+      "learning_rate": 9.940000000000001e-06,
+      "loss": 0.5141,
+      "step": 525
+    },
+    {
+      "epoch": 68.8,
+      "grad_norm": 2.5631027221679688,
+      "learning_rate": 9.86857142857143e-06,
+      "loss": 0.5246,
+      "step": 550
+    },
+    {
+      "epoch": 71.93333333333334,
+      "grad_norm": 2.058468818664551,
+      "learning_rate": 9.797142857142858e-06,
+      "loss": 0.5065,
+      "step": 575
+    },
+    {
+      "epoch": 75.0,
+      "grad_norm": 1.7559466361999512,
+      "learning_rate": 9.725714285714287e-06,
+      "loss": 0.4871,
+      "step": 600
+    },
+    {
+      "epoch": 78.13333333333334,
+      "grad_norm": 2.653345823287964,
+      "learning_rate": 9.654285714285716e-06,
+      "loss": 0.4941,
+      "step": 625
+    },
+    {
+      "epoch": 81.26666666666667,
+      "grad_norm": 2.612226724624634,
+      "learning_rate": 9.582857142857143e-06,
+      "loss": 0.4796,
+      "step": 650
+    },
+    {
+      "epoch": 84.4,
+      "grad_norm": 1.7446099519729614,
+      "learning_rate": 9.511428571428572e-06,
+      "loss": 0.487,
+      "step": 675
+    },
+    {
+      "epoch": 87.53333333333333,
+      "grad_norm": 2.627315044403076,
+      "learning_rate": 9.440000000000001e-06,
+      "loss": 0.4731,
+      "step": 700
+    },
+    {
+      "epoch": 90.66666666666667,
+      "grad_norm": 2.4315383434295654,
+      "learning_rate": 9.368571428571428e-06,
+      "loss": 0.4812,
+      "step": 725
+    },
+    {
+      "epoch": 93.8,
+      "grad_norm": 2.4056336879730225,
+      "learning_rate": 9.297142857142857e-06,
+      "loss": 0.468,
+      "step": 750
+    },
+    {
+      "epoch": 96.93333333333334,
+      "grad_norm": 2.153116464614868,
+      "learning_rate": 9.225714285714286e-06,
+      "loss": 0.4829,
+      "step": 775
+    },
+    {
+      "epoch": 100.0,
+      "grad_norm": 2.9421756267547607,
+      "learning_rate": 9.154285714285715e-06,
+      "loss": 0.4555,
+      "step": 800
+    },
+    {
+      "epoch": 103.13333333333334,
+      "grad_norm": 1.6771883964538574,
+      "learning_rate": 9.082857142857143e-06,
+      "loss": 0.462,
+      "step": 825
+    },
+    {
+      "epoch": 106.26666666666667,
+      "grad_norm": 2.9711899757385254,
+      "learning_rate": 9.011428571428572e-06,
+      "loss": 0.471,
+      "step": 850
+    },
+    {
+      "epoch": 109.4,
+      "grad_norm": 1.922980546951294,
+      "learning_rate": 8.94e-06,
+      "loss": 0.4673,
+      "step": 875
+    },
+    {
+      "epoch": 112.53333333333333,
+      "grad_norm": 2.49945068359375,
+      "learning_rate": 8.86857142857143e-06,
+      "loss": 0.4611,
+      "step": 900
+    },
+    {
+      "epoch": 115.66666666666667,
+      "grad_norm": 2.646510362625122,
+      "learning_rate": 8.797142857142857e-06,
+      "loss": 0.4574,
+      "step": 925
+    },
+    {
+      "epoch": 118.8,
+      "grad_norm": 1.7943354845046997,
+      "learning_rate": 8.725714285714286e-06,
+      "loss": 0.4658,
+      "step": 950
+    },
+    {
+      "epoch": 121.93333333333334,
+      "grad_norm": 2.171827793121338,
+      "learning_rate": 8.654285714285715e-06,
+      "loss": 0.4561,
+      "step": 975
+    },
+    {
+      "epoch": 125.0,
+      "grad_norm": 7.516489505767822,
+      "learning_rate": 8.582857142857144e-06,
+      "loss": 0.4472,
+      "step": 1000
+    },
+    {
+      "epoch": 125.0,
+      "eval_loss": 0.5154594779014587,
+      "eval_runtime": 0.7837,
+      "eval_samples_per_second": 33.175,
+      "eval_steps_per_second": 5.104,
+      "step": 1000
+    },
+    {
+      "epoch": 128.13333333333333,
+      "grad_norm": 2.5167343616485596,
+      "learning_rate": 8.511428571428571e-06,
+      "loss": 0.457,
+      "step": 1025
+    },
+    {
+      "epoch": 131.26666666666668,
+      "grad_norm": 3.3089983463287354,
+      "learning_rate": 8.44e-06,
+      "loss": 0.4456,
+      "step": 1050
+    },
+    {
+      "epoch": 134.4,
+      "grad_norm": 2.778348445892334,
+      "learning_rate": 8.36857142857143e-06,
+      "loss": 0.4612,
+      "step": 1075
+    },
+    {
+      "epoch": 137.53333333333333,
+      "grad_norm": 2.529778480529785,
+      "learning_rate": 8.297142857142859e-06,
+      "loss": 0.4429,
+      "step": 1100
+    },
+    {
+      "epoch": 140.66666666666666,
+      "grad_norm": 1.76685631275177,
+      "learning_rate": 8.225714285714288e-06,
+      "loss": 0.4399,
+      "step": 1125
+    },
+    {
+      "epoch": 143.8,
+      "grad_norm": 1.8449666500091553,
+      "learning_rate": 8.154285714285715e-06,
+      "loss": 0.4329,
+      "step": 1150
+    },
+    {
+      "epoch": 146.93333333333334,
+      "grad_norm": 1.9097468852996826,
+      "learning_rate": 8.082857142857144e-06,
+      "loss": 0.4527,
+      "step": 1175
+    },
+    {
+      "epoch": 150.0,
+      "grad_norm": 3.892838716506958,
+      "learning_rate": 8.011428571428573e-06,
+      "loss": 0.4448,
+      "step": 1200
+    },
+    {
+      "epoch": 153.13333333333333,
+      "grad_norm": 2.1518826484680176,
+      "learning_rate": 7.94e-06,
+      "loss": 0.4412,
+      "step": 1225
+    },
+    {
+      "epoch": 156.26666666666668,
+      "grad_norm": 1.5322662591934204,
+      "learning_rate": 7.86857142857143e-06,
+      "loss": 0.4388,
+      "step": 1250
+    },
+    {
+      "epoch": 159.4,
+      "grad_norm": 1.4961107969284058,
+      "learning_rate": 7.797142857142858e-06,
+      "loss": 0.4363,
+      "step": 1275
+    },
+    {
+      "epoch": 162.53333333333333,
+      "grad_norm": 1.8992841243743896,
+      "learning_rate": 7.725714285714286e-06,
+      "loss": 0.4474,
+      "step": 1300
+    },
+    {
+      "epoch": 165.66666666666666,
+      "grad_norm": 1.5015554428100586,
+      "learning_rate": 7.654285714285715e-06,
+      "loss": 0.4327,
+      "step": 1325
+    },
+    {
+      "epoch": 168.8,
+      "grad_norm": 2.0730693340301514,
+      "learning_rate": 7.5828571428571444e-06,
+      "loss": 0.4348,
+      "step": 1350
+    },
+    {
+      "epoch": 171.93333333333334,
+      "grad_norm": 2.0838747024536133,
+      "learning_rate": 7.511428571428572e-06,
+      "loss": 0.4393,
+      "step": 1375
+    },
+    {
+      "epoch": 175.0,
+      "grad_norm": 4.3804030418396,
+      "learning_rate": 7.440000000000001e-06,
+      "loss": 0.4386,
+      "step": 1400
+    },
+    {
+      "epoch": 178.13333333333333,
+      "grad_norm": 1.8927189111709595,
+      "learning_rate": 7.36857142857143e-06,
+      "loss": 0.4318,
+      "step": 1425
+    },
+    {
+      "epoch": 181.26666666666668,
+      "grad_norm": 1.5456620454788208,
+      "learning_rate": 7.297142857142858e-06,
+      "loss": 0.4336,
+      "step": 1450
+    },
+    {
+      "epoch": 184.4,
+      "grad_norm": 2.722612142562866,
+      "learning_rate": 7.225714285714286e-06,
+      "loss": 0.4281,
+      "step": 1475
+    },
+    {
+      "epoch": 187.53333333333333,
+      "grad_norm": 1.9484314918518066,
+      "learning_rate": 7.154285714285715e-06,
+      "loss": 0.4312,
+      "step": 1500
+    },
+    {
+      "epoch": 190.66666666666666,
+      "grad_norm": 2.101043224334717,
+      "learning_rate": 7.082857142857143e-06,
+      "loss": 0.427,
+      "step": 1525
+    },
+    {
+      "epoch": 193.8,
+      "grad_norm": 1.9785490036010742,
+      "learning_rate": 7.011428571428572e-06,
+      "loss": 0.4298,
+      "step": 1550
+    },
+    {
+      "epoch": 196.93333333333334,
+      "grad_norm": 2.319054126739502,
+      "learning_rate": 6.9400000000000005e-06,
+      "loss": 0.4376,
+      "step": 1575
+    },
+    {
+      "epoch": 200.0,
+      "grad_norm": 1.3612741231918335,
+      "learning_rate": 6.868571428571429e-06,
+      "loss": 0.4217,
+      "step": 1600
+    },
+    {
+      "epoch": 203.13333333333333,
+      "grad_norm": 2.128363847732544,
+      "learning_rate": 6.797142857142858e-06,
+      "loss": 0.4217,
+      "step": 1625
+    },
+    {
+      "epoch": 206.26666666666668,
+      "grad_norm": 1.7985234260559082,
+      "learning_rate": 6.725714285714287e-06,
+      "loss": 0.4147,
+      "step": 1650
+    },
+    {
+      "epoch": 209.4,
+      "grad_norm": 1.3478573560714722,
+      "learning_rate": 6.654285714285716e-06,
+      "loss": 0.4357,
+      "step": 1675
+    },
+    {
+      "epoch": 212.53333333333333,
+      "grad_norm": 1.5389248132705688,
+      "learning_rate": 6.582857142857143e-06,
+      "loss": 0.419,
+      "step": 1700
+    },
+    {
+      "epoch": 215.66666666666666,
+      "grad_norm": 1.9558783769607544,
+      "learning_rate": 6.511428571428572e-06,
+      "loss": 0.4289,
+      "step": 1725
+    },
+    {
+      "epoch": 218.8,
+      "grad_norm": 1.756585955619812,
+      "learning_rate": 6.440000000000001e-06,
+      "loss": 0.4168,
+      "step": 1750
+    },
+    {
+      "epoch": 221.93333333333334,
+      "grad_norm": 1.8744903802871704,
+      "learning_rate": 6.368571428571429e-06,
+      "loss": 0.4296,
+      "step": 1775
+    },
+    {
+      "epoch": 225.0,
+      "grad_norm": 1.133415699005127,
+      "learning_rate": 6.297142857142857e-06,
+      "loss": 0.4162,
+      "step": 1800
+    },
+    {
+      "epoch": 228.13333333333333,
+      "grad_norm": 2.819840908050537,
+      "learning_rate": 6.225714285714286e-06,
+      "loss": 0.4275,
+      "step": 1825
+    },
+    {
+      "epoch": 231.26666666666668,
+      "grad_norm": 1.5150210857391357,
+      "learning_rate": 6.1542857142857145e-06,
+      "loss": 0.4244,
+      "step": 1850
+    },
+    {
+      "epoch": 234.4,
+      "grad_norm": 2.184819459915161,
+      "learning_rate": 6.0828571428571435e-06,
+      "loss": 0.4282,
+      "step": 1875
+    },
+    {
+      "epoch": 237.53333333333333,
+      "grad_norm": 3.293454170227051,
+      "learning_rate": 6.011428571428572e-06,
+      "loss": 0.4215,
+      "step": 1900
+    },
+    {
+      "epoch": 240.66666666666666,
+      "grad_norm": 1.210433006286621,
+      "learning_rate": 5.94e-06,
+      "loss": 0.4103,
+      "step": 1925
+    },
+    {
+      "epoch": 243.8,
+      "grad_norm": 2.5027923583984375,
+      "learning_rate": 5.868571428571429e-06,
+      "loss": 0.4186,
+      "step": 1950
+    },
+    {
+      "epoch": 246.93333333333334,
+      "grad_norm": 1.9649789333343506,
+      "learning_rate": 5.797142857142858e-06,
+      "loss": 0.427,
+      "step": 1975
+    },
+    {
+      "epoch": 250.0,
+      "grad_norm": 5.899420261383057,
+      "learning_rate": 5.725714285714287e-06,
+      "loss": 0.4113,
+      "step": 2000
+    },
+    {
+      "epoch": 250.0,
+      "eval_loss": 0.4833647906780243,
+      "eval_runtime": 0.7095,
+      "eval_samples_per_second": 36.646,
+      "eval_steps_per_second": 5.638,
+      "step": 2000
+    },
+    {
+      "epoch": 253.13333333333333,
+      "grad_norm": 2.0845134258270264,
+      "learning_rate": 5.654285714285714e-06,
+      "loss": 0.4168,
+      "step": 2025
+    },
+    {
+      "epoch": 256.26666666666665,
+      "grad_norm": 1.3729593753814697,
+      "learning_rate": 5.582857142857143e-06,
+      "loss": 0.4099,
+      "step": 2050
+    },
+    {
+      "epoch": 259.4,
+      "grad_norm": 1.8317629098892212,
+      "learning_rate": 5.511428571428572e-06,
+      "loss": 0.4136,
+      "step": 2075
+    },
+    {
+      "epoch": 262.53333333333336,
+      "grad_norm": 1.6238123178482056,
+      "learning_rate": 5.4400000000000004e-06,
+      "loss": 0.4204,
+      "step": 2100
+    },
+    {
+      "epoch": 265.6666666666667,
+      "grad_norm": 1.6968961954116821,
+      "learning_rate": 5.368571428571429e-06,
+      "loss": 0.4119,
+      "step": 2125
+    },
+    {
+      "epoch": 268.8,
+      "grad_norm": 2.1868855953216553,
+      "learning_rate": 5.297142857142858e-06,
+      "loss": 0.4114,
+      "step": 2150
+    },
+    {
+      "epoch": 271.93333333333334,
+      "grad_norm": 1.3070896863937378,
+      "learning_rate": 5.225714285714286e-06,
+      "loss": 0.4108,
+      "step": 2175
+    },
+    {
+      "epoch": 275.0,
+      "grad_norm": 1.977940559387207,
+      "learning_rate": 5.154285714285715e-06,
+      "loss": 0.4045,
+      "step": 2200
+    },
+    {
+      "epoch": 278.1333333333333,
+      "grad_norm": 1.6485978364944458,
+      "learning_rate": 5.082857142857144e-06,
+      "loss": 0.4119,
+      "step": 2225
+    },
+    {
+      "epoch": 281.26666666666665,
+      "grad_norm": 1.9459550380706787,
+      "learning_rate": 5.011428571428571e-06,
+      "loss": 0.411,
+      "step": 2250
+    },
+    {
+      "epoch": 284.4,
+      "grad_norm": 1.5531017780303955,
+      "learning_rate": 4.94e-06,
+      "loss": 0.4083,
+      "step": 2275
+    },
+    {
+      "epoch": 287.53333333333336,
+      "grad_norm": 1.232640027999878,
+      "learning_rate": 4.868571428571429e-06,
+      "loss": 0.4121,
+      "step": 2300
+    },
+    {
+      "epoch": 290.6666666666667,
+      "grad_norm": 7.107569217681885,
+      "learning_rate": 4.800000000000001e-06,
+      "loss": 0.4013,
+      "step": 2325
+    },
+    {
+      "epoch": 293.8,
+      "grad_norm": 1.387934684753418,
+      "learning_rate": 4.728571428571429e-06,
+      "loss": 0.4135,
+      "step": 2350
+    },
+    {
+      "epoch": 296.93333333333334,
+      "grad_norm": 1.8122384548187256,
+      "learning_rate": 4.657142857142857e-06,
+      "loss": 0.4025,
+      "step": 2375
+    },
+    {
+      "epoch": 300.0,
+      "grad_norm": 3.2206528186798096,
+      "learning_rate": 4.585714285714286e-06,
+      "loss": 0.4055,
+      "step": 2400
+    },
+    {
+      "epoch": 303.1333333333333,
+      "grad_norm": 1.6222842931747437,
+      "learning_rate": 4.514285714285714e-06,
+      "loss": 0.4125,
+      "step": 2425
+    },
+    {
+      "epoch": 306.26666666666665,
+      "grad_norm": 1.4375584125518799,
+      "learning_rate": 4.442857142857143e-06,
+      "loss": 0.4033,
+      "step": 2450
+    },
+    {
+      "epoch": 309.4,
+      "grad_norm": 1.173034906387329,
+      "learning_rate": 4.371428571428572e-06,
+      "loss": 0.4081,
+      "step": 2475
+    },
+    {
+      "epoch": 312.53333333333336,
+      "grad_norm": 1.9508713483810425,
+      "learning_rate": 4.3e-06,
+      "loss": 0.4126,
+      "step": 2500
+    },
+    {
+      "epoch": 315.6666666666667,
+      "grad_norm": 1.6111533641815186,
+      "learning_rate": 4.228571428571429e-06,
+      "loss": 0.3956,
+      "step": 2525
+    },
+    {
+      "epoch": 318.8,
+      "grad_norm": 2.0711958408355713,
+      "learning_rate": 4.1571428571428575e-06,
+      "loss": 0.4079,
+      "step": 2550
+    },
+    {
+      "epoch": 321.93333333333334,
+      "grad_norm": 2.312619924545288,
+      "learning_rate": 4.0857142857142865e-06,
+      "loss": 0.4172,
+      "step": 2575
+    },
+    {
+      "epoch": 325.0,
+      "grad_norm": 8.329635620117188,
+      "learning_rate": 4.014285714285715e-06,
+      "loss": 0.3956,
+      "step": 2600
+    },
+    {
+      "epoch": 328.1333333333333,
+      "grad_norm": 6.655773639678955,
+      "learning_rate": 3.942857142857143e-06,
+      "loss": 0.3998,
+      "step": 2625
+    },
+    {
+      "epoch": 331.26666666666665,
+      "grad_norm": 1.7531079053878784,
+      "learning_rate": 3.871428571428572e-06,
+      "loss": 0.4023,
+      "step": 2650
+    },
+    {
+      "epoch": 334.4,
+      "grad_norm": 2.5502614974975586,
+      "learning_rate": 3.8000000000000005e-06,
+      "loss": 0.4026,
+      "step": 2675
+    },
+    {
+      "epoch": 337.53333333333336,
+      "grad_norm": 1.471871256828308,
+      "learning_rate": 3.7285714285714286e-06,
+      "loss": 0.3981,
+      "step": 2700
+    },
+    {
+      "epoch": 340.6666666666667,
+      "grad_norm": 2.094290018081665,
+      "learning_rate": 3.6571428571428576e-06,
+      "loss": 0.4006,
+      "step": 2725
+    },
+    {
+      "epoch": 343.8,
+      "grad_norm": 1.3232810497283936,
+      "learning_rate": 3.5857142857142862e-06,
+      "loss": 0.4013,
+      "step": 2750
+    },
+    {
+      "epoch": 346.93333333333334,
+      "grad_norm": 1.5902683734893799,
+      "learning_rate": 3.5142857142857144e-06,
+      "loss": 0.4042,
+      "step": 2775
+    },
+    {
+      "epoch": 350.0,
+      "grad_norm": 5.186419486999512,
+      "learning_rate": 3.4428571428571434e-06,
+      "loss": 0.3843,
+      "step": 2800
+    },
+    {
+      "epoch": 353.1333333333333,
+      "grad_norm": 2.3405115604400635,
+      "learning_rate": 3.3714285714285716e-06,
+      "loss": 0.409,
+      "step": 2825
+    },
+    {
+      "epoch": 356.26666666666665,
+      "grad_norm": 1.1804980039596558,
+      "learning_rate": 3.3000000000000006e-06,
+      "loss": 0.405,
+      "step": 2850
+    },
+    {
+      "epoch": 359.4,
+      "grad_norm": 1.596712589263916,
+      "learning_rate": 3.2285714285714288e-06,
+      "loss": 0.4098,
+      "step": 2875
+    },
+    {
+      "epoch": 362.53333333333336,
+      "grad_norm": 1.9429064989089966,
+      "learning_rate": 3.1571428571428573e-06,
+      "loss": 0.413,
+      "step": 2900
+    },
+    {
+      "epoch": 365.6666666666667,
+      "grad_norm": 1.3636008501052856,
+      "learning_rate": 3.085714285714286e-06,
+      "loss": 0.394,
+      "step": 2925
+    },
+    {
+      "epoch": 368.8,
+      "grad_norm": 1.2349225282669067,
+      "learning_rate": 3.0142857142857145e-06,
+      "loss": 0.3964,
+      "step": 2950
+    },
+    {
+      "epoch": 371.93333333333334,
+      "grad_norm": 1.3793219327926636,
+      "learning_rate": 2.9428571428571427e-06,
+      "loss": 0.3958,
+      "step": 2975
+    },
+    {
+      "epoch": 375.0,
+      "grad_norm": 7.785330772399902,
+      "learning_rate": 2.8714285714285717e-06,
+      "loss": 0.3906,
+      "step": 3000
+    },
+    {
+      "epoch": 375.0,
+      "eval_loss": 0.46799278259277344,
+      "eval_runtime": 0.7043,
+      "eval_samples_per_second": 36.916,
+      "eval_steps_per_second": 5.679,
+      "step": 3000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 4000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 572,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7721257243235184.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-3000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:07e653cd3b83482a0939ee4c8a207df9a996f44ce9dd82197c4ab6cde60cf2bb
+size 5432

checkpoint-4000/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "<ctc_blank>": 80,
+  "<mask>": 79
+}

checkpoint-4000/config.json ADDED Viewed

	@@ -0,0 +1,91 @@

+{
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "SpeechT5ForTextToSpeech"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.1,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 2,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.1,
+  "encoder_layers": 12,
+  "encoder_max_relative_position": 160,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
+  "guided_attention_loss_num_heads": 2,
+  "guided_attention_loss_scale": 10.0,
+  "guided_attention_loss_sigma": 0.4,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "is_encoder_decoder": true,
+  "layer_norm_eps": 1e-05,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": null,
+  "max_speech_positions": 1876,
+  "max_text_positions": 600,
+  "model_type": "speecht5",
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_mel_bins": 80,
+  "pad_token_id": 1,
+  "positional_dropout": 0.1,
+  "reduction_factor": 2,
+  "scale_embedding": false,
+  "speaker_embedding_dim": 512,
+  "speech_decoder_postnet_dropout": 0.5,
+  "speech_decoder_postnet_kernel": 5,
+  "speech_decoder_postnet_layers": 5,
+  "speech_decoder_postnet_units": 256,
+  "speech_decoder_prenet_dropout": 0.5,
+  "speech_decoder_prenet_layers": 2,
+  "speech_decoder_prenet_units": 256,
+  "torch_dtype": "float32",
+  "transformers_version": "4.52.0.dev0",
+  "use_cache": false,
+  "use_guided_attention_loss": true,
+  "vocab_size": 81
+}

checkpoint-4000/generation_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 2,
+  "max_length": 1876,
+  "pad_token_id": 1,
+  "transformers_version": "4.52.0.dev0"
+}

checkpoint-4000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:055e59911edf63563dfc4431a4301a869cc74effda4ba5c905ace376e831bd5d
+size 577789320

checkpoint-4000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e74b50ecc65d621a0c3fd3c8d0516e0345843175eb2a55af482c69d69da162e7
+size 1155772233

checkpoint-4000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:75ff95056306e75aaca85257572ca65ded44b3fc874ae842724682d1ad4067c2
+size 14244

checkpoint-4000/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:030f559b5aedef78935dd7632eb67ec4527791e9aca3eb758b902243f597abd2
+size 988

checkpoint-4000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7efbf80617c96c78286826ce59d9a12c86da62d7631874b3d6364a8e993ada60
+size 1064

checkpoint-4000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}