diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7dbaed370958b5fc1a24a8c07fe26b1b7f462d9c --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +--- +tags: +- text-to-speech +- speecht5 +- mabama # Make sure no empty tags exist +library_name: transformers +license: mit +--- \ No newline at end of file diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cd5b477a9075c49d99de65622db37bb06a251985 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,4 @@ +{ + "": 80, + "": 79 +} diff --git a/checkpoint-1000/added_tokens.json b/checkpoint-1000/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cd5b477a9075c49d99de65622db37bb06a251985 --- /dev/null +++ b/checkpoint-1000/added_tokens.json @@ -0,0 +1,4 @@ +{ + "": 80, + "": 79 +} diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..11c8ec6e9cb5a360feb11650e24ba1adc08e8d0e --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,91 @@ +{ + "activation_dropout": 0.1, + "apply_spec_augment": true, + "architectures": [ + "SpeechT5ForTextToSpeech" + ], + "attention_dropout": 0.1, + "bos_token_id": 0, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.1, + "decoder_layers": 6, + "decoder_start_token_id": 2, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.1, + "encoder_layers": 12, + "encoder_max_relative_position": 160, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "guided_attention_loss_num_heads": 2, + "guided_attention_loss_scale": 10.0, + "guided_attention_loss_sigma": 0.4, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "is_encoder_decoder": true, + "layer_norm_eps": 1e-05, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": null, + "max_speech_positions": 1876, + "max_text_positions": 600, + "model_type": "speecht5", + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_mel_bins": 80, + "pad_token_id": 1, + "positional_dropout": 0.1, + "reduction_factor": 2, + "scale_embedding": false, + "speaker_embedding_dim": 512, + "speech_decoder_postnet_dropout": 0.5, + "speech_decoder_postnet_kernel": 5, + "speech_decoder_postnet_layers": 5, + "speech_decoder_postnet_units": 256, + "speech_decoder_prenet_dropout": 0.5, + "speech_decoder_prenet_layers": 2, + "speech_decoder_prenet_units": 256, + "torch_dtype": "float32", + "transformers_version": "4.52.0.dev0", + "use_cache": false, + "use_guided_attention_loss": true, + "vocab_size": 81 +} diff --git a/checkpoint-1000/generation_config.json b/checkpoint-1000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3041cc103eccf911a925012f1ad8fd1d12cce149 --- /dev/null +++ b/checkpoint-1000/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "eos_token_id": 2, + "max_length": 1876, + 
"pad_token_id": 1, + "transformers_version": "4.52.0.dev0" +} diff --git a/checkpoint-1000/model.safetensors b/checkpoint-1000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5540e86f5478d7907f8e2b229391b344c5694ec0 --- /dev/null +++ b/checkpoint-1000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87514d400a13c6eefdeb2f89abd1795e621c2344f96e159ce2aeba3d0ce85944 +size 577789320 diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..26e2ef12b961da2cd0404a8f151e97f3b5dea730 --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2699a889db8dbb0ae670281bc558951bffc765ae88e6bb0bb5222ac12288814b +size 1155772233 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7d6748f8e4525d7fe8c1e3995fc1adb59c705b37 --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82d177efef76b1d9db7c817f74c58d37c483a0042c96999443934a8052be41aa +size 14244 diff --git a/checkpoint-1000/scaler.pt b/checkpoint-1000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..99fd9b9f6c56133927938af3ea5ee182baf431ae --- /dev/null +++ b/checkpoint-1000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27eb6d31126283f601b217f22a8971040a00a73abf0a2e26bfcb5064cd0afa48 +size 988 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8565d90824cdbfd2ca3a73025f401c07d39317db --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5101d8c1f86d6f48167e50b1164b9ba363ab76694ff2d5c1e326e3d5f94ecaef +size 1064 diff --git a/checkpoint-1000/special_tokens_map.json b/checkpoint-1000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..4ee24ec69861cfc94abbe2c8c934aa0744aa623c --- /dev/null +++ b/checkpoint-1000/special_tokens_map.json @@ -0,0 +1,13 @@ +{ + "bos_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": "" +} diff --git a/checkpoint-1000/spm_char.model b/checkpoint-1000/spm_char.model new file mode 100644 index 0000000000000000000000000000000000000000..8fb73691942626fa75df80b61aab0e9b9340d8e2 --- /dev/null +++ b/checkpoint-1000/spm_char.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560 +size 238473 diff --git a/checkpoint-1000/tokenizer_config.json b/checkpoint-1000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e88d44ba3be31ac8f53461ae7c1b02b4c5c830ab --- /dev/null +++ b/checkpoint-1000/tokenizer_config.json @@ -0,0 +1,64 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "79": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "80": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 600, + "normalize": false, + "pad_token": "", + "processor_class": "SpeechT5Processor", + "sp_model_kwargs": {}, + "tokenizer_class": "SpeechT5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..561078f38d40883d12c484952530e59a49e8a5d1 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,322 @@ +{ + "best_global_step": 1000, + "best_metric": 0.5154594779014587, + "best_model_checkpoint": "./speecht5_tts_mabama/checkpoint-1000", + "epoch": 125.0, + "eval_steps": 1000, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 3.1333333333333333, + "grad_norm": 13.092179298400879, + "learning_rate": 4.2000000000000006e-07, + "loss": 1.0978, + "step": 25 + }, + { + "epoch": 6.266666666666667, + "grad_norm": 13.538804054260254, + "learning_rate": 9.200000000000001e-07, + "loss": 1.0057, + "step": 50 + }, + { + "epoch": 9.4, + "grad_norm": 3.8923733234405518, + "learning_rate": 1.42e-06, + "loss": 0.8155, + "step": 75 + }, + { + "epoch": 12.533333333333333, + "grad_norm": 2.569746732711792, + "learning_rate": 1.9200000000000003e-06, + "loss": 0.7921, + "step": 100 + }, + { + "epoch": 15.666666666666666, + "grad_norm": 2.390493631362915, + "learning_rate": 2.42e-06, + "loss": 0.7531, + "step": 125 + }, + { + "epoch": 18.8, + "grad_norm": 2.7168779373168945, + "learning_rate": 2.92e-06, + "loss": 0.7393, + "step": 150 + }, + { + "epoch": 21.933333333333334, + "grad_norm": 10.27633285522461, + "learning_rate": 3.4200000000000007e-06, + "loss": 0.7292, + "step": 175 + }, + { + "epoch": 25.0, + "grad_norm": 5.6921000480651855, + "learning_rate": 3.920000000000001e-06, + "loss": 0.6642, + "step": 200 + }, + { + "epoch": 28.133333333333333, + "grad_norm": 2.6206777095794678, + "learning_rate": 4.42e-06, + "loss": 0.6555, + "step": 225 + }, + { + "epoch": 31.266666666666666, + "grad_norm": 1.9396028518676758, + "learning_rate": 4.92e-06, + "loss": 0.6484, + "step": 250 + }, + { + "epoch": 34.4, + "grad_norm": 3.44437575340271, + "learning_rate": 5.420000000000001e-06, + "loss": 0.6414, + "step": 275 + }, + { + "epoch": 37.53333333333333, + "grad_norm": 2.729497194290161, + "learning_rate": 5.92e-06, + "loss": 0.6323, + "step": 300 + }, + { + "epoch": 40.666666666666664, + "grad_norm": 2.3852877616882324, + "learning_rate": 6.42e-06, + "loss": 0.6073, + "step": 325 + }, + { + "epoch": 43.8, + "grad_norm": 4.4287109375, + "learning_rate": 6.92e-06, + "loss": 0.6034, + "step": 350 + }, + { + "epoch": 46.93333333333333, + "grad_norm": 2.1653966903686523, + "learning_rate": 7.420000000000001e-06, + "loss": 0.5865, + "step": 375 + }, + { + "epoch": 50.0, + "grad_norm": 2.8120265007019043, + "learning_rate": 7.92e-06, + "loss": 0.5556, + "step": 400 + }, + { + "epoch": 53.13333333333333, + "grad_norm": 
2.0973806381225586, + "learning_rate": 8.42e-06, + "loss": 0.5416, + "step": 425 + }, + { + "epoch": 56.266666666666666, + "grad_norm": 2.6723616123199463, + "learning_rate": 8.920000000000001e-06, + "loss": 0.5407, + "step": 450 + }, + { + "epoch": 59.4, + "grad_norm": 2.1810383796691895, + "learning_rate": 9.42e-06, + "loss": 0.5174, + "step": 475 + }, + { + "epoch": 62.53333333333333, + "grad_norm": 3.464071750640869, + "learning_rate": 9.920000000000002e-06, + "loss": 0.5327, + "step": 500 + }, + { + "epoch": 65.66666666666667, + "grad_norm": 3.6148977279663086, + "learning_rate": 9.940000000000001e-06, + "loss": 0.5141, + "step": 525 + }, + { + "epoch": 68.8, + "grad_norm": 2.5631027221679688, + "learning_rate": 9.86857142857143e-06, + "loss": 0.5246, + "step": 550 + }, + { + "epoch": 71.93333333333334, + "grad_norm": 2.058468818664551, + "learning_rate": 9.797142857142858e-06, + "loss": 0.5065, + "step": 575 + }, + { + "epoch": 75.0, + "grad_norm": 1.7559466361999512, + "learning_rate": 9.725714285714287e-06, + "loss": 0.4871, + "step": 600 + }, + { + "epoch": 78.13333333333334, + "grad_norm": 2.653345823287964, + "learning_rate": 9.654285714285716e-06, + "loss": 0.4941, + "step": 625 + }, + { + "epoch": 81.26666666666667, + "grad_norm": 2.612226724624634, + "learning_rate": 9.582857142857143e-06, + "loss": 0.4796, + "step": 650 + }, + { + "epoch": 84.4, + "grad_norm": 1.7446099519729614, + "learning_rate": 9.511428571428572e-06, + "loss": 0.487, + "step": 675 + }, + { + "epoch": 87.53333333333333, + "grad_norm": 2.627315044403076, + "learning_rate": 9.440000000000001e-06, + "loss": 0.4731, + "step": 700 + }, + { + "epoch": 90.66666666666667, + "grad_norm": 2.4315383434295654, + "learning_rate": 9.368571428571428e-06, + "loss": 0.4812, + "step": 725 + }, + { + "epoch": 93.8, + "grad_norm": 2.4056336879730225, + "learning_rate": 9.297142857142857e-06, + "loss": 0.468, + "step": 750 + }, + { + "epoch": 96.93333333333334, + "grad_norm": 2.153116464614868, + "learning_rate": 9.225714285714286e-06, + "loss": 0.4829, + "step": 775 + }, + { + "epoch": 100.0, + "grad_norm": 2.9421756267547607, + "learning_rate": 9.154285714285715e-06, + "loss": 0.4555, + "step": 800 + }, + { + "epoch": 103.13333333333334, + "grad_norm": 1.6771883964538574, + "learning_rate": 9.082857142857143e-06, + "loss": 0.462, + "step": 825 + }, + { + "epoch": 106.26666666666667, + "grad_norm": 2.9711899757385254, + "learning_rate": 9.011428571428572e-06, + "loss": 0.471, + "step": 850 + }, + { + "epoch": 109.4, + "grad_norm": 1.922980546951294, + "learning_rate": 8.94e-06, + "loss": 0.4673, + "step": 875 + }, + { + "epoch": 112.53333333333333, + "grad_norm": 2.49945068359375, + "learning_rate": 8.86857142857143e-06, + "loss": 0.4611, + "step": 900 + }, + { + "epoch": 115.66666666666667, + "grad_norm": 2.646510362625122, + "learning_rate": 8.797142857142857e-06, + "loss": 0.4574, + "step": 925 + }, + { + "epoch": 118.8, + "grad_norm": 1.7943354845046997, + "learning_rate": 8.725714285714286e-06, + "loss": 0.4658, + "step": 950 + }, + { + "epoch": 121.93333333333334, + "grad_norm": 2.171827793121338, + "learning_rate": 8.654285714285715e-06, + "loss": 0.4561, + "step": 975 + }, + { + "epoch": 125.0, + "grad_norm": 7.516489505767822, + "learning_rate": 8.582857142857144e-06, + "loss": 0.4472, + "step": 1000 + }, + { + "epoch": 125.0, + "eval_loss": 0.5154594779014587, + "eval_runtime": 0.7837, + "eval_samples_per_second": 33.175, + "eval_steps_per_second": 5.104, + "step": 1000 + } + ], + "logging_steps": 25, + 
"max_steps": 4000, + "num_input_tokens_seen": 0, + "num_train_epochs": 572, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2568713479659360.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a61b2643a9f565814c2aa62a171605debf82a9c0 --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e653cd3b83482a0939ee4c8a207df9a996f44ce9dd82197c4ab6cde60cf2bb +size 5432 diff --git a/checkpoint-2000/added_tokens.json b/checkpoint-2000/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cd5b477a9075c49d99de65622db37bb06a251985 --- /dev/null +++ b/checkpoint-2000/added_tokens.json @@ -0,0 +1,4 @@ +{ + "": 80, + "": 79 +} diff --git a/checkpoint-2000/config.json b/checkpoint-2000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..11c8ec6e9cb5a360feb11650e24ba1adc08e8d0e --- /dev/null +++ b/checkpoint-2000/config.json @@ -0,0 +1,91 @@ +{ + "activation_dropout": 0.1, + "apply_spec_augment": true, + "architectures": [ + "SpeechT5ForTextToSpeech" + ], + "attention_dropout": 0.1, + "bos_token_id": 0, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.1, + "decoder_layers": 6, + "decoder_start_token_id": 2, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.1, + "encoder_layers": 12, + "encoder_max_relative_position": 160, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "guided_attention_loss_num_heads": 2, + "guided_attention_loss_scale": 10.0, + "guided_attention_loss_sigma": 0.4, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "is_encoder_decoder": true, + "layer_norm_eps": 1e-05, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": null, + "max_speech_positions": 1876, + "max_text_positions": 600, + "model_type": "speecht5", + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_mel_bins": 80, + "pad_token_id": 1, + "positional_dropout": 0.1, + "reduction_factor": 2, + "scale_embedding": false, + "speaker_embedding_dim": 512, + "speech_decoder_postnet_dropout": 0.5, + "speech_decoder_postnet_kernel": 5, + "speech_decoder_postnet_layers": 5, + "speech_decoder_postnet_units": 256, + "speech_decoder_prenet_dropout": 0.5, + "speech_decoder_prenet_layers": 2, + "speech_decoder_prenet_units": 256, + "torch_dtype": "float32", + "transformers_version": "4.52.0.dev0", + "use_cache": false, + "use_guided_attention_loss": true, + "vocab_size": 81 +} diff --git a/checkpoint-2000/generation_config.json b/checkpoint-2000/generation_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..3041cc103eccf911a925012f1ad8fd1d12cce149 --- /dev/null +++ b/checkpoint-2000/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "eos_token_id": 2, + "max_length": 1876, + "pad_token_id": 1, + "transformers_version": "4.52.0.dev0" +} diff --git a/checkpoint-2000/model.safetensors b/checkpoint-2000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e0b6396d9f590a58a3fe32f7a2a77f0587b4a8a --- /dev/null +++ b/checkpoint-2000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abac69d52746eff2a8ac4fea48c076a031effd3d774eaa79c34c25289b78a9ad +size 577789320 diff --git a/checkpoint-2000/optimizer.pt b/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..43314c8f7174d70adfb383a4571110603aabb196 --- /dev/null +++ b/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df818e2b4cf58851158e10b3d57754c198be51c0b852b9cd4b587b629a205640 +size 1155772233 diff --git a/checkpoint-2000/rng_state.pth b/checkpoint-2000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7fd9a1224c43c6ea63f9c6313bab015aa0c29295 --- /dev/null +++ b/checkpoint-2000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b766bf7909addbb49e8f135f2f8aa3b6e99cb053e36395d8560f93e71c2776e7 +size 14244 diff --git a/checkpoint-2000/scaler.pt b/checkpoint-2000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..177496ba5922b3811ae20b8354e55ee6d4aaeff8 --- /dev/null +++ b/checkpoint-2000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49bc19d5712fad43d5cef95c2e01c73bd75bdb71e4c16fa8781d626d978f5452 +size 988 diff --git a/checkpoint-2000/scheduler.pt b/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a6460a89462994303545a8f3997e58e1490360f --- /dev/null +++ b/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b80a609c64a12b4db2f38941ea479b9a30f9351b7aac74f4956e8686dc338317 +size 1064 diff --git a/checkpoint-2000/special_tokens_map.json b/checkpoint-2000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..4ee24ec69861cfc94abbe2c8c934aa0744aa623c --- /dev/null +++ b/checkpoint-2000/special_tokens_map.json @@ -0,0 +1,13 @@ +{ + "bos_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": "" +} diff --git a/checkpoint-2000/spm_char.model b/checkpoint-2000/spm_char.model new file mode 100644 index 0000000000000000000000000000000000000000..8fb73691942626fa75df80b61aab0e9b9340d8e2 --- /dev/null +++ b/checkpoint-2000/spm_char.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560 +size 238473 diff --git a/checkpoint-2000/tokenizer_config.json b/checkpoint-2000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e88d44ba3be31ac8f53461ae7c1b02b4c5c830ab --- /dev/null +++ b/checkpoint-2000/tokenizer_config.json @@ -0,0 +1,64 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "79": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "80": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 600, + "normalize": false, + "pad_token": "", + "processor_class": "SpeechT5Processor", + "sp_model_kwargs": {}, + "tokenizer_class": "SpeechT5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-2000/trainer_state.json b/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d3857b0a031ed9222d8530018fd26b3025f4b10e --- /dev/null +++ b/checkpoint-2000/trainer_state.json @@ -0,0 +1,610 @@ +{ + "best_global_step": 2000, + "best_metric": 0.4833647906780243, + "best_model_checkpoint": "./speecht5_tts_mabama/checkpoint-2000", + "epoch": 250.0, + "eval_steps": 1000, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 3.1333333333333333, + "grad_norm": 13.092179298400879, + "learning_rate": 4.2000000000000006e-07, + "loss": 1.0978, + "step": 25 + }, + { + "epoch": 6.266666666666667, + "grad_norm": 13.538804054260254, + "learning_rate": 9.200000000000001e-07, + "loss": 1.0057, + "step": 50 + }, + { + "epoch": 9.4, + "grad_norm": 3.8923733234405518, + "learning_rate": 1.42e-06, + "loss": 0.8155, + "step": 75 + }, + { + "epoch": 12.533333333333333, + "grad_norm": 2.569746732711792, + "learning_rate": 1.9200000000000003e-06, + "loss": 0.7921, + "step": 100 + }, + { + "epoch": 15.666666666666666, + "grad_norm": 2.390493631362915, + "learning_rate": 2.42e-06, + "loss": 0.7531, + "step": 125 + }, + { + "epoch": 18.8, + "grad_norm": 2.7168779373168945, + "learning_rate": 2.92e-06, + "loss": 0.7393, + "step": 150 + }, + { + "epoch": 21.933333333333334, + "grad_norm": 10.27633285522461, + "learning_rate": 3.4200000000000007e-06, + "loss": 0.7292, + "step": 175 + }, + { + "epoch": 25.0, + "grad_norm": 5.6921000480651855, + "learning_rate": 3.920000000000001e-06, + "loss": 0.6642, + "step": 200 + }, + { + "epoch": 28.133333333333333, + "grad_norm": 2.6206777095794678, + "learning_rate": 4.42e-06, + "loss": 0.6555, + "step": 225 + }, + { + "epoch": 31.266666666666666, + "grad_norm": 1.9396028518676758, + "learning_rate": 4.92e-06, + "loss": 0.6484, + "step": 250 + }, + { + "epoch": 34.4, + "grad_norm": 3.44437575340271, + "learning_rate": 5.420000000000001e-06, + "loss": 0.6414, + "step": 275 + }, + { + "epoch": 37.53333333333333, + "grad_norm": 2.729497194290161, + "learning_rate": 5.92e-06, + "loss": 0.6323, + "step": 300 + }, + { + "epoch": 40.666666666666664, + "grad_norm": 2.3852877616882324, + "learning_rate": 6.42e-06, + "loss": 0.6073, + "step": 325 + }, + { + "epoch": 43.8, + "grad_norm": 4.4287109375, + "learning_rate": 6.92e-06, + "loss": 0.6034, + "step": 350 + }, + { + "epoch": 46.93333333333333, + 
"grad_norm": 2.1653966903686523, + "learning_rate": 7.420000000000001e-06, + "loss": 0.5865, + "step": 375 + }, + { + "epoch": 50.0, + "grad_norm": 2.8120265007019043, + "learning_rate": 7.92e-06, + "loss": 0.5556, + "step": 400 + }, + { + "epoch": 53.13333333333333, + "grad_norm": 2.0973806381225586, + "learning_rate": 8.42e-06, + "loss": 0.5416, + "step": 425 + }, + { + "epoch": 56.266666666666666, + "grad_norm": 2.6723616123199463, + "learning_rate": 8.920000000000001e-06, + "loss": 0.5407, + "step": 450 + }, + { + "epoch": 59.4, + "grad_norm": 2.1810383796691895, + "learning_rate": 9.42e-06, + "loss": 0.5174, + "step": 475 + }, + { + "epoch": 62.53333333333333, + "grad_norm": 3.464071750640869, + "learning_rate": 9.920000000000002e-06, + "loss": 0.5327, + "step": 500 + }, + { + "epoch": 65.66666666666667, + "grad_norm": 3.6148977279663086, + "learning_rate": 9.940000000000001e-06, + "loss": 0.5141, + "step": 525 + }, + { + "epoch": 68.8, + "grad_norm": 2.5631027221679688, + "learning_rate": 9.86857142857143e-06, + "loss": 0.5246, + "step": 550 + }, + { + "epoch": 71.93333333333334, + "grad_norm": 2.058468818664551, + "learning_rate": 9.797142857142858e-06, + "loss": 0.5065, + "step": 575 + }, + { + "epoch": 75.0, + "grad_norm": 1.7559466361999512, + "learning_rate": 9.725714285714287e-06, + "loss": 0.4871, + "step": 600 + }, + { + "epoch": 78.13333333333334, + "grad_norm": 2.653345823287964, + "learning_rate": 9.654285714285716e-06, + "loss": 0.4941, + "step": 625 + }, + { + "epoch": 81.26666666666667, + "grad_norm": 2.612226724624634, + "learning_rate": 9.582857142857143e-06, + "loss": 0.4796, + "step": 650 + }, + { + "epoch": 84.4, + "grad_norm": 1.7446099519729614, + "learning_rate": 9.511428571428572e-06, + "loss": 0.487, + "step": 675 + }, + { + "epoch": 87.53333333333333, + "grad_norm": 2.627315044403076, + "learning_rate": 9.440000000000001e-06, + "loss": 0.4731, + "step": 700 + }, + { + "epoch": 90.66666666666667, + "grad_norm": 2.4315383434295654, + "learning_rate": 9.368571428571428e-06, + "loss": 0.4812, + "step": 725 + }, + { + "epoch": 93.8, + "grad_norm": 2.4056336879730225, + "learning_rate": 9.297142857142857e-06, + "loss": 0.468, + "step": 750 + }, + { + "epoch": 96.93333333333334, + "grad_norm": 2.153116464614868, + "learning_rate": 9.225714285714286e-06, + "loss": 0.4829, + "step": 775 + }, + { + "epoch": 100.0, + "grad_norm": 2.9421756267547607, + "learning_rate": 9.154285714285715e-06, + "loss": 0.4555, + "step": 800 + }, + { + "epoch": 103.13333333333334, + "grad_norm": 1.6771883964538574, + "learning_rate": 9.082857142857143e-06, + "loss": 0.462, + "step": 825 + }, + { + "epoch": 106.26666666666667, + "grad_norm": 2.9711899757385254, + "learning_rate": 9.011428571428572e-06, + "loss": 0.471, + "step": 850 + }, + { + "epoch": 109.4, + "grad_norm": 1.922980546951294, + "learning_rate": 8.94e-06, + "loss": 0.4673, + "step": 875 + }, + { + "epoch": 112.53333333333333, + "grad_norm": 2.49945068359375, + "learning_rate": 8.86857142857143e-06, + "loss": 0.4611, + "step": 900 + }, + { + "epoch": 115.66666666666667, + "grad_norm": 2.646510362625122, + "learning_rate": 8.797142857142857e-06, + "loss": 0.4574, + "step": 925 + }, + { + "epoch": 118.8, + "grad_norm": 1.7943354845046997, + "learning_rate": 8.725714285714286e-06, + "loss": 0.4658, + "step": 950 + }, + { + "epoch": 121.93333333333334, + "grad_norm": 2.171827793121338, + "learning_rate": 8.654285714285715e-06, + "loss": 0.4561, + "step": 975 + }, + { + "epoch": 125.0, + "grad_norm": 7.516489505767822, + 
"learning_rate": 8.582857142857144e-06, + "loss": 0.4472, + "step": 1000 + }, + { + "epoch": 125.0, + "eval_loss": 0.5154594779014587, + "eval_runtime": 0.7837, + "eval_samples_per_second": 33.175, + "eval_steps_per_second": 5.104, + "step": 1000 + }, + { + "epoch": 128.13333333333333, + "grad_norm": 2.5167343616485596, + "learning_rate": 8.511428571428571e-06, + "loss": 0.457, + "step": 1025 + }, + { + "epoch": 131.26666666666668, + "grad_norm": 3.3089983463287354, + "learning_rate": 8.44e-06, + "loss": 0.4456, + "step": 1050 + }, + { + "epoch": 134.4, + "grad_norm": 2.778348445892334, + "learning_rate": 8.36857142857143e-06, + "loss": 0.4612, + "step": 1075 + }, + { + "epoch": 137.53333333333333, + "grad_norm": 2.529778480529785, + "learning_rate": 8.297142857142859e-06, + "loss": 0.4429, + "step": 1100 + }, + { + "epoch": 140.66666666666666, + "grad_norm": 1.76685631275177, + "learning_rate": 8.225714285714288e-06, + "loss": 0.4399, + "step": 1125 + }, + { + "epoch": 143.8, + "grad_norm": 1.8449666500091553, + "learning_rate": 8.154285714285715e-06, + "loss": 0.4329, + "step": 1150 + }, + { + "epoch": 146.93333333333334, + "grad_norm": 1.9097468852996826, + "learning_rate": 8.082857142857144e-06, + "loss": 0.4527, + "step": 1175 + }, + { + "epoch": 150.0, + "grad_norm": 3.892838716506958, + "learning_rate": 8.011428571428573e-06, + "loss": 0.4448, + "step": 1200 + }, + { + "epoch": 153.13333333333333, + "grad_norm": 2.1518826484680176, + "learning_rate": 7.94e-06, + "loss": 0.4412, + "step": 1225 + }, + { + "epoch": 156.26666666666668, + "grad_norm": 1.5322662591934204, + "learning_rate": 7.86857142857143e-06, + "loss": 0.4388, + "step": 1250 + }, + { + "epoch": 159.4, + "grad_norm": 1.4961107969284058, + "learning_rate": 7.797142857142858e-06, + "loss": 0.4363, + "step": 1275 + }, + { + "epoch": 162.53333333333333, + "grad_norm": 1.8992841243743896, + "learning_rate": 7.725714285714286e-06, + "loss": 0.4474, + "step": 1300 + }, + { + "epoch": 165.66666666666666, + "grad_norm": 1.5015554428100586, + "learning_rate": 7.654285714285715e-06, + "loss": 0.4327, + "step": 1325 + }, + { + "epoch": 168.8, + "grad_norm": 2.0730693340301514, + "learning_rate": 7.5828571428571444e-06, + "loss": 0.4348, + "step": 1350 + }, + { + "epoch": 171.93333333333334, + "grad_norm": 2.0838747024536133, + "learning_rate": 7.511428571428572e-06, + "loss": 0.4393, + "step": 1375 + }, + { + "epoch": 175.0, + "grad_norm": 4.3804030418396, + "learning_rate": 7.440000000000001e-06, + "loss": 0.4386, + "step": 1400 + }, + { + "epoch": 178.13333333333333, + "grad_norm": 1.8927189111709595, + "learning_rate": 7.36857142857143e-06, + "loss": 0.4318, + "step": 1425 + }, + { + "epoch": 181.26666666666668, + "grad_norm": 1.5456620454788208, + "learning_rate": 7.297142857142858e-06, + "loss": 0.4336, + "step": 1450 + }, + { + "epoch": 184.4, + "grad_norm": 2.722612142562866, + "learning_rate": 7.225714285714286e-06, + "loss": 0.4281, + "step": 1475 + }, + { + "epoch": 187.53333333333333, + "grad_norm": 1.9484314918518066, + "learning_rate": 7.154285714285715e-06, + "loss": 0.4312, + "step": 1500 + }, + { + "epoch": 190.66666666666666, + "grad_norm": 2.101043224334717, + "learning_rate": 7.082857142857143e-06, + "loss": 0.427, + "step": 1525 + }, + { + "epoch": 193.8, + "grad_norm": 1.9785490036010742, + "learning_rate": 7.011428571428572e-06, + "loss": 0.4298, + "step": 1550 + }, + { + "epoch": 196.93333333333334, + "grad_norm": 2.319054126739502, + "learning_rate": 6.9400000000000005e-06, + "loss": 0.4376, + "step": 1575 + 
}, + { + "epoch": 200.0, + "grad_norm": 1.3612741231918335, + "learning_rate": 6.868571428571429e-06, + "loss": 0.4217, + "step": 1600 + }, + { + "epoch": 203.13333333333333, + "grad_norm": 2.128363847732544, + "learning_rate": 6.797142857142858e-06, + "loss": 0.4217, + "step": 1625 + }, + { + "epoch": 206.26666666666668, + "grad_norm": 1.7985234260559082, + "learning_rate": 6.725714285714287e-06, + "loss": 0.4147, + "step": 1650 + }, + { + "epoch": 209.4, + "grad_norm": 1.3478573560714722, + "learning_rate": 6.654285714285716e-06, + "loss": 0.4357, + "step": 1675 + }, + { + "epoch": 212.53333333333333, + "grad_norm": 1.5389248132705688, + "learning_rate": 6.582857142857143e-06, + "loss": 0.419, + "step": 1700 + }, + { + "epoch": 215.66666666666666, + "grad_norm": 1.9558783769607544, + "learning_rate": 6.511428571428572e-06, + "loss": 0.4289, + "step": 1725 + }, + { + "epoch": 218.8, + "grad_norm": 1.756585955619812, + "learning_rate": 6.440000000000001e-06, + "loss": 0.4168, + "step": 1750 + }, + { + "epoch": 221.93333333333334, + "grad_norm": 1.8744903802871704, + "learning_rate": 6.368571428571429e-06, + "loss": 0.4296, + "step": 1775 + }, + { + "epoch": 225.0, + "grad_norm": 1.133415699005127, + "learning_rate": 6.297142857142857e-06, + "loss": 0.4162, + "step": 1800 + }, + { + "epoch": 228.13333333333333, + "grad_norm": 2.819840908050537, + "learning_rate": 6.225714285714286e-06, + "loss": 0.4275, + "step": 1825 + }, + { + "epoch": 231.26666666666668, + "grad_norm": 1.5150210857391357, + "learning_rate": 6.1542857142857145e-06, + "loss": 0.4244, + "step": 1850 + }, + { + "epoch": 234.4, + "grad_norm": 2.184819459915161, + "learning_rate": 6.0828571428571435e-06, + "loss": 0.4282, + "step": 1875 + }, + { + "epoch": 237.53333333333333, + "grad_norm": 3.293454170227051, + "learning_rate": 6.011428571428572e-06, + "loss": 0.4215, + "step": 1900 + }, + { + "epoch": 240.66666666666666, + "grad_norm": 1.210433006286621, + "learning_rate": 5.94e-06, + "loss": 0.4103, + "step": 1925 + }, + { + "epoch": 243.8, + "grad_norm": 2.5027923583984375, + "learning_rate": 5.868571428571429e-06, + "loss": 0.4186, + "step": 1950 + }, + { + "epoch": 246.93333333333334, + "grad_norm": 1.9649789333343506, + "learning_rate": 5.797142857142858e-06, + "loss": 0.427, + "step": 1975 + }, + { + "epoch": 250.0, + "grad_norm": 5.899420261383057, + "learning_rate": 5.725714285714287e-06, + "loss": 0.4113, + "step": 2000 + }, + { + "epoch": 250.0, + "eval_loss": 0.4833647906780243, + "eval_runtime": 0.7095, + "eval_samples_per_second": 36.646, + "eval_steps_per_second": 5.638, + "step": 2000 + } + ], + "logging_steps": 25, + "max_steps": 4000, + "num_input_tokens_seen": 0, + "num_train_epochs": 572, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5140893067410672.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2000/training_args.bin b/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a61b2643a9f565814c2aa62a171605debf82a9c0 --- /dev/null +++ b/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e653cd3b83482a0939ee4c8a207df9a996f44ce9dd82197c4ab6cde60cf2bb +size 5432 diff --git a/checkpoint-3000/added_tokens.json 
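
The trainer_state.json files above record the schedule this run followed: loss logged every 25 steps, evaluation and checkpointing every 1000 steps, a train batch size of 16, a peak learning rate of about 1e-5 reached near step 500 and decayed linearly toward step 4000, and AMP gradient-scaling state in scaler.pt. As a rough sketch only, the Seq2SeqTrainingArguments below would be consistent with those logged values; every field is inferred from trainer_state.json rather than read from the binary training_args.bin, so treat it as a plausible reconstruction, not the actual configuration.

```python
from transformers import Seq2SeqTrainingArguments

# Hypothetical reconstruction: values inferred from trainer_state.json,
# not decoded from training_args.bin.
training_args = Seq2SeqTrainingArguments(
    output_dir="./speecht5_tts_mabama",  # matches the best_model_checkpoint paths
    per_device_train_batch_size=16,      # "train_batch_size": 16
    learning_rate=1e-5,                  # peak LR seen in the log (~9.94e-06)
    warmup_steps=500,                    # LR ramps linearly until roughly step 500
    max_steps=4000,                      # "max_steps": 4000
    fp16=True,                           # scaler.pt implies mixed-precision training
    eval_strategy="steps",
    eval_steps=1000,                     # "eval_steps": 1000
    save_steps=1000,                     # "save_steps": 1000
    logging_steps=25,                    # "logging_steps": 25
    load_best_model_at_end=True,         # best_metric / best_model_checkpoint are tracked
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    label_names=["labels"],
)
```
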
b/checkpoint-3000/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cd5b477a9075c49d99de65622db37bb06a251985 --- /dev/null +++ b/checkpoint-3000/added_tokens.json @@ -0,0 +1,4 @@ +{ + "": 80, + "": 79 +} diff --git a/checkpoint-3000/config.json b/checkpoint-3000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..11c8ec6e9cb5a360feb11650e24ba1adc08e8d0e --- /dev/null +++ b/checkpoint-3000/config.json @@ -0,0 +1,91 @@ +{ + "activation_dropout": 0.1, + "apply_spec_augment": true, + "architectures": [ + "SpeechT5ForTextToSpeech" + ], + "attention_dropout": 0.1, + "bos_token_id": 0, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.1, + "decoder_layers": 6, + "decoder_start_token_id": 2, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.1, + "encoder_layers": 12, + "encoder_max_relative_position": 160, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "guided_attention_loss_num_heads": 2, + "guided_attention_loss_scale": 10.0, + "guided_attention_loss_sigma": 0.4, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "is_encoder_decoder": true, + "layer_norm_eps": 1e-05, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": null, + "max_speech_positions": 1876, + "max_text_positions": 600, + "model_type": "speecht5", + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_mel_bins": 80, + "pad_token_id": 1, + "positional_dropout": 0.1, + "reduction_factor": 2, + "scale_embedding": false, + "speaker_embedding_dim": 512, + "speech_decoder_postnet_dropout": 0.5, + "speech_decoder_postnet_kernel": 5, + "speech_decoder_postnet_layers": 5, + "speech_decoder_postnet_units": 256, + "speech_decoder_prenet_dropout": 0.5, + "speech_decoder_prenet_layers": 2, + "speech_decoder_prenet_units": 256, + "torch_dtype": "float32", + "transformers_version": "4.52.0.dev0", + "use_cache": false, + "use_guided_attention_loss": true, + "vocab_size": 81 +} diff --git a/checkpoint-3000/generation_config.json b/checkpoint-3000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3041cc103eccf911a925012f1ad8fd1d12cce149 --- /dev/null +++ b/checkpoint-3000/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "eos_token_id": 2, + "max_length": 1876, + "pad_token_id": 1, + "transformers_version": "4.52.0.dev0" +} diff --git a/checkpoint-3000/model.safetensors b/checkpoint-3000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0fcae15e7cc654d100ee8a6038bf337da110444 --- /dev/null +++ b/checkpoint-3000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d78d191d64b057ef7708236443ccd41bb24d44484f04b36d4fd46df31daa1c6 +size 577789320 diff --git a/checkpoint-3000/optimizer.pt b/checkpoint-3000/optimizer.pt new file mode 100644 index 
0000000000000000000000000000000000000000..7b5339aa4aa73f7f4bfc775204fbe55dfa8c694b --- /dev/null +++ b/checkpoint-3000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abb1e34922901fa49e90b622d421fcc022123b3db879e5c48cdc697dd3a9c2d3 +size 1155772233 diff --git a/checkpoint-3000/rng_state.pth b/checkpoint-3000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e799596e250f2e298a2124c7a1ad86b87b5eb19d --- /dev/null +++ b/checkpoint-3000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a258daad5ac5df9273072647bd5fccfa416cdadb91b7707278c61cc1145a5964 +size 14244 diff --git a/checkpoint-3000/scaler.pt b/checkpoint-3000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a144ce0190defeeb9f7d69e33a30cba839bee5f --- /dev/null +++ b/checkpoint-3000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0c9142a3b98e645e9dc3ffae8c602fb70b74046fea7664e6d081ebb3d0bbb58 +size 988 diff --git a/checkpoint-3000/scheduler.pt b/checkpoint-3000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..00802b7d7ba610674c2662a6888f78aa35c14408 --- /dev/null +++ b/checkpoint-3000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff56efc76c16a3b9a712527179ae61c8d6dfccc7e3a53f8c421d6329adacfbb +size 1064 diff --git a/checkpoint-3000/special_tokens_map.json b/checkpoint-3000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..4ee24ec69861cfc94abbe2c8c934aa0744aa623c --- /dev/null +++ b/checkpoint-3000/special_tokens_map.json @@ -0,0 +1,13 @@ +{ + "bos_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": "" +} diff --git a/checkpoint-3000/spm_char.model b/checkpoint-3000/spm_char.model new file mode 100644 index 0000000000000000000000000000000000000000..8fb73691942626fa75df80b61aab0e9b9340d8e2 --- /dev/null +++ b/checkpoint-3000/spm_char.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560 +size 238473 diff --git a/checkpoint-3000/tokenizer_config.json b/checkpoint-3000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e88d44ba3be31ac8f53461ae7c1b02b4c5c830ab --- /dev/null +++ b/checkpoint-3000/tokenizer_config.json @@ -0,0 +1,64 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "79": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "80": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + 
"model_max_length": 600, + "normalize": false, + "pad_token": "", + "processor_class": "SpeechT5Processor", + "sp_model_kwargs": {}, + "tokenizer_class": "SpeechT5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-3000/trainer_state.json b/checkpoint-3000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..32f7d0ece3153553bd2f22a87cbc990f249809ce --- /dev/null +++ b/checkpoint-3000/trainer_state.json @@ -0,0 +1,898 @@ +{ + "best_global_step": 3000, + "best_metric": 0.46799278259277344, + "best_model_checkpoint": "./speecht5_tts_mabama/checkpoint-3000", + "epoch": 375.0, + "eval_steps": 1000, + "global_step": 3000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 3.1333333333333333, + "grad_norm": 13.092179298400879, + "learning_rate": 4.2000000000000006e-07, + "loss": 1.0978, + "step": 25 + }, + { + "epoch": 6.266666666666667, + "grad_norm": 13.538804054260254, + "learning_rate": 9.200000000000001e-07, + "loss": 1.0057, + "step": 50 + }, + { + "epoch": 9.4, + "grad_norm": 3.8923733234405518, + "learning_rate": 1.42e-06, + "loss": 0.8155, + "step": 75 + }, + { + "epoch": 12.533333333333333, + "grad_norm": 2.569746732711792, + "learning_rate": 1.9200000000000003e-06, + "loss": 0.7921, + "step": 100 + }, + { + "epoch": 15.666666666666666, + "grad_norm": 2.390493631362915, + "learning_rate": 2.42e-06, + "loss": 0.7531, + "step": 125 + }, + { + "epoch": 18.8, + "grad_norm": 2.7168779373168945, + "learning_rate": 2.92e-06, + "loss": 0.7393, + "step": 150 + }, + { + "epoch": 21.933333333333334, + "grad_norm": 10.27633285522461, + "learning_rate": 3.4200000000000007e-06, + "loss": 0.7292, + "step": 175 + }, + { + "epoch": 25.0, + "grad_norm": 5.6921000480651855, + "learning_rate": 3.920000000000001e-06, + "loss": 0.6642, + "step": 200 + }, + { + "epoch": 28.133333333333333, + "grad_norm": 2.6206777095794678, + "learning_rate": 4.42e-06, + "loss": 0.6555, + "step": 225 + }, + { + "epoch": 31.266666666666666, + "grad_norm": 1.9396028518676758, + "learning_rate": 4.92e-06, + "loss": 0.6484, + "step": 250 + }, + { + "epoch": 34.4, + "grad_norm": 3.44437575340271, + "learning_rate": 5.420000000000001e-06, + "loss": 0.6414, + "step": 275 + }, + { + "epoch": 37.53333333333333, + "grad_norm": 2.729497194290161, + "learning_rate": 5.92e-06, + "loss": 0.6323, + "step": 300 + }, + { + "epoch": 40.666666666666664, + "grad_norm": 2.3852877616882324, + "learning_rate": 6.42e-06, + "loss": 0.6073, + "step": 325 + }, + { + "epoch": 43.8, + "grad_norm": 4.4287109375, + "learning_rate": 6.92e-06, + "loss": 0.6034, + "step": 350 + }, + { + "epoch": 46.93333333333333, + "grad_norm": 2.1653966903686523, + "learning_rate": 7.420000000000001e-06, + "loss": 0.5865, + "step": 375 + }, + { + "epoch": 50.0, + "grad_norm": 2.8120265007019043, + "learning_rate": 7.92e-06, + "loss": 0.5556, + "step": 400 + }, + { + "epoch": 53.13333333333333, + "grad_norm": 2.0973806381225586, + "learning_rate": 8.42e-06, + "loss": 0.5416, + "step": 425 + }, + { + "epoch": 56.266666666666666, + "grad_norm": 2.6723616123199463, + "learning_rate": 8.920000000000001e-06, + "loss": 0.5407, + "step": 450 + }, + { + "epoch": 59.4, + "grad_norm": 2.1810383796691895, + "learning_rate": 9.42e-06, + "loss": 0.5174, + "step": 475 + }, + { + "epoch": 62.53333333333333, + "grad_norm": 3.464071750640869, + "learning_rate": 9.920000000000002e-06, + "loss": 0.5327, + "step": 500 + }, + { + "epoch": 65.66666666666667, + "grad_norm": 
3.6148977279663086, + "learning_rate": 9.940000000000001e-06, + "loss": 0.5141, + "step": 525 + }, + { + "epoch": 68.8, + "grad_norm": 2.5631027221679688, + "learning_rate": 9.86857142857143e-06, + "loss": 0.5246, + "step": 550 + }, + { + "epoch": 71.93333333333334, + "grad_norm": 2.058468818664551, + "learning_rate": 9.797142857142858e-06, + "loss": 0.5065, + "step": 575 + }, + { + "epoch": 75.0, + "grad_norm": 1.7559466361999512, + "learning_rate": 9.725714285714287e-06, + "loss": 0.4871, + "step": 600 + }, + { + "epoch": 78.13333333333334, + "grad_norm": 2.653345823287964, + "learning_rate": 9.654285714285716e-06, + "loss": 0.4941, + "step": 625 + }, + { + "epoch": 81.26666666666667, + "grad_norm": 2.612226724624634, + "learning_rate": 9.582857142857143e-06, + "loss": 0.4796, + "step": 650 + }, + { + "epoch": 84.4, + "grad_norm": 1.7446099519729614, + "learning_rate": 9.511428571428572e-06, + "loss": 0.487, + "step": 675 + }, + { + "epoch": 87.53333333333333, + "grad_norm": 2.627315044403076, + "learning_rate": 9.440000000000001e-06, + "loss": 0.4731, + "step": 700 + }, + { + "epoch": 90.66666666666667, + "grad_norm": 2.4315383434295654, + "learning_rate": 9.368571428571428e-06, + "loss": 0.4812, + "step": 725 + }, + { + "epoch": 93.8, + "grad_norm": 2.4056336879730225, + "learning_rate": 9.297142857142857e-06, + "loss": 0.468, + "step": 750 + }, + { + "epoch": 96.93333333333334, + "grad_norm": 2.153116464614868, + "learning_rate": 9.225714285714286e-06, + "loss": 0.4829, + "step": 775 + }, + { + "epoch": 100.0, + "grad_norm": 2.9421756267547607, + "learning_rate": 9.154285714285715e-06, + "loss": 0.4555, + "step": 800 + }, + { + "epoch": 103.13333333333334, + "grad_norm": 1.6771883964538574, + "learning_rate": 9.082857142857143e-06, + "loss": 0.462, + "step": 825 + }, + { + "epoch": 106.26666666666667, + "grad_norm": 2.9711899757385254, + "learning_rate": 9.011428571428572e-06, + "loss": 0.471, + "step": 850 + }, + { + "epoch": 109.4, + "grad_norm": 1.922980546951294, + "learning_rate": 8.94e-06, + "loss": 0.4673, + "step": 875 + }, + { + "epoch": 112.53333333333333, + "grad_norm": 2.49945068359375, + "learning_rate": 8.86857142857143e-06, + "loss": 0.4611, + "step": 900 + }, + { + "epoch": 115.66666666666667, + "grad_norm": 2.646510362625122, + "learning_rate": 8.797142857142857e-06, + "loss": 0.4574, + "step": 925 + }, + { + "epoch": 118.8, + "grad_norm": 1.7943354845046997, + "learning_rate": 8.725714285714286e-06, + "loss": 0.4658, + "step": 950 + }, + { + "epoch": 121.93333333333334, + "grad_norm": 2.171827793121338, + "learning_rate": 8.654285714285715e-06, + "loss": 0.4561, + "step": 975 + }, + { + "epoch": 125.0, + "grad_norm": 7.516489505767822, + "learning_rate": 8.582857142857144e-06, + "loss": 0.4472, + "step": 1000 + }, + { + "epoch": 125.0, + "eval_loss": 0.5154594779014587, + "eval_runtime": 0.7837, + "eval_samples_per_second": 33.175, + "eval_steps_per_second": 5.104, + "step": 1000 + }, + { + "epoch": 128.13333333333333, + "grad_norm": 2.5167343616485596, + "learning_rate": 8.511428571428571e-06, + "loss": 0.457, + "step": 1025 + }, + { + "epoch": 131.26666666666668, + "grad_norm": 3.3089983463287354, + "learning_rate": 8.44e-06, + "loss": 0.4456, + "step": 1050 + }, + { + "epoch": 134.4, + "grad_norm": 2.778348445892334, + "learning_rate": 8.36857142857143e-06, + "loss": 0.4612, + "step": 1075 + }, + { + "epoch": 137.53333333333333, + "grad_norm": 2.529778480529785, + "learning_rate": 8.297142857142859e-06, + "loss": 0.4429, + "step": 1100 + }, + { + "epoch": 
140.66666666666666, + "grad_norm": 1.76685631275177, + "learning_rate": 8.225714285714288e-06, + "loss": 0.4399, + "step": 1125 + }, + { + "epoch": 143.8, + "grad_norm": 1.8449666500091553, + "learning_rate": 8.154285714285715e-06, + "loss": 0.4329, + "step": 1150 + }, + { + "epoch": 146.93333333333334, + "grad_norm": 1.9097468852996826, + "learning_rate": 8.082857142857144e-06, + "loss": 0.4527, + "step": 1175 + }, + { + "epoch": 150.0, + "grad_norm": 3.892838716506958, + "learning_rate": 8.011428571428573e-06, + "loss": 0.4448, + "step": 1200 + }, + { + "epoch": 153.13333333333333, + "grad_norm": 2.1518826484680176, + "learning_rate": 7.94e-06, + "loss": 0.4412, + "step": 1225 + }, + { + "epoch": 156.26666666666668, + "grad_norm": 1.5322662591934204, + "learning_rate": 7.86857142857143e-06, + "loss": 0.4388, + "step": 1250 + }, + { + "epoch": 159.4, + "grad_norm": 1.4961107969284058, + "learning_rate": 7.797142857142858e-06, + "loss": 0.4363, + "step": 1275 + }, + { + "epoch": 162.53333333333333, + "grad_norm": 1.8992841243743896, + "learning_rate": 7.725714285714286e-06, + "loss": 0.4474, + "step": 1300 + }, + { + "epoch": 165.66666666666666, + "grad_norm": 1.5015554428100586, + "learning_rate": 7.654285714285715e-06, + "loss": 0.4327, + "step": 1325 + }, + { + "epoch": 168.8, + "grad_norm": 2.0730693340301514, + "learning_rate": 7.5828571428571444e-06, + "loss": 0.4348, + "step": 1350 + }, + { + "epoch": 171.93333333333334, + "grad_norm": 2.0838747024536133, + "learning_rate": 7.511428571428572e-06, + "loss": 0.4393, + "step": 1375 + }, + { + "epoch": 175.0, + "grad_norm": 4.3804030418396, + "learning_rate": 7.440000000000001e-06, + "loss": 0.4386, + "step": 1400 + }, + { + "epoch": 178.13333333333333, + "grad_norm": 1.8927189111709595, + "learning_rate": 7.36857142857143e-06, + "loss": 0.4318, + "step": 1425 + }, + { + "epoch": 181.26666666666668, + "grad_norm": 1.5456620454788208, + "learning_rate": 7.297142857142858e-06, + "loss": 0.4336, + "step": 1450 + }, + { + "epoch": 184.4, + "grad_norm": 2.722612142562866, + "learning_rate": 7.225714285714286e-06, + "loss": 0.4281, + "step": 1475 + }, + { + "epoch": 187.53333333333333, + "grad_norm": 1.9484314918518066, + "learning_rate": 7.154285714285715e-06, + "loss": 0.4312, + "step": 1500 + }, + { + "epoch": 190.66666666666666, + "grad_norm": 2.101043224334717, + "learning_rate": 7.082857142857143e-06, + "loss": 0.427, + "step": 1525 + }, + { + "epoch": 193.8, + "grad_norm": 1.9785490036010742, + "learning_rate": 7.011428571428572e-06, + "loss": 0.4298, + "step": 1550 + }, + { + "epoch": 196.93333333333334, + "grad_norm": 2.319054126739502, + "learning_rate": 6.9400000000000005e-06, + "loss": 0.4376, + "step": 1575 + }, + { + "epoch": 200.0, + "grad_norm": 1.3612741231918335, + "learning_rate": 6.868571428571429e-06, + "loss": 0.4217, + "step": 1600 + }, + { + "epoch": 203.13333333333333, + "grad_norm": 2.128363847732544, + "learning_rate": 6.797142857142858e-06, + "loss": 0.4217, + "step": 1625 + }, + { + "epoch": 206.26666666666668, + "grad_norm": 1.7985234260559082, + "learning_rate": 6.725714285714287e-06, + "loss": 0.4147, + "step": 1650 + }, + { + "epoch": 209.4, + "grad_norm": 1.3478573560714722, + "learning_rate": 6.654285714285716e-06, + "loss": 0.4357, + "step": 1675 + }, + { + "epoch": 212.53333333333333, + "grad_norm": 1.5389248132705688, + "learning_rate": 6.582857142857143e-06, + "loss": 0.419, + "step": 1700 + }, + { + "epoch": 215.66666666666666, + "grad_norm": 1.9558783769607544, + "learning_rate": 6.511428571428572e-06, 
+ "loss": 0.4289, + "step": 1725 + }, + { + "epoch": 218.8, + "grad_norm": 1.756585955619812, + "learning_rate": 6.440000000000001e-06, + "loss": 0.4168, + "step": 1750 + }, + { + "epoch": 221.93333333333334, + "grad_norm": 1.8744903802871704, + "learning_rate": 6.368571428571429e-06, + "loss": 0.4296, + "step": 1775 + }, + { + "epoch": 225.0, + "grad_norm": 1.133415699005127, + "learning_rate": 6.297142857142857e-06, + "loss": 0.4162, + "step": 1800 + }, + { + "epoch": 228.13333333333333, + "grad_norm": 2.819840908050537, + "learning_rate": 6.225714285714286e-06, + "loss": 0.4275, + "step": 1825 + }, + { + "epoch": 231.26666666666668, + "grad_norm": 1.5150210857391357, + "learning_rate": 6.1542857142857145e-06, + "loss": 0.4244, + "step": 1850 + }, + { + "epoch": 234.4, + "grad_norm": 2.184819459915161, + "learning_rate": 6.0828571428571435e-06, + "loss": 0.4282, + "step": 1875 + }, + { + "epoch": 237.53333333333333, + "grad_norm": 3.293454170227051, + "learning_rate": 6.011428571428572e-06, + "loss": 0.4215, + "step": 1900 + }, + { + "epoch": 240.66666666666666, + "grad_norm": 1.210433006286621, + "learning_rate": 5.94e-06, + "loss": 0.4103, + "step": 1925 + }, + { + "epoch": 243.8, + "grad_norm": 2.5027923583984375, + "learning_rate": 5.868571428571429e-06, + "loss": 0.4186, + "step": 1950 + }, + { + "epoch": 246.93333333333334, + "grad_norm": 1.9649789333343506, + "learning_rate": 5.797142857142858e-06, + "loss": 0.427, + "step": 1975 + }, + { + "epoch": 250.0, + "grad_norm": 5.899420261383057, + "learning_rate": 5.725714285714287e-06, + "loss": 0.4113, + "step": 2000 + }, + { + "epoch": 250.0, + "eval_loss": 0.4833647906780243, + "eval_runtime": 0.7095, + "eval_samples_per_second": 36.646, + "eval_steps_per_second": 5.638, + "step": 2000 + }, + { + "epoch": 253.13333333333333, + "grad_norm": 2.0845134258270264, + "learning_rate": 5.654285714285714e-06, + "loss": 0.4168, + "step": 2025 + }, + { + "epoch": 256.26666666666665, + "grad_norm": 1.3729593753814697, + "learning_rate": 5.582857142857143e-06, + "loss": 0.4099, + "step": 2050 + }, + { + "epoch": 259.4, + "grad_norm": 1.8317629098892212, + "learning_rate": 5.511428571428572e-06, + "loss": 0.4136, + "step": 2075 + }, + { + "epoch": 262.53333333333336, + "grad_norm": 1.6238123178482056, + "learning_rate": 5.4400000000000004e-06, + "loss": 0.4204, + "step": 2100 + }, + { + "epoch": 265.6666666666667, + "grad_norm": 1.6968961954116821, + "learning_rate": 5.368571428571429e-06, + "loss": 0.4119, + "step": 2125 + }, + { + "epoch": 268.8, + "grad_norm": 2.1868855953216553, + "learning_rate": 5.297142857142858e-06, + "loss": 0.4114, + "step": 2150 + }, + { + "epoch": 271.93333333333334, + "grad_norm": 1.3070896863937378, + "learning_rate": 5.225714285714286e-06, + "loss": 0.4108, + "step": 2175 + }, + { + "epoch": 275.0, + "grad_norm": 1.977940559387207, + "learning_rate": 5.154285714285715e-06, + "loss": 0.4045, + "step": 2200 + }, + { + "epoch": 278.1333333333333, + "grad_norm": 1.6485978364944458, + "learning_rate": 5.082857142857144e-06, + "loss": 0.4119, + "step": 2225 + }, + { + "epoch": 281.26666666666665, + "grad_norm": 1.9459550380706787, + "learning_rate": 5.011428571428571e-06, + "loss": 0.411, + "step": 2250 + }, + { + "epoch": 284.4, + "grad_norm": 1.5531017780303955, + "learning_rate": 4.94e-06, + "loss": 0.4083, + "step": 2275 + }, + { + "epoch": 287.53333333333336, + "grad_norm": 1.232640027999878, + "learning_rate": 4.868571428571429e-06, + "loss": 0.4121, + "step": 2300 + }, + { + "epoch": 290.6666666666667, + 
"grad_norm": 7.107569217681885, + "learning_rate": 4.800000000000001e-06, + "loss": 0.4013, + "step": 2325 + }, + { + "epoch": 293.8, + "grad_norm": 1.387934684753418, + "learning_rate": 4.728571428571429e-06, + "loss": 0.4135, + "step": 2350 + }, + { + "epoch": 296.93333333333334, + "grad_norm": 1.8122384548187256, + "learning_rate": 4.657142857142857e-06, + "loss": 0.4025, + "step": 2375 + }, + { + "epoch": 300.0, + "grad_norm": 3.2206528186798096, + "learning_rate": 4.585714285714286e-06, + "loss": 0.4055, + "step": 2400 + }, + { + "epoch": 303.1333333333333, + "grad_norm": 1.6222842931747437, + "learning_rate": 4.514285714285714e-06, + "loss": 0.4125, + "step": 2425 + }, + { + "epoch": 306.26666666666665, + "grad_norm": 1.4375584125518799, + "learning_rate": 4.442857142857143e-06, + "loss": 0.4033, + "step": 2450 + }, + { + "epoch": 309.4, + "grad_norm": 1.173034906387329, + "learning_rate": 4.371428571428572e-06, + "loss": 0.4081, + "step": 2475 + }, + { + "epoch": 312.53333333333336, + "grad_norm": 1.9508713483810425, + "learning_rate": 4.3e-06, + "loss": 0.4126, + "step": 2500 + }, + { + "epoch": 315.6666666666667, + "grad_norm": 1.6111533641815186, + "learning_rate": 4.228571428571429e-06, + "loss": 0.3956, + "step": 2525 + }, + { + "epoch": 318.8, + "grad_norm": 2.0711958408355713, + "learning_rate": 4.1571428571428575e-06, + "loss": 0.4079, + "step": 2550 + }, + { + "epoch": 321.93333333333334, + "grad_norm": 2.312619924545288, + "learning_rate": 4.0857142857142865e-06, + "loss": 0.4172, + "step": 2575 + }, + { + "epoch": 325.0, + "grad_norm": 8.329635620117188, + "learning_rate": 4.014285714285715e-06, + "loss": 0.3956, + "step": 2600 + }, + { + "epoch": 328.1333333333333, + "grad_norm": 6.655773639678955, + "learning_rate": 3.942857142857143e-06, + "loss": 0.3998, + "step": 2625 + }, + { + "epoch": 331.26666666666665, + "grad_norm": 1.7531079053878784, + "learning_rate": 3.871428571428572e-06, + "loss": 0.4023, + "step": 2650 + }, + { + "epoch": 334.4, + "grad_norm": 2.5502614974975586, + "learning_rate": 3.8000000000000005e-06, + "loss": 0.4026, + "step": 2675 + }, + { + "epoch": 337.53333333333336, + "grad_norm": 1.471871256828308, + "learning_rate": 3.7285714285714286e-06, + "loss": 0.3981, + "step": 2700 + }, + { + "epoch": 340.6666666666667, + "grad_norm": 2.094290018081665, + "learning_rate": 3.6571428571428576e-06, + "loss": 0.4006, + "step": 2725 + }, + { + "epoch": 343.8, + "grad_norm": 1.3232810497283936, + "learning_rate": 3.5857142857142862e-06, + "loss": 0.4013, + "step": 2750 + }, + { + "epoch": 346.93333333333334, + "grad_norm": 1.5902683734893799, + "learning_rate": 3.5142857142857144e-06, + "loss": 0.4042, + "step": 2775 + }, + { + "epoch": 350.0, + "grad_norm": 5.186419486999512, + "learning_rate": 3.4428571428571434e-06, + "loss": 0.3843, + "step": 2800 + }, + { + "epoch": 353.1333333333333, + "grad_norm": 2.3405115604400635, + "learning_rate": 3.3714285714285716e-06, + "loss": 0.409, + "step": 2825 + }, + { + "epoch": 356.26666666666665, + "grad_norm": 1.1804980039596558, + "learning_rate": 3.3000000000000006e-06, + "loss": 0.405, + "step": 2850 + }, + { + "epoch": 359.4, + "grad_norm": 1.596712589263916, + "learning_rate": 3.2285714285714288e-06, + "loss": 0.4098, + "step": 2875 + }, + { + "epoch": 362.53333333333336, + "grad_norm": 1.9429064989089966, + "learning_rate": 3.1571428571428573e-06, + "loss": 0.413, + "step": 2900 + }, + { + "epoch": 365.6666666666667, + "grad_norm": 1.3636008501052856, + "learning_rate": 3.085714285714286e-06, + "loss": 0.394, 
+ "step": 2925 + }, + { + "epoch": 368.8, + "grad_norm": 1.2349225282669067, + "learning_rate": 3.0142857142857145e-06, + "loss": 0.3964, + "step": 2950 + }, + { + "epoch": 371.93333333333334, + "grad_norm": 1.3793219327926636, + "learning_rate": 2.9428571428571427e-06, + "loss": 0.3958, + "step": 2975 + }, + { + "epoch": 375.0, + "grad_norm": 7.785330772399902, + "learning_rate": 2.8714285714285717e-06, + "loss": 0.3906, + "step": 3000 + }, + { + "epoch": 375.0, + "eval_loss": 0.46799278259277344, + "eval_runtime": 0.7043, + "eval_samples_per_second": 36.916, + "eval_steps_per_second": 5.679, + "step": 3000 + } + ], + "logging_steps": 25, + "max_steps": 4000, + "num_input_tokens_seen": 0, + "num_train_epochs": 572, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7721257243235184.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3000/training_args.bin b/checkpoint-3000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a61b2643a9f565814c2aa62a171605debf82a9c0 --- /dev/null +++ b/checkpoint-3000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e653cd3b83482a0939ee4c8a207df9a996f44ce9dd82197c4ab6cde60cf2bb +size 5432 diff --git a/checkpoint-4000/added_tokens.json b/checkpoint-4000/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cd5b477a9075c49d99de65622db37bb06a251985 --- /dev/null +++ b/checkpoint-4000/added_tokens.json @@ -0,0 +1,4 @@ +{ + "": 80, + "": 79 +} diff --git a/checkpoint-4000/config.json b/checkpoint-4000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..11c8ec6e9cb5a360feb11650e24ba1adc08e8d0e --- /dev/null +++ b/checkpoint-4000/config.json @@ -0,0 +1,91 @@ +{ + "activation_dropout": 0.1, + "apply_spec_augment": true, + "architectures": [ + "SpeechT5ForTextToSpeech" + ], + "attention_dropout": 0.1, + "bos_token_id": 0, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.1, + "decoder_layers": 6, + "decoder_start_token_id": 2, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.1, + "encoder_layers": 12, + "encoder_max_relative_position": 160, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "guided_attention_loss_num_heads": 2, + "guided_attention_loss_scale": 10.0, + "guided_attention_loss_sigma": 0.4, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "is_encoder_decoder": true, + "layer_norm_eps": 1e-05, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": null, + "max_speech_positions": 1876, + "max_text_positions": 600, + "model_type": "speecht5", + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_mel_bins": 80, + "pad_token_id": 1, + "positional_dropout": 0.1, + "reduction_factor": 
2, + "scale_embedding": false, + "speaker_embedding_dim": 512, + "speech_decoder_postnet_dropout": 0.5, + "speech_decoder_postnet_kernel": 5, + "speech_decoder_postnet_layers": 5, + "speech_decoder_postnet_units": 256, + "speech_decoder_prenet_dropout": 0.5, + "speech_decoder_prenet_layers": 2, + "speech_decoder_prenet_units": 256, + "torch_dtype": "float32", + "transformers_version": "4.52.0.dev0", + "use_cache": false, + "use_guided_attention_loss": true, + "vocab_size": 81 +} diff --git a/checkpoint-4000/generation_config.json b/checkpoint-4000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3041cc103eccf911a925012f1ad8fd1d12cce149 --- /dev/null +++ b/checkpoint-4000/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "eos_token_id": 2, + "max_length": 1876, + "pad_token_id": 1, + "transformers_version": "4.52.0.dev0" +} diff --git a/checkpoint-4000/model.safetensors b/checkpoint-4000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..866c230062873c112e9b94191d9412b3e69f198f --- /dev/null +++ b/checkpoint-4000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055e59911edf63563dfc4431a4301a869cc74effda4ba5c905ace376e831bd5d +size 577789320 diff --git a/checkpoint-4000/optimizer.pt b/checkpoint-4000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3e45897b3d4795ce914d6ce27c6183b74bb9008 --- /dev/null +++ b/checkpoint-4000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e74b50ecc65d621a0c3fd3c8d0516e0345843175eb2a55af482c69d69da162e7 +size 1155772233 diff --git a/checkpoint-4000/rng_state.pth b/checkpoint-4000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e4d29a09b3287c78394eee4069e702e1fe5b8391 --- /dev/null +++ b/checkpoint-4000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75ff95056306e75aaca85257572ca65ded44b3fc874ae842724682d1ad4067c2 +size 14244 diff --git a/checkpoint-4000/scaler.pt b/checkpoint-4000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9abe9a00c2b45b15cd85258f1807eefeb3a51ead --- /dev/null +++ b/checkpoint-4000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:030f559b5aedef78935dd7632eb67ec4527791e9aca3eb758b902243f597abd2 +size 988 diff --git a/checkpoint-4000/scheduler.pt b/checkpoint-4000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bbf6940bd360ad866b84117412902669203f8f21 --- /dev/null +++ b/checkpoint-4000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7efbf80617c96c78286826ce59d9a12c86da62d7631874b3d6364a8e993ada60 +size 1064 diff --git a/checkpoint-4000/special_tokens_map.json b/checkpoint-4000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..4ee24ec69861cfc94abbe2c8c934aa0744aa623c --- /dev/null +++ b/checkpoint-4000/special_tokens_map.json @@ -0,0 +1,13 @@ +{ + "bos_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": "" +} diff --git a/checkpoint-4000/spm_char.model b/checkpoint-4000/spm_char.model new file mode 100644 index 0000000000000000000000000000000000000000..8fb73691942626fa75df80b61aab0e9b9340d8e2 --- 
/dev/null +++ b/checkpoint-4000/spm_char.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560 +size 238473 diff --git a/checkpoint-4000/tokenizer_config.json b/checkpoint-4000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e88d44ba3be31ac8f53461ae7c1b02b4c5c830ab --- /dev/null +++ b/checkpoint-4000/tokenizer_config.json @@ -0,0 +1,64 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "79": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "80": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 600, + "normalize": false, + "pad_token": "", + "processor_class": "SpeechT5Processor", + "sp_model_kwargs": {}, + "tokenizer_class": "SpeechT5Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-4000/trainer_state.json b/checkpoint-4000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..92418265f5d30b89bb0f9c5fee970afb2c02d295 --- /dev/null +++ b/checkpoint-4000/trainer_state.json @@ -0,0 +1,1186 @@ +{ + "best_global_step": 4000, + "best_metric": 0.4673193097114563, + "best_model_checkpoint": "./speecht5_tts_mabama/checkpoint-4000", + "epoch": 500.0, + "eval_steps": 1000, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 3.1333333333333333, + "grad_norm": 13.092179298400879, + "learning_rate": 4.2000000000000006e-07, + "loss": 1.0978, + "step": 25 + }, + { + "epoch": 6.266666666666667, + "grad_norm": 13.538804054260254, + "learning_rate": 9.200000000000001e-07, + "loss": 1.0057, + "step": 50 + }, + { + "epoch": 9.4, + "grad_norm": 3.8923733234405518, + "learning_rate": 1.42e-06, + "loss": 0.8155, + "step": 75 + }, + { + "epoch": 12.533333333333333, + "grad_norm": 2.569746732711792, + "learning_rate": 1.9200000000000003e-06, + "loss": 0.7921, + "step": 100 + }, + { + "epoch": 15.666666666666666, + "grad_norm": 2.390493631362915, + "learning_rate": 2.42e-06, + "loss": 0.7531, + "step": 125 + }, + { + "epoch": 18.8, + "grad_norm": 2.7168779373168945, + "learning_rate": 2.92e-06, + "loss": 0.7393, + "step": 150 + }, + { + "epoch": 21.933333333333334, + "grad_norm": 10.27633285522461, + "learning_rate": 3.4200000000000007e-06, + "loss": 0.7292, + "step": 175 + }, + { + "epoch": 25.0, + "grad_norm": 5.6921000480651855, + "learning_rate": 3.920000000000001e-06, + "loss": 0.6642, + "step": 200 + }, + { + "epoch": 28.133333333333333, + "grad_norm": 2.6206777095794678, + "learning_rate": 4.42e-06, + "loss": 0.6555, + "step": 225 + }, + { + "epoch": 31.266666666666666, + "grad_norm": 1.9396028518676758, + 
"learning_rate": 4.92e-06, + "loss": 0.6484, + "step": 250 + }, + { + "epoch": 34.4, + "grad_norm": 3.44437575340271, + "learning_rate": 5.420000000000001e-06, + "loss": 0.6414, + "step": 275 + }, + { + "epoch": 37.53333333333333, + "grad_norm": 2.729497194290161, + "learning_rate": 5.92e-06, + "loss": 0.6323, + "step": 300 + }, + { + "epoch": 40.666666666666664, + "grad_norm": 2.3852877616882324, + "learning_rate": 6.42e-06, + "loss": 0.6073, + "step": 325 + }, + { + "epoch": 43.8, + "grad_norm": 4.4287109375, + "learning_rate": 6.92e-06, + "loss": 0.6034, + "step": 350 + }, + { + "epoch": 46.93333333333333, + "grad_norm": 2.1653966903686523, + "learning_rate": 7.420000000000001e-06, + "loss": 0.5865, + "step": 375 + }, + { + "epoch": 50.0, + "grad_norm": 2.8120265007019043, + "learning_rate": 7.92e-06, + "loss": 0.5556, + "step": 400 + }, + { + "epoch": 53.13333333333333, + "grad_norm": 2.0973806381225586, + "learning_rate": 8.42e-06, + "loss": 0.5416, + "step": 425 + }, + { + "epoch": 56.266666666666666, + "grad_norm": 2.6723616123199463, + "learning_rate": 8.920000000000001e-06, + "loss": 0.5407, + "step": 450 + }, + { + "epoch": 59.4, + "grad_norm": 2.1810383796691895, + "learning_rate": 9.42e-06, + "loss": 0.5174, + "step": 475 + }, + { + "epoch": 62.53333333333333, + "grad_norm": 3.464071750640869, + "learning_rate": 9.920000000000002e-06, + "loss": 0.5327, + "step": 500 + }, + { + "epoch": 65.66666666666667, + "grad_norm": 3.6148977279663086, + "learning_rate": 9.940000000000001e-06, + "loss": 0.5141, + "step": 525 + }, + { + "epoch": 68.8, + "grad_norm": 2.5631027221679688, + "learning_rate": 9.86857142857143e-06, + "loss": 0.5246, + "step": 550 + }, + { + "epoch": 71.93333333333334, + "grad_norm": 2.058468818664551, + "learning_rate": 9.797142857142858e-06, + "loss": 0.5065, + "step": 575 + }, + { + "epoch": 75.0, + "grad_norm": 1.7559466361999512, + "learning_rate": 9.725714285714287e-06, + "loss": 0.4871, + "step": 600 + }, + { + "epoch": 78.13333333333334, + "grad_norm": 2.653345823287964, + "learning_rate": 9.654285714285716e-06, + "loss": 0.4941, + "step": 625 + }, + { + "epoch": 81.26666666666667, + "grad_norm": 2.612226724624634, + "learning_rate": 9.582857142857143e-06, + "loss": 0.4796, + "step": 650 + }, + { + "epoch": 84.4, + "grad_norm": 1.7446099519729614, + "learning_rate": 9.511428571428572e-06, + "loss": 0.487, + "step": 675 + }, + { + "epoch": 87.53333333333333, + "grad_norm": 2.627315044403076, + "learning_rate": 9.440000000000001e-06, + "loss": 0.4731, + "step": 700 + }, + { + "epoch": 90.66666666666667, + "grad_norm": 2.4315383434295654, + "learning_rate": 9.368571428571428e-06, + "loss": 0.4812, + "step": 725 + }, + { + "epoch": 93.8, + "grad_norm": 2.4056336879730225, + "learning_rate": 9.297142857142857e-06, + "loss": 0.468, + "step": 750 + }, + { + "epoch": 96.93333333333334, + "grad_norm": 2.153116464614868, + "learning_rate": 9.225714285714286e-06, + "loss": 0.4829, + "step": 775 + }, + { + "epoch": 100.0, + "grad_norm": 2.9421756267547607, + "learning_rate": 9.154285714285715e-06, + "loss": 0.4555, + "step": 800 + }, + { + "epoch": 103.13333333333334, + "grad_norm": 1.6771883964538574, + "learning_rate": 9.082857142857143e-06, + "loss": 0.462, + "step": 825 + }, + { + "epoch": 106.26666666666667, + "grad_norm": 2.9711899757385254, + "learning_rate": 9.011428571428572e-06, + "loss": 0.471, + "step": 850 + }, + { + "epoch": 109.4, + "grad_norm": 1.922980546951294, + "learning_rate": 8.94e-06, + "loss": 0.4673, + "step": 875 + }, + { + "epoch": 
112.53333333333333, + "grad_norm": 2.49945068359375, + "learning_rate": 8.86857142857143e-06, + "loss": 0.4611, + "step": 900 + }, + { + "epoch": 115.66666666666667, + "grad_norm": 2.646510362625122, + "learning_rate": 8.797142857142857e-06, + "loss": 0.4574, + "step": 925 + }, + { + "epoch": 118.8, + "grad_norm": 1.7943354845046997, + "learning_rate": 8.725714285714286e-06, + "loss": 0.4658, + "step": 950 + }, + { + "epoch": 121.93333333333334, + "grad_norm": 2.171827793121338, + "learning_rate": 8.654285714285715e-06, + "loss": 0.4561, + "step": 975 + }, + { + "epoch": 125.0, + "grad_norm": 7.516489505767822, + "learning_rate": 8.582857142857144e-06, + "loss": 0.4472, + "step": 1000 + }, + { + "epoch": 125.0, + "eval_loss": 0.5154594779014587, + "eval_runtime": 0.7837, + "eval_samples_per_second": 33.175, + "eval_steps_per_second": 5.104, + "step": 1000 + }, + { + "epoch": 128.13333333333333, + "grad_norm": 2.5167343616485596, + "learning_rate": 8.511428571428571e-06, + "loss": 0.457, + "step": 1025 + }, + { + "epoch": 131.26666666666668, + "grad_norm": 3.3089983463287354, + "learning_rate": 8.44e-06, + "loss": 0.4456, + "step": 1050 + }, + { + "epoch": 134.4, + "grad_norm": 2.778348445892334, + "learning_rate": 8.36857142857143e-06, + "loss": 0.4612, + "step": 1075 + }, + { + "epoch": 137.53333333333333, + "grad_norm": 2.529778480529785, + "learning_rate": 8.297142857142859e-06, + "loss": 0.4429, + "step": 1100 + }, + { + "epoch": 140.66666666666666, + "grad_norm": 1.76685631275177, + "learning_rate": 8.225714285714288e-06, + "loss": 0.4399, + "step": 1125 + }, + { + "epoch": 143.8, + "grad_norm": 1.8449666500091553, + "learning_rate": 8.154285714285715e-06, + "loss": 0.4329, + "step": 1150 + }, + { + "epoch": 146.93333333333334, + "grad_norm": 1.9097468852996826, + "learning_rate": 8.082857142857144e-06, + "loss": 0.4527, + "step": 1175 + }, + { + "epoch": 150.0, + "grad_norm": 3.892838716506958, + "learning_rate": 8.011428571428573e-06, + "loss": 0.4448, + "step": 1200 + }, + { + "epoch": 153.13333333333333, + "grad_norm": 2.1518826484680176, + "learning_rate": 7.94e-06, + "loss": 0.4412, + "step": 1225 + }, + { + "epoch": 156.26666666666668, + "grad_norm": 1.5322662591934204, + "learning_rate": 7.86857142857143e-06, + "loss": 0.4388, + "step": 1250 + }, + { + "epoch": 159.4, + "grad_norm": 1.4961107969284058, + "learning_rate": 7.797142857142858e-06, + "loss": 0.4363, + "step": 1275 + }, + { + "epoch": 162.53333333333333, + "grad_norm": 1.8992841243743896, + "learning_rate": 7.725714285714286e-06, + "loss": 0.4474, + "step": 1300 + }, + { + "epoch": 165.66666666666666, + "grad_norm": 1.5015554428100586, + "learning_rate": 7.654285714285715e-06, + "loss": 0.4327, + "step": 1325 + }, + { + "epoch": 168.8, + "grad_norm": 2.0730693340301514, + "learning_rate": 7.5828571428571444e-06, + "loss": 0.4348, + "step": 1350 + }, + { + "epoch": 171.93333333333334, + "grad_norm": 2.0838747024536133, + "learning_rate": 7.511428571428572e-06, + "loss": 0.4393, + "step": 1375 + }, + { + "epoch": 175.0, + "grad_norm": 4.3804030418396, + "learning_rate": 7.440000000000001e-06, + "loss": 0.4386, + "step": 1400 + }, + { + "epoch": 178.13333333333333, + "grad_norm": 1.8927189111709595, + "learning_rate": 7.36857142857143e-06, + "loss": 0.4318, + "step": 1425 + }, + { + "epoch": 181.26666666666668, + "grad_norm": 1.5456620454788208, + "learning_rate": 7.297142857142858e-06, + "loss": 0.4336, + "step": 1450 + }, + { + "epoch": 184.4, + "grad_norm": 2.722612142562866, + "learning_rate": 
7.225714285714286e-06, + "loss": 0.4281, + "step": 1475 + }, + { + "epoch": 187.53333333333333, + "grad_norm": 1.9484314918518066, + "learning_rate": 7.154285714285715e-06, + "loss": 0.4312, + "step": 1500 + }, + { + "epoch": 190.66666666666666, + "grad_norm": 2.101043224334717, + "learning_rate": 7.082857142857143e-06, + "loss": 0.427, + "step": 1525 + }, + { + "epoch": 193.8, + "grad_norm": 1.9785490036010742, + "learning_rate": 7.011428571428572e-06, + "loss": 0.4298, + "step": 1550 + }, + { + "epoch": 196.93333333333334, + "grad_norm": 2.319054126739502, + "learning_rate": 6.9400000000000005e-06, + "loss": 0.4376, + "step": 1575 + }, + { + "epoch": 200.0, + "grad_norm": 1.3612741231918335, + "learning_rate": 6.868571428571429e-06, + "loss": 0.4217, + "step": 1600 + }, + { + "epoch": 203.13333333333333, + "grad_norm": 2.128363847732544, + "learning_rate": 6.797142857142858e-06, + "loss": 0.4217, + "step": 1625 + }, + { + "epoch": 206.26666666666668, + "grad_norm": 1.7985234260559082, + "learning_rate": 6.725714285714287e-06, + "loss": 0.4147, + "step": 1650 + }, + { + "epoch": 209.4, + "grad_norm": 1.3478573560714722, + "learning_rate": 6.654285714285716e-06, + "loss": 0.4357, + "step": 1675 + }, + { + "epoch": 212.53333333333333, + "grad_norm": 1.5389248132705688, + "learning_rate": 6.582857142857143e-06, + "loss": 0.419, + "step": 1700 + }, + { + "epoch": 215.66666666666666, + "grad_norm": 1.9558783769607544, + "learning_rate": 6.511428571428572e-06, + "loss": 0.4289, + "step": 1725 + }, + { + "epoch": 218.8, + "grad_norm": 1.756585955619812, + "learning_rate": 6.440000000000001e-06, + "loss": 0.4168, + "step": 1750 + }, + { + "epoch": 221.93333333333334, + "grad_norm": 1.8744903802871704, + "learning_rate": 6.368571428571429e-06, + "loss": 0.4296, + "step": 1775 + }, + { + "epoch": 225.0, + "grad_norm": 1.133415699005127, + "learning_rate": 6.297142857142857e-06, + "loss": 0.4162, + "step": 1800 + }, + { + "epoch": 228.13333333333333, + "grad_norm": 2.819840908050537, + "learning_rate": 6.225714285714286e-06, + "loss": 0.4275, + "step": 1825 + }, + { + "epoch": 231.26666666666668, + "grad_norm": 1.5150210857391357, + "learning_rate": 6.1542857142857145e-06, + "loss": 0.4244, + "step": 1850 + }, + { + "epoch": 234.4, + "grad_norm": 2.184819459915161, + "learning_rate": 6.0828571428571435e-06, + "loss": 0.4282, + "step": 1875 + }, + { + "epoch": 237.53333333333333, + "grad_norm": 3.293454170227051, + "learning_rate": 6.011428571428572e-06, + "loss": 0.4215, + "step": 1900 + }, + { + "epoch": 240.66666666666666, + "grad_norm": 1.210433006286621, + "learning_rate": 5.94e-06, + "loss": 0.4103, + "step": 1925 + }, + { + "epoch": 243.8, + "grad_norm": 2.5027923583984375, + "learning_rate": 5.868571428571429e-06, + "loss": 0.4186, + "step": 1950 + }, + { + "epoch": 246.93333333333334, + "grad_norm": 1.9649789333343506, + "learning_rate": 5.797142857142858e-06, + "loss": 0.427, + "step": 1975 + }, + { + "epoch": 250.0, + "grad_norm": 5.899420261383057, + "learning_rate": 5.725714285714287e-06, + "loss": 0.4113, + "step": 2000 + }, + { + "epoch": 250.0, + "eval_loss": 0.4833647906780243, + "eval_runtime": 0.7095, + "eval_samples_per_second": 36.646, + "eval_steps_per_second": 5.638, + "step": 2000 + }, + { + "epoch": 253.13333333333333, + "grad_norm": 2.0845134258270264, + "learning_rate": 5.654285714285714e-06, + "loss": 0.4168, + "step": 2025 + }, + { + "epoch": 256.26666666666665, + "grad_norm": 1.3729593753814697, + "learning_rate": 5.582857142857143e-06, + "loss": 0.4099, + "step": 2050 + 
}, + { + "epoch": 259.4, + "grad_norm": 1.8317629098892212, + "learning_rate": 5.511428571428572e-06, + "loss": 0.4136, + "step": 2075 + }, + { + "epoch": 262.53333333333336, + "grad_norm": 1.6238123178482056, + "learning_rate": 5.4400000000000004e-06, + "loss": 0.4204, + "step": 2100 + }, + { + "epoch": 265.6666666666667, + "grad_norm": 1.6968961954116821, + "learning_rate": 5.368571428571429e-06, + "loss": 0.4119, + "step": 2125 + }, + { + "epoch": 268.8, + "grad_norm": 2.1868855953216553, + "learning_rate": 5.297142857142858e-06, + "loss": 0.4114, + "step": 2150 + }, + { + "epoch": 271.93333333333334, + "grad_norm": 1.3070896863937378, + "learning_rate": 5.225714285714286e-06, + "loss": 0.4108, + "step": 2175 + }, + { + "epoch": 275.0, + "grad_norm": 1.977940559387207, + "learning_rate": 5.154285714285715e-06, + "loss": 0.4045, + "step": 2200 + }, + { + "epoch": 278.1333333333333, + "grad_norm": 1.6485978364944458, + "learning_rate": 5.082857142857144e-06, + "loss": 0.4119, + "step": 2225 + }, + { + "epoch": 281.26666666666665, + "grad_norm": 1.9459550380706787, + "learning_rate": 5.011428571428571e-06, + "loss": 0.411, + "step": 2250 + }, + { + "epoch": 284.4, + "grad_norm": 1.5531017780303955, + "learning_rate": 4.94e-06, + "loss": 0.4083, + "step": 2275 + }, + { + "epoch": 287.53333333333336, + "grad_norm": 1.232640027999878, + "learning_rate": 4.868571428571429e-06, + "loss": 0.4121, + "step": 2300 + }, + { + "epoch": 290.6666666666667, + "grad_norm": 7.107569217681885, + "learning_rate": 4.800000000000001e-06, + "loss": 0.4013, + "step": 2325 + }, + { + "epoch": 293.8, + "grad_norm": 1.387934684753418, + "learning_rate": 4.728571428571429e-06, + "loss": 0.4135, + "step": 2350 + }, + { + "epoch": 296.93333333333334, + "grad_norm": 1.8122384548187256, + "learning_rate": 4.657142857142857e-06, + "loss": 0.4025, + "step": 2375 + }, + { + "epoch": 300.0, + "grad_norm": 3.2206528186798096, + "learning_rate": 4.585714285714286e-06, + "loss": 0.4055, + "step": 2400 + }, + { + "epoch": 303.1333333333333, + "grad_norm": 1.6222842931747437, + "learning_rate": 4.514285714285714e-06, + "loss": 0.4125, + "step": 2425 + }, + { + "epoch": 306.26666666666665, + "grad_norm": 1.4375584125518799, + "learning_rate": 4.442857142857143e-06, + "loss": 0.4033, + "step": 2450 + }, + { + "epoch": 309.4, + "grad_norm": 1.173034906387329, + "learning_rate": 4.371428571428572e-06, + "loss": 0.4081, + "step": 2475 + }, + { + "epoch": 312.53333333333336, + "grad_norm": 1.9508713483810425, + "learning_rate": 4.3e-06, + "loss": 0.4126, + "step": 2500 + }, + { + "epoch": 315.6666666666667, + "grad_norm": 1.6111533641815186, + "learning_rate": 4.228571428571429e-06, + "loss": 0.3956, + "step": 2525 + }, + { + "epoch": 318.8, + "grad_norm": 2.0711958408355713, + "learning_rate": 4.1571428571428575e-06, + "loss": 0.4079, + "step": 2550 + }, + { + "epoch": 321.93333333333334, + "grad_norm": 2.312619924545288, + "learning_rate": 4.0857142857142865e-06, + "loss": 0.4172, + "step": 2575 + }, + { + "epoch": 325.0, + "grad_norm": 8.329635620117188, + "learning_rate": 4.014285714285715e-06, + "loss": 0.3956, + "step": 2600 + }, + { + "epoch": 328.1333333333333, + "grad_norm": 6.655773639678955, + "learning_rate": 3.942857142857143e-06, + "loss": 0.3998, + "step": 2625 + }, + { + "epoch": 331.26666666666665, + "grad_norm": 1.7531079053878784, + "learning_rate": 3.871428571428572e-06, + "loss": 0.4023, + "step": 2650 + }, + { + "epoch": 334.4, + "grad_norm": 2.5502614974975586, + "learning_rate": 3.8000000000000005e-06, + 
"loss": 0.4026, + "step": 2675 + }, + { + "epoch": 337.53333333333336, + "grad_norm": 1.471871256828308, + "learning_rate": 3.7285714285714286e-06, + "loss": 0.3981, + "step": 2700 + }, + { + "epoch": 340.6666666666667, + "grad_norm": 2.094290018081665, + "learning_rate": 3.6571428571428576e-06, + "loss": 0.4006, + "step": 2725 + }, + { + "epoch": 343.8, + "grad_norm": 1.3232810497283936, + "learning_rate": 3.5857142857142862e-06, + "loss": 0.4013, + "step": 2750 + }, + { + "epoch": 346.93333333333334, + "grad_norm": 1.5902683734893799, + "learning_rate": 3.5142857142857144e-06, + "loss": 0.4042, + "step": 2775 + }, + { + "epoch": 350.0, + "grad_norm": 5.186419486999512, + "learning_rate": 3.4428571428571434e-06, + "loss": 0.3843, + "step": 2800 + }, + { + "epoch": 353.1333333333333, + "grad_norm": 2.3405115604400635, + "learning_rate": 3.3714285714285716e-06, + "loss": 0.409, + "step": 2825 + }, + { + "epoch": 356.26666666666665, + "grad_norm": 1.1804980039596558, + "learning_rate": 3.3000000000000006e-06, + "loss": 0.405, + "step": 2850 + }, + { + "epoch": 359.4, + "grad_norm": 1.596712589263916, + "learning_rate": 3.2285714285714288e-06, + "loss": 0.4098, + "step": 2875 + }, + { + "epoch": 362.53333333333336, + "grad_norm": 1.9429064989089966, + "learning_rate": 3.1571428571428573e-06, + "loss": 0.413, + "step": 2900 + }, + { + "epoch": 365.6666666666667, + "grad_norm": 1.3636008501052856, + "learning_rate": 3.085714285714286e-06, + "loss": 0.394, + "step": 2925 + }, + { + "epoch": 368.8, + "grad_norm": 1.2349225282669067, + "learning_rate": 3.0142857142857145e-06, + "loss": 0.3964, + "step": 2950 + }, + { + "epoch": 371.93333333333334, + "grad_norm": 1.3793219327926636, + "learning_rate": 2.9428571428571427e-06, + "loss": 0.3958, + "step": 2975 + }, + { + "epoch": 375.0, + "grad_norm": 7.785330772399902, + "learning_rate": 2.8714285714285717e-06, + "loss": 0.3906, + "step": 3000 + }, + { + "epoch": 375.0, + "eval_loss": 0.46799278259277344, + "eval_runtime": 0.7043, + "eval_samples_per_second": 36.916, + "eval_steps_per_second": 5.679, + "step": 3000 + }, + { + "epoch": 378.1333333333333, + "grad_norm": 1.4927845001220703, + "learning_rate": 2.8000000000000003e-06, + "loss": 0.4, + "step": 3025 + }, + { + "epoch": 381.26666666666665, + "grad_norm": 1.2291827201843262, + "learning_rate": 2.728571428571429e-06, + "loss": 0.3989, + "step": 3050 + }, + { + "epoch": 384.4, + "grad_norm": 1.579060673713684, + "learning_rate": 2.6571428571428575e-06, + "loss": 0.3999, + "step": 3075 + }, + { + "epoch": 387.53333333333336, + "grad_norm": 1.6574435234069824, + "learning_rate": 2.5857142857142856e-06, + "loss": 0.388, + "step": 3100 + }, + { + "epoch": 390.6666666666667, + "grad_norm": 1.3004297018051147, + "learning_rate": 2.5142857142857147e-06, + "loss": 0.3921, + "step": 3125 + }, + { + "epoch": 393.8, + "grad_norm": 1.097733497619629, + "learning_rate": 2.442857142857143e-06, + "loss": 0.389, + "step": 3150 + }, + { + "epoch": 396.93333333333334, + "grad_norm": 2.4902069568634033, + "learning_rate": 2.371428571428572e-06, + "loss": 0.3969, + "step": 3175 + }, + { + "epoch": 400.0, + "grad_norm": 2.962905168533325, + "learning_rate": 2.3000000000000004e-06, + "loss": 0.3915, + "step": 3200 + }, + { + "epoch": 403.1333333333333, + "grad_norm": 1.7403258085250854, + "learning_rate": 2.228571428571429e-06, + "loss": 0.3917, + "step": 3225 + }, + { + "epoch": 406.26666666666665, + "grad_norm": 1.8885612487792969, + "learning_rate": 2.157142857142857e-06, + "loss": 0.3994, + "step": 3250 + }, + { 
+ "epoch": 409.4, + "grad_norm": 1.08004891872406, + "learning_rate": 2.0857142857142858e-06, + "loss": 0.3957, + "step": 3275 + }, + { + "epoch": 412.53333333333336, + "grad_norm": 1.0919499397277832, + "learning_rate": 2.0142857142857144e-06, + "loss": 0.4011, + "step": 3300 + }, + { + "epoch": 415.6666666666667, + "grad_norm": 1.176896095275879, + "learning_rate": 1.942857142857143e-06, + "loss": 0.4051, + "step": 3325 + }, + { + "epoch": 418.8, + "grad_norm": 1.5768754482269287, + "learning_rate": 1.8714285714285715e-06, + "loss": 0.3922, + "step": 3350 + }, + { + "epoch": 421.93333333333334, + "grad_norm": 1.541318416595459, + "learning_rate": 1.8000000000000001e-06, + "loss": 0.3921, + "step": 3375 + }, + { + "epoch": 425.0, + "grad_norm": 2.7317168712615967, + "learning_rate": 1.7285714285714287e-06, + "loss": 0.3867, + "step": 3400 + }, + { + "epoch": 428.1333333333333, + "grad_norm": 1.5212819576263428, + "learning_rate": 1.657142857142857e-06, + "loss": 0.3994, + "step": 3425 + }, + { + "epoch": 431.26666666666665, + "grad_norm": 1.6700373888015747, + "learning_rate": 1.5857142857142857e-06, + "loss": 0.404, + "step": 3450 + }, + { + "epoch": 434.4, + "grad_norm": 1.2883983850479126, + "learning_rate": 1.5142857142857145e-06, + "loss": 0.383, + "step": 3475 + }, + { + "epoch": 437.53333333333336, + "grad_norm": 1.3779336214065552, + "learning_rate": 1.442857142857143e-06, + "loss": 0.3913, + "step": 3500 + }, + { + "epoch": 440.6666666666667, + "grad_norm": 3.994981050491333, + "learning_rate": 1.3714285714285717e-06, + "loss": 0.3936, + "step": 3525 + }, + { + "epoch": 443.8, + "grad_norm": 1.1143438816070557, + "learning_rate": 1.3e-06, + "loss": 0.389, + "step": 3550 + }, + { + "epoch": 446.93333333333334, + "grad_norm": 1.7732421159744263, + "learning_rate": 1.2285714285714286e-06, + "loss": 0.3996, + "step": 3575 + }, + { + "epoch": 450.0, + "grad_norm": 2.198235273361206, + "learning_rate": 1.1571428571428572e-06, + "loss": 0.383, + "step": 3600 + }, + { + "epoch": 453.1333333333333, + "grad_norm": 1.0842177867889404, + "learning_rate": 1.0857142857142858e-06, + "loss": 0.3959, + "step": 3625 + }, + { + "epoch": 456.26666666666665, + "grad_norm": 1.633346676826477, + "learning_rate": 1.0142857142857144e-06, + "loss": 0.3954, + "step": 3650 + }, + { + "epoch": 459.4, + "grad_norm": 1.3174960613250732, + "learning_rate": 9.42857142857143e-07, + "loss": 0.3921, + "step": 3675 + }, + { + "epoch": 462.53333333333336, + "grad_norm": 5.679307460784912, + "learning_rate": 8.714285714285716e-07, + "loss": 0.3967, + "step": 3700 + }, + { + "epoch": 465.6666666666667, + "grad_norm": 1.3262453079223633, + "learning_rate": 8.000000000000001e-07, + "loss": 0.389, + "step": 3725 + }, + { + "epoch": 468.8, + "grad_norm": 2.5061025619506836, + "learning_rate": 7.285714285714287e-07, + "loss": 0.4017, + "step": 3750 + }, + { + "epoch": 471.93333333333334, + "grad_norm": 9.512356758117676, + "learning_rate": 6.571428571428571e-07, + "loss": 0.3969, + "step": 3775 + }, + { + "epoch": 475.0, + "grad_norm": 2.8925583362579346, + "learning_rate": 5.857142857142857e-07, + "loss": 0.3851, + "step": 3800 + }, + { + "epoch": 478.1333333333333, + "grad_norm": 1.8175698518753052, + "learning_rate": 5.142857142857143e-07, + "loss": 0.3964, + "step": 3825 + }, + { + "epoch": 481.26666666666665, + "grad_norm": 1.5850640535354614, + "learning_rate": 4.4285714285714286e-07, + "loss": 0.3932, + "step": 3850 + }, + { + "epoch": 484.4, + "grad_norm": 1.6796472072601318, + "learning_rate": 
3.7142857142857145e-07, + "loss": 0.3965, + "step": 3875 + }, + { + "epoch": 487.53333333333336, + "grad_norm": 1.5245673656463623, + "learning_rate": 3.0000000000000004e-07, + "loss": 0.3889, + "step": 3900 + }, + { + "epoch": 490.6666666666667, + "grad_norm": 1.3968061208724976, + "learning_rate": 2.285714285714286e-07, + "loss": 0.3939, + "step": 3925 + }, + { + "epoch": 493.8, + "grad_norm": 1.4190447330474854, + "learning_rate": 1.5714285714285717e-07, + "loss": 0.3888, + "step": 3950 + }, + { + "epoch": 496.93333333333334, + "grad_norm": 1.2263931035995483, + "learning_rate": 8.571428571428573e-08, + "loss": 0.399, + "step": 3975 + }, + { + "epoch": 500.0, + "grad_norm": 7.713298797607422, + "learning_rate": 1.4285714285714288e-08, + "loss": 0.3902, + "step": 4000 + }, + { + "epoch": 500.0, + "eval_loss": 0.4673193097114563, + "eval_runtime": 0.7262, + "eval_samples_per_second": 35.803, + "eval_steps_per_second": 5.508, + "step": 4000 + } + ], + "logging_steps": 25, + "max_steps": 4000, + "num_input_tokens_seen": 0, + "num_train_epochs": 572, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.029284269051896e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4000/training_args.bin b/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a61b2643a9f565814c2aa62a171605debf82a9c0 --- /dev/null +++ b/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e653cd3b83482a0939ee4c8a207df9a996f44ce9dd82197c4ab6cde60cf2bb +size 5432 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..11c8ec6e9cb5a360feb11650e24ba1adc08e8d0e --- /dev/null +++ b/config.json @@ -0,0 +1,91 @@ +{ + "activation_dropout": 0.1, + "apply_spec_augment": true, + "architectures": [ + "SpeechT5ForTextToSpeech" + ], + "attention_dropout": 0.1, + "bos_token_id": 0, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.1, + "decoder_layers": 6, + "decoder_start_token_id": 2, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.1, + "encoder_layers": 12, + "encoder_max_relative_position": 160, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "guided_attention_loss_num_heads": 2, + "guided_attention_loss_scale": 10.0, + "guided_attention_loss_sigma": 0.4, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "is_encoder_decoder": true, + "layer_norm_eps": 1e-05, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": null, + "max_speech_positions": 1876, + "max_text_positions": 600, + "model_type": "speecht5", + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_mel_bins": 80, + "pad_token_id": 1, + "positional_dropout": 0.1, + 
"reduction_factor": 2, + "scale_embedding": false, + "speaker_embedding_dim": 512, + "speech_decoder_postnet_dropout": 0.5, + "speech_decoder_postnet_kernel": 5, + "speech_decoder_postnet_layers": 5, + "speech_decoder_postnet_units": 256, + "speech_decoder_prenet_dropout": 0.5, + "speech_decoder_prenet_layers": 2, + "speech_decoder_prenet_units": 256, + "torch_dtype": "float32", + "transformers_version": "4.52.0.dev0", + "use_cache": false, + "use_guided_attention_loss": true, + "vocab_size": 81 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3041cc103eccf911a925012f1ad8fd1d12cce149 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "eos_token_id": 2, + "max_length": 1876, + "pad_token_id": 1, + "transformers_version": "4.52.0.dev0" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..866c230062873c112e9b94191d9412b3e69f198f --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055e59911edf63563dfc4431a4301a869cc74effda4ba5c905ace376e831bd5d +size 577789320 diff --git a/runs/Apr21_05-30-54_9a6e619cb7d1/events.out.tfevents.1745213478.9a6e619cb7d1.455.0 b/runs/Apr21_05-30-54_9a6e619cb7d1/events.out.tfevents.1745213478.9a6e619cb7d1.455.0 new file mode 100644 index 0000000000000000000000000000000000000000..a8684a224ed518b4f1779c1a558537cb1728267d --- /dev/null +++ b/runs/Apr21_05-30-54_9a6e619cb7d1/events.out.tfevents.1745213478.9a6e619cb7d1.455.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8077436aa583fddfe2ef4a048c8ca21d368355fa2c74a1e17aba4c226d4dbd03 +size 41743 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..4ee24ec69861cfc94abbe2c8c934aa0744aa623c --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,13 @@ +{ + "bos_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": "" +} diff --git a/spm_char.model b/spm_char.model new file mode 100644 index 0000000000000000000000000000000000000000..8fb73691942626fa75df80b61aab0e9b9340d8e2 --- /dev/null +++ b/spm_char.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fcc48f3e225f627b1641db410ceb0c8649bd2b0c982e150b03f8be3728ab560 +size 238473 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e88d44ba3be31ac8f53461ae7c1b02b4c5c830ab --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,64 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "79": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "80": { + "content": "", + 
"lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 600, + "normalize": false, + "pad_token": "", + "processor_class": "SpeechT5Processor", + "sp_model_kwargs": {}, + "tokenizer_class": "SpeechT5Tokenizer", + "unk_token": "" +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a61b2643a9f565814c2aa62a171605debf82a9c0 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e653cd3b83482a0939ee4c8a207df9a996f44ce9dd82197c4ab6cde60cf2bb +size 5432