{
  "data_config": {
    "training_files": {
      "LJS": {
        "basedir": "filelists/",
        "audiodir": "wavs",
        "filelist": "3speakers_ukrainian_train_filelist_dc.txt",
        "lmdbpath": ""
      }
    },
    "validation_files": {
      "LJS": {
        "basedir": "filelists/",
        "audiodir": "wavs",
        "filelist": "3speakers_ukrainian_val_filelist_dc.txt",
        "lmdbpath": ""
      }
    },
    "dur_min": 0.1,
    "dur_max": 10.2,
    "sampling_rate": 22050,
    "filter_length": 1024,
    "hop_length": 256,
    "win_length": 1024,
    "n_mel_channels": 80,
    "mel_fmin": 0.0,
    "mel_fmax": 8000.0,
    "f0_min": 80.0,
    "f0_max": 640.0,
    "max_wav_value": 32768.0,
    "use_f0": true,
    "use_log_f0": 0,
    "use_energy_avg": true,
    "use_scaled_energy": true,
    "symbol_set": "ukrainian",
    "cleaner_names": [
      "ukrainian_cleaners"
    ],
    "heteronyms_path": "tts_text_processing/heteronyms",
    "phoneme_dict_path": "tts_text_processing/cmudict-0.7b",
    "p_phoneme": 0.0,
    "handle_phoneme": "word",
    "handle_phoneme_ambiguous": "ignore",
    "include_speakers": null,
    "n_frames": -1,
    "betabinom_cache_path": "/home/dmytro_chaplinsky/RAD-TTS/radtts-code/cache",
    "lmdb_cache_path": "",
    "use_attn_prior_masking": true,
    "prepend_space_to_text": true,
    "append_space_to_text": true,
    "add_bos_eos_to_text": false,
    "betabinom_scaling_factor": 1.0,
    "distance_tx_unvoiced": false,
    "mel_noise_scale": 0.0
  },
  "model_config": {
    "n_speakers": 3,
    "n_speaker_dim": 16,
    "n_text": 185,
    "n_text_dim": 512,
    "n_flows": 8,
    "n_conv_layers_per_step": 4,
    "n_mel_channels": 80,
    "n_hidden": 1024,
    "mel_encoder_n_hidden": 512,
    "dummy_speaker_embedding": false,
    "n_early_size": 2,
    "n_early_every": 2,
    "n_group_size": 2,
    "affine_model": "wavenet",
    "include_modules": "decatndpmvpredapm",
    "scaling_fn": "tanh",
    "matrix_decomposition": "LUS",
    "learn_alignments": true,
    "use_speaker_emb_for_alignment": false,
    "attn_straight_through_estimator": true,
    "use_context_lstm": true,
    "context_lstm_norm": "spectral",
    "context_lstm_w_f0_and_energy": true,
    "text_encoder_lstm_norm": "spectral",
    "n_f0_dims": 1,
    "n_energy_avg_dims": 1,
    "use_first_order_features": false,
    "unvoiced_bias_activation": "relu",
    "decoder_use_partial_padding": true,
    "decoder_use_unvoiced_bias": true,
    "ap_pred_log_f0": true,
    "ap_use_unvoiced_bias": false,
    "ap_use_voiced_embeddings": true,
    "dur_model_config": {
      "name": "dap",
      "hparams": {
        "n_speaker_dim": 16,
        "bottleneck_hparams": {
          "in_dim": 512,
          "reduction_factor": 16,
          "norm": "weightnorm",
          "non_linearity": "relu"
        },
        "take_log_of_input": true,
        "arch_hparams": {
          "out_dim": 1,
          "n_layers": 2,
          "n_channels": 256,
          "kernel_size": 3,
          "p_dropout": 0.25,
          "in_dim": 48
        }
      }
    },
    "f0_model_config": {
      "name": "dap",
      "hparams": {
        "n_speaker_dim": 16,
        "bottleneck_hparams": {
          "in_dim": 512,
          "reduction_factor": 16,
          "norm": "weightnorm",
          "non_linearity": "relu"
        },
        "take_log_of_input": false,
        "use_transformer": false,
        "arch_hparams": {
          "out_dim": 1,
          "n_layers": 2,
          "n_channels": 256,
          "kernel_size": 11,
          "p_dropout": 0.5,
          "in_dim": 48
        }
      }
    },
    "energy_model_config": {
      "name": "dap",
      "hparams": {
        "n_speaker_dim": 16,
        "bottleneck_hparams": {
          "in_dim": 512,
          "reduction_factor": 16,
          "norm": "weightnorm",
          "non_linearity": "relu"
        },
        "take_log_of_input": false,
        "use_transformer": false,
        "arch_hparams": {
          "out_dim": 1,
          "n_layers": 2,
          "n_channels": 256,
          "kernel_size": 3,
          "p_dropout": 0.25,
          "in_dim": 48
        }
      }
    },
    "v_model_config": {
      "name": "dap",
      "hparams": {
        "n_speaker_dim": 16,
        "take_log_of_input": false,
        "bottleneck_hparams": {
          "in_dim": 512,
          "reduction_factor": 16,
          "norm": "weightnorm",
          "non_linearity": "relu"
        },
        "arch_hparams": {
          "out_dim": 1,
          "n_layers": 2,
          "n_channels": 256,
          "kernel_size": 3,
          "p_dropout": 0.5,
          "lstm_type": "",
          "use_linear": 1,
          "in_dim": 48
        }
      }
    }
  }
}