# Base configs this file builds on, listed in order.
# NOTE(review): presumably a Hydra-style defaults list — confirm against the loader.
defaults:
  - common
  - config
# Data-side settings: language-embedding options plus speaker/content encoder
# fields that are left null here (per the notes below, computed on forward by
# the model).
data:
  use_lang_emb: true
  # lang2id below has 11 entries (ids 0-10).
  num_langs: 11
  # Same size as hidden_channels to facilitate the addition.
  lang_dim: 192
  # Language name -> integer id.
  lang2id:
    chinese: 0
    dutch: 1
    english: 2
    french: 3
    german: 4
    italian: 5
    japanese: 6
    other: 7
    polish: 8
    portuguese: 9
    spanish: 10
  use_spk_emb: false
  # null: compute on forward (model)
  spk_embeddings_dir: null
  # null: compute on forward (model) | ECAPA2SpeakerEncoder16k
  spk_encoder_type: null
  # null: compute on forward (model) | hubert
  content_encoder_type: null
  # null: compute on forward (model) | lengyue233/content-vec-best
  content_encoder_ckpt: null
# Model-side settings: speaker embedding and content encoder selection.
model:
  use_spk_emb: true
  spk_encoder_type: ECAPA2SpeakerEncoder16k
  # Not used for ECAPA2SpeakerEncoder16k.
  spk_encoder_ckpt: null
  # One of: hubert | wavlm | spin
  content_encoder_type: spin
  # Path to the config file for the content encoder.
  content_encoder_config: models/spin/spin.yaml
  # Or models/wavlm/WavLM-Large.pt
  content_encoder_ckpt: models/spin/spin.ckpt