khanusa committed on
Commit
efc2070
·
verified ·
1 Parent(s): ee09fe7

Create config.yaml

Browse files
Files changed (1) hide show
  1. config.yaml +71 -0
config.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type: spark-tts
2
+ architectures:
3
+ - SparkTTSModel
4
+ auto_map:
5
+ AutoConfig: configuration_spark_tts.SparkTTSConfig
6
+ AutoModel: modeling_spark_tts.SparkTTSModel
7
+ AutoProcessor: processing_spark_tts.SparkTTSProcessor
8
+ processor_class: processing_spark_tts.SparkTTSProcessor
9
+ llm_model_name_or_path: ./LLM
10
+ bicodec_model_name_or_path: ./BiCodec
11
+ wav2vec2_model_name_or_path: ./wav2vec2-large-xlsr-53
12
+ sample_rate: 16000
13
+ highpass_cutoff_freq: 40
14
+ latent_hop_length: 320
15
+ ref_segment_duration: 6.0
16
+ volume_normalize: true
17
+ torch_dtype: bfloat16
18
+ transformers_version: "4.50.3"
19
+ _commit_hash: null
20
+ bicodec_config:
21
+ mel_params:
22
+ sample_rate: 16000
23
+ n_fft: 1024
24
+ win_length: 640
25
+ hop_length: 320
26
+ mel_fmin: 10
27
+ mel_fmax: null
28
+ num_mels: 128
29
+ encoder_config:
30
+ input_channels: 1024
31
+ vocos_dim: 384
32
+ vocos_intermediate_dim: 2048
33
+ vocos_num_layers: 12
34
+ out_channels: 1024
35
+ sample_ratios: [1, 1]
36
+ decoder_config:
37
+ input_channel: 1024
38
+ channels: 1536
39
+ rates: [8, 5, 4, 2]
40
+ kernel_sizes: [16, 11, 8, 4]
41
+ quantizer_config:
42
+ input_dim: 1024
43
+ codebook_size: 8192
44
+ codebook_dim: 8
45
+ commitment: 0.25
46
+ codebook_loss_weight: 2.0
47
+ decay: 0.99
48
+ threshold_ema_dead_code: 0.2
49
+ speaker_encoder_config:
50
+ input_dim: 128
51
+ out_dim: 1024
52
+ latent_dim: 128
53
+ token_num: 32
54
+ fsq_levels: [4, 4, 4, 4, 4, 4]
55
+ fsq_num_quantizers: 1
56
+ prenet_config:
57
+ input_channels: 1024
58
+ vocos_dim: 384
59
+ vocos_intermediate_dim: 2048
60
+ vocos_num_layers: 12
61
+ out_channels: 1024
62
+ condition_dim: 1024
63
+ sample_ratios: [1, 1]
64
+ use_tanh_at_final: false
65
+ postnet_config:
66
+ input_channels: 1024
67
+ vocos_dim: 384
68
+ vocos_intermediate_dim: 2048
69
+ vocos_num_layers: 6
70
+ out_channels: 1024
71
+ use_tanh_at_final: false