Update zipvoice_distill/model.json
Browse files
zipvoice_distill/model.json
CHANGED
|
@@ -1,13 +1,12 @@
|
|
| 1 |
{
|
| 2 |
"model" : {
|
| 3 |
-
"fm_decoder_downsampling_factor" :
|
| 4 |
-
"fm_decoder_num_layers" :
|
| 5 |
-
"fm_decoder_cnn_module_kernel" :
|
| 6 |
"fm_decoder_feedforward_dim" : 1536,
|
| 7 |
"fm_decoder_num_heads" : 4,
|
| 8 |
"fm_decoder_dim" : 512,
|
| 9 |
-
"
|
| 10 |
-
"text_encoder_num_layers" : "4",
|
| 11 |
"text_encoder_feedforward_dim" : 512,
|
| 12 |
"text_encoder_cnn_module_kernel" : 9,
|
| 13 |
"text_encoder_num_heads" : 4,
|
|
@@ -17,12 +16,11 @@
|
|
| 17 |
"pos_head_dim" : 4,
|
| 18 |
"pos_dim" : 48,
|
| 19 |
"time_embed_dim" : 192,
|
| 20 |
-
"text_embed_dim" : 192
|
|
|
|
| 21 |
},
|
| 22 |
"feature" : {
|
| 23 |
"sampling_rate": 24000,
|
| 24 |
-
"
|
| 25 |
-
"n_fft" : 1024,
|
| 26 |
-
"hop_length" : 256
|
| 27 |
}
|
| 28 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"model" : {
|
| 3 |
+
"fm_decoder_downsampling_factor" : [1,2,4,2,1],
|
| 4 |
+
"fm_decoder_num_layers" : [2,2,4,4,4],
|
| 5 |
+
"fm_decoder_cnn_module_kernel" : [31,15,7,15,31],
|
| 6 |
"fm_decoder_feedforward_dim" : 1536,
|
| 7 |
"fm_decoder_num_heads" : 4,
|
| 8 |
"fm_decoder_dim" : 512,
|
| 9 |
+
"text_encoder_num_layers" : 4,
|
|
|
|
| 10 |
"text_encoder_feedforward_dim" : 512,
|
| 11 |
"text_encoder_cnn_module_kernel" : 9,
|
| 12 |
"text_encoder_num_heads" : 4,
|
|
|
|
| 16 |
"pos_head_dim" : 4,
|
| 17 |
"pos_dim" : 48,
|
| 18 |
"time_embed_dim" : 192,
|
| 19 |
+
"text_embed_dim" : 192,
|
| 20 |
+
"feat_dim": 100
|
| 21 |
},
|
| 22 |
"feature" : {
|
| 23 |
"sampling_rate": 24000,
|
| 24 |
+
"type": "vocos"
|
|
|
|
|
|
|
| 25 |
}
|
| 26 |
}
|