File size: 2,024 Bytes
c44455c
 
3e29fa0
c44455c
3e29fa0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0fab2df
d7a3a95
3e29fa0
 
 
 
 
c44455c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
{
  "architectures": [
    "FastSpeech2ConformerModel"
  ],
  "decoder_attention_dropout_rate": 0.2,
  "decoder_concat_after": false,
  "decoder_dropout_rate": 0.2,
  "decoder_kernel_size": 31,
  "decoder_layers": 4,
  "decoder_linear_units": 1536,
  "decoder_normalize_before": false,
  "decoder_num_attention_heads": 2,
  "decoder_positional_dropout_rate": 0.2,
  "duration_predictor_channels": 256,
  "duration_predictor_dropout_rate": 0.2,
  "duration_predictor_kernel_size": 3,
  "duration_predictor_layers": 2,
  "encoder_attention_dropout_rate": 0.2,
  "encoder_concat_after": false,
  "encoder_dropout_rate": 0.2,
  "encoder_kernel_size": 7,
  "encoder_layers": 4,
  "encoder_linear_units": 1536,
  "encoder_normalize_before": false,
  "encoder_num_attention_heads": 2,
  "encoder_positional_dropout_rate": 0.2,
  "energy_embed_dropout": 0.0,
  "energy_embed_kernel_size": 1,
  "energy_predictor_channels": 256,
  "energy_predictor_dropout": 0.5,
  "energy_predictor_kernel_size": 3,
  "energy_predictor_layers": 2,
  "hidden_size": 384,
  "input_dim": 78,
  "is_encoder_decoder": true,
  "max_source_positions": 5000,
  "model_type": "fastspeech2_conformer",
  "num_languages": null,
  "num_mel_bins": 80,
  "num_speakers": null,
  "pitch_embed_dropout": 0.0,
  "pitch_embed_kernel_size": 1,
  "pitch_predictor_channels": 256,
  "pitch_predictor_dropout": 0.5,
  "pitch_predictor_kernel_size": 5,
  "pitch_predictor_layers": 5,
  "positionwise_conv_kernel_size": 3,
  "reduction_factor": 1,
  "speaker_embed_dim": null,
  "speaking_speed": 1.0,
  "speech_decoder_postnet_dropout": 0.5,
  "speech_decoder_postnet_kernel": 5,
  "speech_decoder_postnet_layers": 5,
  "speech_decoder_postnet_units": 256,
  "stop_gradient_from_energy_predictor": false,
  "stop_gradient_from_pitch_predictor": true,
  "torch_dtype": "float32",
  "transformers_version": "4.32.0.dev0",
  "use_cnn_in_conformer": true,
  "use_macaron_style_in_conformer": true,
  "use_masking": true,
  "use_weighted_masking": false,
  "vocab_size": 78
}