Upload 70 files
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- AgnesTachyon/G_latest.pth +3 -0
- AgnesTachyon/config.json +145 -0
- AirGroove/G_latest.pth +3 -0
- AirGroove/config.json +145 -0
- AirShakur/G_latest.pth +3 -0
- AirShakur/config.json +145 -0
- BikoPegasus/G_latest.pth +3 -0
- BikoPegasus/config.json +145 -0
- BiwaHayahide/G_latest.pth +3 -0
- BiwaHayahide/config.json +145 -0
- ChairwomanAkikawa/G_latest.pth +3 -0
- ChairwomanAkikawa/config.json +145 -0
- ElCondorPasa/G_latest.pth +3 -0
- ElCondorPasa/config.json +145 -0
- GoldShip/G_latest.pth +3 -0
- GoldShip/config.json +145 -0
- HayakawaTazuna/G_latest.pth +3 -0
- HayakawaTazuna/config.json +145 -0
- HishiAkebono/G_latest.pth +3 -0
- HishiAkebono/config.json +145 -0
- IkunoDictus/G_latest.pth +3 -0
- IkunoDictus/config.json +145 -0
- JunglePocket/G_latest.pth +3 -0
- JunglePocket/config.json +145 -0
- KatsuragiAce/G_latest.pth +3 -0
- KatsuragiAce/config.json +145 -0
- KawakamiPrincess/G_latest.pth +3 -0
- KawakamiPrincess/config.json +145 -0
- KingHalo/G_latest.pth +3 -0
- KingHalo/config.json +145 -0
- MarvelousSunday/G_latest.pth +3 -0
- MarvelousSunday/config.json +145 -0
- MejiroPalmer/G_latest.pth +3 -0
- MejiroPalmer/config.json +145 -0
- MejiroRamonu/G_latest.pth +3 -0
- MejiroRamonu/config.json +145 -0
- MejiroRyan/G_latest.pth +3 -0
- MejiroRyan/config.json +145 -0
- NakayamaFesta/G_latest.pth +3 -0
- NakayamaFesta/config.json +145 -0
- NaritaBrian/G_latest.pth +3 -0
- NaritaBrian/config.json +145 -0
- NaritaTopRoad/G_latest.pth +3 -0
- NaritaTopRoad/config.json +145 -0
- SakuraBakushinO/G_latest.pth +3 -0
- SakuraBakushinO/config.json +145 -0
- SakuraLaurel/G_latest.pth +3 -0
- SakuraLaurel/config.json +145 -0
- SeekingthePearl/G_latest.pth +3 -0
- SeekingthePearl/config.json +145 -0
AgnesTachyon/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0219844d65c9e926f617bcde04d8a2b84f655925f2e44a32c345b885cdfbc5af
|
3 |
+
size 158897385
|
AgnesTachyon/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"AgnesTachyon": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
AirGroove/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e54b446578b578675b2befd1150f0fad014d9de069d6bcd81861414eca702dc4
|
3 |
+
size 158897385
|
AirGroove/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"AirGroove": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
AirShakur/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d4cd7436d354382c358bd7eb0276fe51d1f4bd011b7c344b0121dc287aa0909
|
3 |
+
size 158897385
|
AirShakur/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"AirShakur": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
BikoPegasus/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5dd100459f2754cef2ef371f78f791d0f7bc38589c15d032fffc19259fb0d4d9
|
3 |
+
size 158897385
|
BikoPegasus/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"BikoPegasus": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
BiwaHayahide/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd14a4674518cdb562921edf8e7c549b0f477d7786ca884fd9a9465dee038f3f
|
3 |
+
size 158897385
|
BiwaHayahide/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"BiwaHayahide": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
ChairwomanAkikawa/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba3f15e289faa036a63ef96441a13d69c0486b4cb8c474535816103a72f38393
|
3 |
+
size 158897385
|
ChairwomanAkikawa/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"ChairwomanAkikawa": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
ElCondorPasa/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35986f4c88593bd357a0632a71b749e182695a397733c6120d3a9fa444067a4c
|
3 |
+
size 158897385
|
ElCondorPasa/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"ElCondorPasa": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
GoldShip/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d644ec4e2488ef63d3a047e6c43b9bc88abf138f0b45688c8a30125202350ef8
|
3 |
+
size 158897385
|
GoldShip/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"GoldShip": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
HayakawaTazuna/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51e447643fab5caa1eac56fd572b51bdfc589d82066bb778ad0b5693c2d3ea56
|
3 |
+
size 158897385
|
HayakawaTazuna/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"HayakawaTazuna": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
HishiAkebono/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b9403e16d0a64893dcc7b07965f3911823a21c599c9e9be76129ccc61b4047b
|
3 |
+
size 158897385
|
HishiAkebono/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"HishiAkebono": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
IkunoDictus/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e6b754a28a9de3aba60ebf3f7d6a8adddda3454a302a283bf58e5f2e768854d
|
3 |
+
size 158897385
|
IkunoDictus/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"IkunoDictus": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
JunglePocket/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e596e943e7fe80d31fce45100f9e1165a1dd3069b4a45a878436b89d0e1d5dba
|
3 |
+
size 158897385
|
JunglePocket/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"JunglePocket": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
KatsuragiAce/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d5a73476f8ab0212fa7af53cc7f29290cd3aa7c67e6aa9527a95e1cd11bafd6
|
3 |
+
size 158897385
|
KatsuragiAce/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"KatsuragiAce": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
KawakamiPrincess/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c477f4ab40bdde14ce7ab27d474eae0a790fccef5d726e1bf03f2117b949dbec
|
3 |
+
size 158897385
|
KawakamiPrincess/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"KawakamiPrincess": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
KingHalo/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec16384396008445eb4e48464df091dad5e6faf780554faaa388361bc64d176b
|
3 |
+
size 158897385
|
KingHalo/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"KingHalo": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
MarvelousSunday/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf332b83ca7fdd26391a8249f977f8160194d296ec3184d25a0f9f2a38f83674
|
3 |
+
size 158897385
|
MarvelousSunday/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"MarvelousSunday": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
MejiroPalmer/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:844ac0618b5313eb293df2a956add16e0545c70a87d5fcc822d48fcf89131790
|
3 |
+
size 158897385
|
MejiroPalmer/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"MejiroPalmer": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
MejiroRamonu/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c53b9849be8f53c7e2522604ed0382e59d915f9c1e903e8ceec7b6bdfd074f2a
|
3 |
+
size 158897385
|
MejiroRamonu/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"MejiroRamonu": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
MejiroRyan/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06fde530fc925f33f0045d16a19eb48c5c9c4e574a81a348ccb092c08532aa82
|
3 |
+
size 158897385
|
MejiroRyan/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"MejiroRyan": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
NakayamaFesta/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e89d61e71dbdefb540eabed068cd1ac25e413e346dc5d2e5d1af265364236d68
|
3 |
+
size 158897385
|
NakayamaFesta/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"NakayamaFesta": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
NaritaBrian/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:126b8d2d09aecd64f66ef302510730eae4d5e2c73a0cbcfc908317b9972b43f4
|
3 |
+
size 158897385
|
NaritaBrian/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"NaritaBrian": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
NaritaTopRoad/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17e9a1b32d1a59697838a048385ba966186d1ce7e8afd6fd7356ead1ea2ac92a
|
3 |
+
size 158897385
|
NaritaTopRoad/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"NaritaTopRoad": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
SakuraBakushinO/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06012d857181493d45ea6e395b889e6a8860c0a9082af0157c557e17aca3cd08
|
3 |
+
size 158897385
|
SakuraBakushinO/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"SakuraBakushinO": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
SakuraLaurel/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf7ec556ad3426ec6191e2ba7fe842c97fce144ae73f9937f49500f277ae1a13
|
3 |
+
size 158897385
|
SakuraLaurel/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"SakuraLaurel": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|
SeekingthePearl/G_latest.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b60a5875e89867c5333a82c6a3c5ae9dccde9b1b851b1f32b68810d3a8b8a1aa
|
3 |
+
size 158897385
|
SeekingthePearl/config.json
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 10000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 32,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "final_annotation_train.txt",
|
24 |
+
"validation_files": "final_annotation_val.txt",
|
25 |
+
"text_cleaners": [
|
26 |
+
"zh_ja_mixture_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 22050,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 1,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false,
|
86 |
+
"gin_channels": 256
|
87 |
+
},
|
88 |
+
"speakers": {
|
89 |
+
"SeekingthePearl": 0
|
90 |
+
},
|
91 |
+
"symbols": [
|
92 |
+
"_",
|
93 |
+
",",
|
94 |
+
".",
|
95 |
+
"!",
|
96 |
+
"?",
|
97 |
+
"-",
|
98 |
+
"~",
|
99 |
+
"\u2026",
|
100 |
+
"A",
|
101 |
+
"E",
|
102 |
+
"I",
|
103 |
+
"N",
|
104 |
+
"O",
|
105 |
+
"Q",
|
106 |
+
"U",
|
107 |
+
"a",
|
108 |
+
"b",
|
109 |
+
"d",
|
110 |
+
"e",
|
111 |
+
"f",
|
112 |
+
"g",
|
113 |
+
"h",
|
114 |
+
"i",
|
115 |
+
"j",
|
116 |
+
"k",
|
117 |
+
"l",
|
118 |
+
"m",
|
119 |
+
"n",
|
120 |
+
"o",
|
121 |
+
"p",
|
122 |
+
"r",
|
123 |
+
"s",
|
124 |
+
"t",
|
125 |
+
"u",
|
126 |
+
"v",
|
127 |
+
"w",
|
128 |
+
"y",
|
129 |
+
"z",
|
130 |
+
"\u0283",
|
131 |
+
"\u02a7",
|
132 |
+
"\u02a6",
|
133 |
+
"\u026f",
|
134 |
+
"\u0279",
|
135 |
+
"\u0259",
|
136 |
+
"\u0265",
|
137 |
+
"\u207c",
|
138 |
+
"\u02b0",
|
139 |
+
"`",
|
140 |
+
"\u2192",
|
141 |
+
"\u2193",
|
142 |
+
"\u2191",
|
143 |
+
" "
|
144 |
+
]
|
145 |
+
}
|