kiriyamaX committed on
Commit
246f9b7
·
verified ·
1 Parent(s): 3b14e2d

Upload 66 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
.gitattributes CHANGED
@@ -1,35 +1,42 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ samples/af_heart_3.wav filter=lfs diff=lfs merge=lfs -text
37
+ samples/af_heart_4.wav filter=lfs diff=lfs merge=lfs -text
38
+ samples/af_heart_5.wav filter=lfs diff=lfs merge=lfs -text
39
+ samples/af_heart_0.wav filter=lfs diff=lfs merge=lfs -text
40
+ samples/af_heart_1.wav filter=lfs diff=lfs merge=lfs -text
41
+ samples/af_heart_2.wav filter=lfs diff=lfs merge=lfs -text
42
+ samples/HEARME.wav filter=lfs diff=lfs merge=lfs -text
SAMPLES.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### HEARME
2
+ <audio controls><source src="https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/samples/HEARME.wav" type="audio/wav"></audio>
3
+ > Kokoro is an open-weight TTS model with 82 million parameters. Despite its lightweight architecture, it delivers comparable quality to larger models while being significantly faster and more cost-efficient. With Apache-licensed weights, Kokoro can be deployed anywhere from production environments to personal projects.
4
+ ```
5
+ kˈOkəɹO ɪz ɐn ˈOpᵊnwˌAt tˌitˌiˈɛs mˈɑdᵊl wɪð ˈATi tˈu mˈɪljᵊn pəɹˈæməTəɹz. dəspˈIt ɪts lˈItwˌAt ˈɑɹkətˌɛkʧəɹ, ɪt dəlˈɪvəɹz kˈɑmpəɹəbᵊl kwˈɑləTi tə lˈɑɹʤəɹ mˈɑdᵊlz wˌIl bˈiɪŋ səɡnˈɪfəkəntli fˈæstəɹ ænd mˈɔɹ kˈɔstəfˌɪʃənt. wˌɪð əpˌæʧilˈIsᵊnst wˈAts, kˈOkəɹO kæn bi dəplˈYd ˈɛniwˌɛɹ fɹʌm pɹədˈʌkʃən ənvˈIɹənmᵊnts tə pˈɜɹsᵊnəl pɹˈɑʤˌɛkts.
6
+ ```
7
+
8
+ ### af_heart_0
9
+ <audio controls><source src="https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/samples/af_heart_0.wav" type="audio/wav"></audio>
10
+ > The sky above the port was the color of television, tuned to a dead channel.
11
+ ```
12
+ ðə skˈI əbˈʌv ðə pˈɔɹt wʌz ðə kˈʌləɹ ʌv tˈɛləvˌɪʒən, tˈund tə ɐ dˈɛd ʧˈænᵊl.
13
+ ```
14
+
15
+ ### af_heart_1
16
+ <audio controls><source src="https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/samples/af_heart_1.wav" type="audio/wav"></audio>
17
+ > "It's not like I'm using," Case heard someone say, as he shouldered his way through the crowd around the door of the Chat. "It's like my body's developed this massive drug deficiency."
18
+ ```
19
+ “ˌɪts nˌɑt lˈIk ˌIm jˈuzɪŋ,” kˈAs hˈɜɹd sˈʌmwˌʌn sˈA, æz hi ʃˈOldəɹd hɪz wˈA θɹu ðə kɹˈWd əɹˈWnd ðə dˈɔɹ ʌv ðə ʧˈæt. “ˌɪts lˈIk mI bˈɑdiz dəvˈɛləpt ðɪs mˈæsɪv dɹˈʌɡ dəfˈɪʃənsi.”
20
+ ```
21
+
22
+ ### af_heart_2
23
+ <audio controls><source src="https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/samples/af_heart_2.wav" type="audio/wav"></audio>
24
+ > It was a Sprawl voice and a Sprawl joke. The Chatsubo was a bar for professional expatriates; you could drink there for a week and never hear two words in Japanese.
25
+ ```
26
+ ˌɪt wʌz ɐ spɹˈɔl vˈYs ænd ɐ spɹˈɔl ʤˈOk. ðə ʧætsˈubO wʌz ɐ bˈɑɹ fɔɹ pɹəfˈɛʃᵊnəl ɛkspˈAtɹiəts; ju kʊd dɹˈɪŋk ðɛɹ fɔɹ ɐ wˈik ænd nˈɛvəɹ hˈɪɹ tˈu wˈɜɹdz ɪn ʤˌæpənˈiz.
27
+ ```
28
+
29
+ ### af_heart_3
30
+ <audio controls><source src="https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/samples/af_heart_3.wav" type="audio/wav"></audio>
31
+ > These were to have an enormous impact, not only because they were associated with Constantine, but also because, as in so many other areas, the decisions taken by Constantine (or in his name) were to have great significance for centuries to come. One of the main issues was the shape that Christian churches were to take, since there was not, apparently, a tradition of monumental church buildings when Constantine decided to help the Christian church build a series of truly spectacular structures.
32
+ ```
33
+ ðˌiz wɜɹ tə hæv ɐn ɪnˈɔɹməs ˈɪmpˌækt, nˌɑt ˈOnli bəkˈʌz ðA wɜɹ əsˈOsiˌATᵻd wɪð kˈɑnstəntˌin, bˌʌt ˈɔlsO bəkˈʌz, æz ɪn sˌO mˈɛni ˈʌðəɹ ˈɛɹiəz, ðə dəsˈɪʒᵊnz tˈAkən bI kˈɑnstəntˌin (ɔɹ ɪn hɪz nˈAm) wɜɹ tə hæv ɡɹˈAt səɡnˈɪfəkᵊns fɔɹ sˈɛnʧəɹiz tə kˈʌm. wˈʌn ʌv ðə mˈAn ˈɪʃjuz wʌz ðə ʃˈAp ðæt kɹˈɪsʧən ʧˈɜɹʧᵻz wɜɹ tə tˈAk, sˈɪns ðɛɹ wʌz nˌɑt, əpˈɛɹəntli, ɐ tɹədˈɪʃən ʌv mˌɑnjəmˈɛntᵊl ʧˈɜɹʧ bˈɪldɪŋz wˌɛn kˈɑnstəntˌin dəsˈIdᵻd tə hˈɛlp ðə kɹˈɪsʧən ʧˈɜɹʧ bˈɪld ɐ sˈɪɹiz ʌv tɹˈuli spɛktˈækjələɹ stɹˈʌkʧəɹz.
34
+ ```
35
+
36
+ ### af_heart_4
37
+ <audio controls><source src="https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/samples/af_heart_4.wav" type="audio/wav"></audio>
38
+ > The main form that these churches took was that of the basilica, a multipurpose rectangular structure, based ultimately on the earlier Greek stoa, which could be found in most of the great cities of the empire. Christianity, unlike classical polytheism, needed a large interior space for the celebration of its religious services, and the basilica aptly filled that need.
39
+ ```
40
+ ðə mˈAn fˈɔɹm ðæt ðiz ʧˈɜɹʧᵻz tˈʊk wʌz ðæt ʌv ðə bəsˈɪləkə, ɐ mˌʌltipˈɜɹpəs ɹɛktˈæŋɡjələɹ stɹˈʌkʧəɹ, bˈAst ˈʌltəmətli ˌɔn ði ˈɜɹliəɹ ɡɹˈik stˈOə, wˌɪʧ kʊd bi fˈWnd ɪn mˈOst ʌv ðə ɡɹˈAt sˈɪTiz ʌv ði ˈɛmpˌIəɹ. kɹˌɪsʧiˈænəTi, ˌʌnlˈIk klˈæsəkᵊl pˈɑliθiˌɪzəm, nˈidᵻd ɐ lˈɑɹʤ ɪntˈɪɹiəɹ spˈAs fɔɹ ðə sˌɛləbɹˈAʃən ʌv ɪts ɹəlˈɪʤəs sˈɜɹvəsᵻz, ænd ðə bəsˈɪləkə ˈæptli fˈɪld ðæt nˈid.
41
+ ```
42
+
43
+ ### af_heart_5
44
+ <audio controls><source src="https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/samples/af_heart_5.wav" type="audio/wav"></audio>
45
+ > We naturally do not know the degree to which the emperor was involved in the design of new churches, but it is tempting to connect this with the secular basilica that Constantine completed in the Roman forum (the so-called Basilica of Maxentius) and the one he probably built in Trier, in connection with his residence in the city at a time when he was still caesar.
46
+ ```
47
+ wˌi nˈæʧəɹəli dˈu nˌɑt nˈO ðə dəɡɹˈi tə wˌɪʧ ði ˈɛmpəɹəɹ wʌz ɪnvˈɑlvd ɪn ðə dəzˈIn ʌv nˈu ʧˈɜɹʧᵻz, bˌʌt ɪt ɪz tˈɛmptɪŋ tə kənˈɛkt ðɪs wɪð ðə sˈɛkjələɹ bəsˈɪləkə ðæt kˈɑnstəntˌin kəmplˈiTᵻd ɪn ðə ɹˈOmən fˈɔɹəm (ðə sˌOkˈɔld bəsˈɪləkə ʌv mæksˈɛntiəs) ænd ðə wˈʌn hi pɹˈɑbəbli bˈɪlt ɪn tɹˈɪɹ, ɪn kənˈɛkʃən wɪð hɪz ɹˈɛzədᵊns ɪn ðə sˈɪTi æt ɐ tˈIm wˌɛn hi wʌz stˈɪl sˈizəɹ.
48
+ ```
VOICES.md ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Voices
2
+
3
+ - 🇺🇸 [American English](#american-english): 11F 9M
4
+ - 🇬🇧 [British English](#british-english): 4F 4M
5
+ - 🇯🇵 [Japanese](#japanese): 4F 1M
6
+ - 🇨🇳 [Mandarin Chinese](#mandarin-chinese): 4F 4M
7
+ - 🇪🇸 [Spanish](#spanish): 1F 2M
8
+ - 🇫🇷 [French](#french): 1F
9
+ - 🇮🇳 [Hindi](#hindi): 2F 2M
10
+ - 🇮🇹 [Italian](#italian): 1F 1M
11
+ - 🇧🇷 [Brazilian Portuguese](#brazilian-portuguese): 1F 2M
12
+
13
+ For each voice, the given grades are intended to be estimates of the **quality and quantity** of its associated training data, both of which impact overall inference quality.
14
+
15
+ Subjectively, voices will sound better or worse to different people.
16
+
17
+ Support for non-English languages may be absent or thin due to weak G2P (grapheme-to-phoneme conversion) and/or a lack of training data. Some languages are represented by only a small handful of voices, or even just one (French).
18
+
19
+ Most voices perform best on a "goldilocks range" of 100-200 tokens out of ~500 possible. Voices may perform worse at the extremes:
20
+ - **Weakness** on short utterances, especially those shorter than 10-20 tokens. The root cause could be a lack of short-utterance training data and/or the model architecture. One possible inference-time mitigation is to bundle shorter utterances together.
21
+ - **Rushing** on long utterances, especially over 400 tokens. You can chunk down to shorter utterances or adjust the `speed` parameter to mitigate this.
22
+
23
+ **Target Quality**
24
+ - How high is the quality of the reference voice? This grade may be impacted by audio quality, artifacts, compression, and sample rate.
25
+ - How well do the text labels match the audio? Text/audio misalignment (e.g. from hallucinations) will lower this grade.
26
+
27
+ **Training Duration**
28
+ - How much audio was seen during training? Smaller durations result in a lower overall grade.
29
+ - 10 hours <= **HH hours** < 100 hours
30
+ - 1 hour <= H hours < 10 hours
31
+ - 10 minutes <= MM minutes < 100 minutes
32
+ - 1 minute <= _M minutes_ 🤏 < 10 minutes
33
+
34
+ ### American English
35
+
36
+ - `lang_code='a'` in [`misaki[en]`](https://github.com/hexgrad/misaki)
37
+ - espeak-ng `en-us` fallback
38
+
39
+ | Name | Traits | Target Quality | Training Duration | Overall Grade | SHA256 |
40
+ | ---- | ------ | -------------- | ----------------- | ------------- | ------ |
41
+ | **af\_heart** | 🚺❤️ | | | **A** | `0ab5709b` |
42
+ | af_alloy | 🚺 | B | MM minutes | C | `6d877149` |
43
+ | af_aoede | 🚺 | B | H hours | C+ | `c03bd1a4` |
44
+ | af_bella | 🚺🔥 | **A** | **HH hours** | **A-** | `8cb64e02` |
45
+ | af_jessica | 🚺 | C | MM minutes | D | `cdfdccb8` |
46
+ | af_kore | 🚺 | B | H hours | C+ | `8bfbc512` |
47
+ | af_nicole | 🚺🎧 | B | **HH hours** | B- | `c5561808` |
48
+ | af_nova | 🚺 | B | MM minutes | C | `e0233676` |
49
+ | af_river | 🚺 | C | MM minutes | D | `e149459b` |
50
+ | af_sarah | 🚺 | B | H hours | C+ | `49bd364e` |
51
+ | af_sky | 🚺 | B | _M minutes_ 🤏 | C- | `c799548a` |
52
+ | am_adam | 🚹 | D | H hours | F+ | `ced7e284` |
53
+ | am_echo | 🚹 | C | MM minutes | D | `8bcfdc85` |
54
+ | am_eric | 🚹 | C | MM minutes | D | `ada66f0e` |
55
+ | am_fenrir | 🚹 | B | H hours | C+ | `98e507ec` |
56
+ | am_liam | 🚹 | C | MM minutes | D | `c8255075` |
57
+ | am_michael | 🚹 | B | H hours | C+ | `9a443b79` |
58
+ | am_onyx | 🚹 | C | MM minutes | D | `e8452be1` |
59
+ | am_puck | 🚹 | B | H hours | C+ | `dd1d8973` |
60
+ | am_santa | 🚹 | C | _M minutes_ 🤏 | D- | `7f2f7582` |
61
+
62
+ ### British English
63
+
64
+ - `lang_code='b'` in [`misaki[en]`](https://github.com/hexgrad/misaki)
65
+ - espeak-ng `en-gb` fallback
66
+
67
+ | Name | Traits | Target Quality | Training Duration | Overall Grade | SHA256 |
68
+ | ---- | ------ | -------------- | ----------------- | ------------- | ------ |
69
+ | bf_alice | 🚺 | C | MM minutes | D | `d292651b` |
70
+ | bf_emma | 🚺 | B | **HH hours** | B- | `d0a423de` |
71
+ | bf_isabella | 🚺 | B | MM minutes | C | `cdd4c370` |
72
+ | bf_lily | 🚺 | C | MM minutes | D | `6e09c2e4` |
73
+ | bm_daniel | 🚹 | C | MM minutes | D | `fc3fce4e` |
74
+ | bm_fable | 🚹 | B | MM minutes | C | `d44935f3` |
75
+ | bm_george | 🚹 | B | MM minutes | C | `f1bc8122` |
76
+ | bm_lewis | 🚹 | C | H hours | D+ | `b5204750` |
77
+
78
+ ### Japanese
79
+
80
+ - `lang_code='j'` in [`misaki[ja]`](https://github.com/hexgrad/misaki)
81
+ - Total Japanese training data: H hours
82
+
83
+ | Name | Traits | Target Quality | Training Duration | Overall Grade | SHA256 | CC BY |
84
+ | ---- | ------ | -------------- | ----------------- | ------------- | ------ | ----- |
85
+ | jf_alpha | 🚺 | B | H hours | C+ | `1bf4c9dc` | |
86
+ | jf_gongitsune | 🚺 | B | MM minutes | C | `1b171917` | [gongitsune](https://github.com/koniwa/koniwa/blob/master/source/tnc/tnc__gongitsune.txt) |
87
+ | jf_nezumi | 🚺 | B | _M minutes_ 🤏 | C- | `d83f007a` | [nezuminoyomeiri](https://github.com/koniwa/koniwa/blob/master/source/tnc/tnc__nezuminoyomeiri.txt) |
88
+ | jf_tebukuro | 🚺 | B | MM minutes | C | `0d691790` | [tebukurowokaini](https://github.com/koniwa/koniwa/blob/master/source/tnc/tnc__tebukurowokaini.txt) |
89
+ | jm_kumo | 🚹 | B | _M minutes_ 🤏 | C- | `98340afd` | [kumonoito](https://github.com/koniwa/koniwa/blob/master/source/tnc/tnc__kumonoito.txt) |
90
+
91
+ ### Mandarin Chinese
92
+
93
+ - `lang_code='z'` in [`misaki[zh]`](https://github.com/hexgrad/misaki)
94
+ - Total Mandarin Chinese training data: H hours
95
+
96
+ | Name | Traits | Target Quality | Training Duration | Overall Grade | SHA256 |
97
+ | ---- | ------ | -------------- | ----------------- | ------------- | ------ |
98
+ | zf_xiaobei | 🚺 | C | MM minutes | D | `9b76be63` |
99
+ | zf_xiaoni | 🚺 | C | MM minutes | D | `95b49f16` |
100
+ | zf_xiaoxiao | 🚺 | C | MM minutes | D | `cfaf6f2d` |
101
+ | zf_xiaoyi | 🚺 | C | MM minutes | D | `b5235dba` |
102
+ | zm_yunjian | 🚹 | C | MM minutes | D | `76cbf8ba` |
103
+ | zm_yunxi | 🚹 | C | MM minutes | D | `dbe6e1ce` |
104
+ | zm_yunxia | 🚹 | C | MM minutes | D | `bb2b03b0` |
105
+ | zm_yunyang | 🚹 | C | MM minutes | D | `5238ac22` |
106
+
107
+ ### Spanish
108
+
109
+ - `lang_code='e'` in [`misaki[en]`](https://github.com/hexgrad/misaki)
110
+ - espeak-ng `es`
111
+
112
+ | Name | Traits | SHA256 |
113
+ | ---- | ------ | ------ |
114
+ | ef_dora | 🚺 | `d9d69b0f` |
115
+ | em_alex | 🚹 | `5eac53f7` |
116
+ | em_santa | 🚹 | `aa8620cb` |
117
+
118
+ ### French
119
+
120
+ - `lang_code='f'` in [`misaki[en]`](https://github.com/hexgrad/misaki)
121
+ - espeak-ng `fr-fr`
122
+ - Total French training data: <11 hours
123
+
124
+ | Name | Traits | Target Quality | Training Duration | Overall Grade | SHA256 | CC BY |
125
+ | ---- | ------ | -------------- | ----------------- | ------------- | ------ | ----- |
126
+ | ff_siwis | 🚺 | B | <11 hours | B- | `8073bf2d` | [SIWIS](https://datashare.ed.ac.uk/handle/10283/2353) |
127
+
128
+ ### Hindi
129
+
130
+ - `lang_code='h'` in [`misaki[en]`](https://github.com/hexgrad/misaki)
131
+ - espeak-ng `hi`
132
+ - Total Hindi training data: H hours
133
+
134
+ | Name | Traits | Target Quality | Training Duration | Overall Grade | SHA256 |
135
+ | ---- | ------ | -------------- | ----------------- | ------------- | ------ |
136
+ | hf_alpha | 🚺 | B | MM minutes | C | `06906fe0` |
137
+ | hf_beta | 🚺 | B | MM minutes | C | `63c0a1a6` |
138
+ | hm_omega | 🚹 | B | MM minutes | C | `b55f02a8` |
139
+ | hm_psi | 🚹 | B | MM minutes | C | `2f0f055c` |
140
+
141
+ ### Italian
142
+
143
+ - `lang_code='i'` in [`misaki[en]`](https://github.com/hexgrad/misaki)
144
+ - espeak-ng `it`
145
+ - Total Italian training data: H hours
146
+
147
+ | Name | Traits | Target Quality | Training Duration | Overall Grade | SHA256 |
148
+ | ---- | ------ | -------------- | ----------------- | ------------- | ------ |
149
+ | if_sara | 🚺 | B | MM minutes | C | `6c0b253b` |
150
+ | im_nicola | 🚹 | B | MM minutes | C | `234ed066` |
151
+
152
+ ### Brazilian Portuguese
153
+
154
+ - `lang_code='p'` in [`misaki[en]`](https://github.com/hexgrad/misaki)
155
+ - espeak-ng `pt-br`
156
+
157
+ | Name | Traits | SHA256 |
158
+ | ---- | ------ | ------ |
159
+ | pf_dora | 🚺 | `07e4ff98` |
160
+ | pm_alex | 🚹 | `cf0ba8c5` |
161
+ | pm_santa | 🚹 | `d4210316` |
config.json ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "istftnet": {
3
+ "upsample_kernel_sizes": [20, 12],
4
+ "upsample_rates": [10, 6],
5
+ "gen_istft_hop_size": 5,
6
+ "gen_istft_n_fft": 20,
7
+ "resblock_dilation_sizes": [
8
+ [1, 3, 5],
9
+ [1, 3, 5],
10
+ [1, 3, 5]
11
+ ],
12
+ "resblock_kernel_sizes": [3, 7, 11],
13
+ "upsample_initial_channel": 512
14
+ },
15
+ "dim_in": 64,
16
+ "dropout": 0.2,
17
+ "hidden_dim": 512,
18
+ "max_conv_dim": 512,
19
+ "max_dur": 50,
20
+ "multispeaker": true,
21
+ "n_layer": 3,
22
+ "n_mels": 80,
23
+ "n_token": 178,
24
+ "style_dim": 128,
25
+ "text_encoder_kernel_size": 5,
26
+ "plbert": {
27
+ "hidden_size": 768,
28
+ "num_attention_heads": 12,
29
+ "intermediate_size": 2048,
30
+ "max_position_embeddings": 512,
31
+ "num_hidden_layers": 12,
32
+ "dropout": 0.1
33
+ },
34
+ "vocab": {
35
+ ";": 1,
36
+ ":": 2,
37
+ ",": 3,
38
+ ".": 4,
39
+ "!": 5,
40
+ "?": 6,
41
+ "—": 9,
42
+ "…": 10,
43
+ "\"": 11,
44
+ "(": 12,
45
+ ")": 13,
46
+ "“": 14,
47
+ "”": 15,
48
+ " ": 16,
49
+ "\u0303": 17,
50
+ "ʣ": 18,
51
+ "ʥ": 19,
52
+ "ʦ": 20,
53
+ "ʨ": 21,
54
+ "ᵝ": 22,
55
+ "\uAB67": 23,
56
+ "A": 24,
57
+ "I": 25,
58
+ "O": 31,
59
+ "Q": 33,
60
+ "S": 35,
61
+ "T": 36,
62
+ "W": 39,
63
+ "Y": 41,
64
+ "ᵊ": 42,
65
+ "a": 43,
66
+ "b": 44,
67
+ "c": 45,
68
+ "d": 46,
69
+ "e": 47,
70
+ "f": 48,
71
+ "h": 50,
72
+ "i": 51,
73
+ "j": 52,
74
+ "k": 53,
75
+ "l": 54,
76
+ "m": 55,
77
+ "n": 56,
78
+ "o": 57,
79
+ "p": 58,
80
+ "q": 59,
81
+ "r": 60,
82
+ "s": 61,
83
+ "t": 62,
84
+ "u": 63,
85
+ "v": 64,
86
+ "w": 65,
87
+ "x": 66,
88
+ "y": 67,
89
+ "z": 68,
90
+ "ɑ": 69,
91
+ "ɐ": 70,
92
+ "ɒ": 71,
93
+ "æ": 72,
94
+ "β": 75,
95
+ "ɔ": 76,
96
+ "ɕ": 77,
97
+ "ç": 78,
98
+ "ɖ": 80,
99
+ "ð": 81,
100
+ "ʤ": 82,
101
+ "ə": 83,
102
+ "ɚ": 85,
103
+ "ɛ": 86,
104
+ "ɜ": 87,
105
+ "ɟ": 90,
106
+ "ɡ": 92,
107
+ "ɥ": 99,
108
+ "ɨ": 101,
109
+ "ɪ": 102,
110
+ "ʝ": 103,
111
+ "ɯ": 110,
112
+ "ɰ": 111,
113
+ "ŋ": 112,
114
+ "ɳ": 113,
115
+ "ɲ": 114,
116
+ "ɴ": 115,
117
+ "ø": 116,
118
+ "ɸ": 118,
119
+ "θ": 119,
120
+ "œ": 120,
121
+ "ɹ": 123,
122
+ "ɾ": 125,
123
+ "ɻ": 126,
124
+ "ʁ": 128,
125
+ "ɽ": 129,
126
+ "ʂ": 130,
127
+ "ʃ": 131,
128
+ "ʈ": 132,
129
+ "ʧ": 133,
130
+ "ʊ": 135,
131
+ "ʋ": 136,
132
+ "ʌ": 138,
133
+ "ɣ": 139,
134
+ "ɤ": 140,
135
+ "χ": 142,
136
+ "ʎ": 143,
137
+ "ʒ": 147,
138
+ "ʔ": 148,
139
+ "ˈ": 156,
140
+ "ˌ": 157,
141
+ "ː": 158,
142
+ "ʰ": 162,
143
+ "ʲ": 164,
144
+ "↓": 169,
145
+ "→": 171,
146
+ "↗": 172,
147
+ "↘": 173,
148
+ "ᵻ": 177
149
+ }
150
+ }
kokoro-v1_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17fcb1ed8d44c71cdbf69611f9981ccb9ba9b5edee38c01dde80c09dcc003e91
3
+ size 134
samples/HEARME.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aad38e96fa60c91c995ac820ce6e86c28b0df7300177c0d3ca0766b9dc78feec
3
+ size 996044
samples/af_heart_0.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd7999ebbc8369779d5d3f504399ea466c909339f90231143416a7819a2047fc
3
+ size 237644
samples/af_heart_1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fe4f363b785fdc233f94dd1885c94a2267f7ceeea8c7fb5cce6bfcf0f7b273d
3
+ size 517244
samples/af_heart_2.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68ee75b2d503415a5b6edbd5230c823fbeb6b430d546b8c37e2284efcf280be8
3
+ size 496844
samples/af_heart_3.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce03dba2fe3291c1abd50326e3c1cef94e52f1242e6fafd5358dcc1f1ce425c1
3
+ size 132
samples/af_heart_4.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4542ddc87ff2882b4c776497545f459dbb86b8a72d9daa19678c811a8989b84
3
+ size 132
samples/af_heart_5.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03ec54959ae269f443cb19d18fb4f7bef0b9b07203b1e8fd5cdc3ba8e8ed414f
3
+ size 132
voices/af_alloy.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a02eb6a86d46aa3ca5737a49b942ad93506fe3e2583500d12098a5e4a05127a3
3
+ size 131
voices/af_aoede.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b6af6842c77876b847ac47a9c3461456aa4696f9f2f1abeb455346625919425
3
+ size 131
voices/af_bella.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48bd67556241f512229a4a8dd400584d64a4564b1d39c8f5f1d86348e9a795db
3
+ size 131
voices/af_heart.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50f24fddc9730b693bf93a8a4b4e889f4a4faf87bfbc7c8b1c16bf55f12accbd
3
+ size 131
voices/af_jessica.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60e8b79fe830f62ec3a75ca4db82d30b4bdf1e57b3e529fcaf7a8acc0408b02d
3
+ size 131
voices/af_kore.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2414e645aa985d128ad48336f2eebf13cc6394bf3cab1841e021aae11b3cc98f
3
+ size 131
voices/af_nicole.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5b96bee031bd6a419619a434d39ef5ff85d52d48326a029c636ec7b45d425b3
3
+ size 131
voices/af_nova.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46b5cef63379357c09e5cd12349da396cd5a374407d44dd83afcfc6ee3dcc2c8
3
+ size 131
voices/af_river.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81e60caf91cdae446e91dce71ab100cae82f7afd3e8a06bcb5026e4b0d54dc8f
3
+ size 131
voices/af_sarah.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bba343b0d21b061e7f361851e1a9a9e6a02c5c634ba44a601b3507cc6243a502
3
+ size 131
voices/af_sky.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebdd51e806d97e80bb64663f6ca7d0654f65c67085ea7c1b6894f758837ee7c5
3
+ size 131
voices/am_adam.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a63b12823c879361a390cb01ea7fda41dc7bb37d48e5e457dafc88b034c3504e
3
+ size 131
voices/am_echo.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10945d110d8efd01f09e66a8c1df555cb8690f521a14729c5e1e1a57ab0768bf
3
+ size 131
voices/am_eric.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6346c34cc30a3cbad7a3b0a5f44d2a049624a2dde91af24fd39816282fd73f4
3
+ size 131
voices/am_fenrir.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c05ea33bc8b3259cb479bcc50e659921ed47f94cac2abb572f1c51cb5832270
3
+ size 131
voices/am_liam.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f184294ce3042b12e3029920f0c612827d014b30179f376bbf3831945898ad9
3
+ size 131
voices/am_michael.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ecd9c17ba00f3c60ca0ca7f59d0166687d8116c526b8f69cf812f0ded2786e3
3
+ size 131
voices/am_onyx.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb45f782b6b60adb1dbf2a2ea89ff72d1f982776ea232cddba6046bc2d41e0fb
3
+ size 131
voices/am_puck.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8579bfc1cfdec3e0190a342ed4d60629c0bfaad67310c59cb0aca542a0cd1c7
3
+ size 131
voices/am_santa.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f99523a6f913846da7e39c56ce499c58cd92c7c613eafaee1818249edbab7b18
3
+ size 131
voices/bf_alice.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfed0c4eee241f3d6e76a8b4f0094e9dbe4b8ffd4c6f62f031835a01c60abcbe
3
+ size 131
voices/bf_emma.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ce024849989e53d20e87eb68d0cfdda9b240e7642ed619b2891937c5a49b036
3
+ size 131
voices/bf_isabella.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dca2904feb9e5604b5a3036084899b693f1309fa1d8f9d93b95908414f3b531
3
+ size 131
voices/bf_lily.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9acf8a597349b2a7c62a33e0660aedfe5804670bce72ab5dbad9f4f5261ec3f4
3
+ size 131
voices/bm_daniel.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7c7ff266d6208fd880411f6069c93fe25686646f5a343a9a58ce9376b432de3
3
+ size 131
voices/bm_fable.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7940b5165a8f7757c20ec4e98d8127e523070a6516d7f3af87933b1548b0ce65
3
+ size 131
voices/bm_george.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:714349fb212754700ad90c7fe0213af60aa6e8be24f2ec58e968836c20d6e558
3
+ size 131
voices/bm_lewis.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d287e4c2f3fa91bfe606e2360ea315e3b77c4cef2ea007bfc09ca4fe027d44f8
3
+ size 131
voices/ef_dora.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d2c4f11164d5cfe561a9fec622cd3271c7c99ca35c7a34a2e61e03cae9d3e09
3
+ size 131
voices/em_alex.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9c8862e32e84bef9a04977b83cf743c7128b6b3c202c2ccdb78db68f05a0c3c
3
+ size 131
voices/em_santa.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1bb943b345b90f7fd0dbd14e9936ef8da009708678be58039bb51de5f40bece
3
+ size 131
voices/ff_siwis.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:addae5d36e4b65dcfb8676d587ee42300309eae1973c0e18e33df4b036a6479c
3
+ size 131
voices/hf_alpha.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dc2cdfd25ed2fbbbc55c4dddbfc9b69b1c4ed057c87564dfbac620abaf2d26d
3
+ size 131
voices/hf_beta.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f183f277f6aea05b26bf1de6e19fb85c8bd91ab070ec5c2dbc4b8bdde711e7c7
3
+ size 131
voices/hm_omega.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9a2304973401f70c07350afcf000ffe87ef1a8a6cf6464d114710b14e5a66b5
3
+ size 131
voices/hm_psi.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc9016c2fbd8a1172cfa6ed8af439aeac9fdf3b7c1fd6b08298d13d7c0b5e8ac
3
+ size 131
voices/if_sara.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aea317ef7ff01972662837d64fcd2e8012bf747939e8e026945702b0b7539627
3
+ size 131
voices/im_nicola.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c67346eb54c164597dff9e641f0c3cb13683ddd422198fca04dc25c889a66c3
3
+ size 131