zedterminator ntt123 committed on
Commit
bca3bea
·
0 Parent(s):

Duplicate from ntt123/vietTTS

Browse files

Co-authored-by: Thông Nguyễn <[email protected]>

.gitattributes ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ acoustic_ckpt_latest.pickle filter=lfs diff=lfs merge=lfs -text
29
+ duration_ckpt_latest.pickle filter=lfs diff=lfs merge=lfs -text
30
+ hk_hifi.pickle filter=lfs diff=lfs merge=lfs -text
31
+ duration_latest_ckpt.pickle filter=lfs diff=lfs merge=lfs -text
32
+ acoustic_latest_ckpt.pickle filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: VietTTS
3
+ emoji: 🚀
4
+ colorFrom: purple
5
+ colorTo: gray
6
+ sdk: gradio
7
+ app_file: app.py
8
+ pinned: false
9
+ duplicated_from: ntt123/vietTTS
10
+ ---
11
+
12
+ # Configuration
13
+
14
+ `title`: _string_
15
+ Display title for the Space
16
+
17
+ `emoji`: _string_
18
+ Space emoji (only emoji characters are allowed)
19
+
20
+ `colorFrom`: _string_
21
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
22
+
23
+ `colorTo`: _string_
24
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
25
+
26
+ `sdk`: _string_
27
+ Can be either `gradio` or `streamlit`
28
+
29
+ `sdk_version`: _string_
30
+ Only applicable to the `streamlit` SDK.
31
+ See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
32
+
33
+ `app_file`: _string_
34
+ Path to your main application file (which contains either `gradio` or `streamlit` Python code).
35
+ Path is relative to the root of the repository.
36
+
37
+ `pinned`: _boolean_
38
+ Whether the Space stays on top of your list.
acoustic_latest_ckpt.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:932009df1a4beb27fd95e28de1f1ab97fab66c0dc1bb8c576603e3dbd4a6659c
3
+ size 150689843
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from vietTTS.hifigan.mel2wave import mel2wave
2
+ from vietTTS.nat.text2mel import text2mel
3
+ from vietTTS import nat_normalize_text
4
+ import numpy as np
5
+ import gradio as gr
6
+ import os
7
+
8
+
9
def text_to_speech(text):
    """Synthesize ``text`` into 16-bit PCM audio samples.

    The input is cut to its first 500 characters, normalized with
    ``nat_normalize_text``, turned into a mel-spectrogram by ``text2mel``
    and then converted to a waveform by ``mel2wave``.

    Args:
        text: The text to speak.

    Returns:
        An ``np.int16`` array holding the synthesized samples.
    """
    # Prevent overly long inputs; slicing is a no-op for shorter text.
    text = nat_normalize_text(text[:500])
    mel = text2mel(
        text,
        "lexicon.txt",
        0.2,
        "acoustic_latest_ckpt.pickle",
        "duration_latest_ckpt.pickle",
    )
    wave = mel2wave(mel, "config.json", "hk_hifi.pickle")
    # NOTE(review): the 2**15 scaling implies float samples in [-1, 1] -- confirm.
    # A sample of exactly 1.0 scales to 32768, which does not fit in int16,
    # so clamp to the representable range before casting instead of letting
    # the conversion overflow.
    pcm_limits = np.iinfo(np.int16)
    scaled = np.clip(wave * (2**15), pcm_limits.min, pcm_limits.max)
    return scaled.astype(np.int16)
23
+
24
+
25
def speak(text):
    """Gradio callback: return a ``(sample_rate, samples)`` pair for ``text``.

    The samples come from ``text_to_speech``; the rate is fixed at 16 kHz.
    """
    sample_rate = 16_000
    samples = text_to_speech(text)
    return sample_rate, samples
28
+
29
+
30
# Text shown in the demo page header.
title = "vietTTS"
description = "A vietnamese text-to-speech demo."

# Sample sentences offered to the user as ready-made inputs.
examples = [
    "Trăm năm trong cõi người ta, chữ tài chữ mệnh khéo là ghét nhau.",
    "Đoạn trường tân thanh, thường được biết đến với cái tên đơn giản là Truyện Kiều, là một truyện thơ của đại thi hào Nguyễn Du",
    "Lục Vân Tiên quê ở huyện Đông Thành, khôi ngô tuấn tú, tài kiêm văn võ. Nghe tin triều đình mở khoa thi, Vân Tiên từ giã thầy xuống núi đua tài.",
    "Lê Quý Đôn, tên thuở nhỏ là Lê Danh Phương, là vị quan thời Lê trung hưng, cũng là nhà thơ và được mệnh danh là nhà bác học lớn của Việt Nam trong thời phong kiến",
    "Tất cả mọi người đều sinh ra có quyền bình đẳng. Tạo hóa cho họ những quyền không ai có thể xâm phạm được; trong những quyền ấy, có quyền được sống, quyền tự do và quyền mưu cầu hạnh phúc.",
]

# Wire the text box to the audio player through `speak`, then start serving.
demo = gr.Interface(
    fn=speak,
    inputs="text",
    outputs="audio",
    title=title,
    examples=examples,
    description=description,
    theme="default",
    allow_screenshot=False,
    allow_flagging="never",
)
demo.launch(debug=False)
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "resblock": "1",
3
+ "num_gpus": 0,
4
+ "batch_size": 16,
5
+ "learning_rate": 0.0002,
6
+ "adam_b1": 0.8,
7
+ "adam_b2": 0.99,
8
+ "lr_decay": 0.999,
9
+ "seed": 1234,
10
+
11
+ "upsample_rates": [8,8,2,2],
12
+ "upsample_kernel_sizes": [16,16,4,4],
13
+ "upsample_initial_channel": 512,
14
+ "resblock_kernel_sizes": [3,7,11],
15
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
16
+ "resblock_initial_channel": 256,
17
+
18
+ "segment_size": 8192,
19
+ "num_mels": 80,
20
+ "num_freq": 1025,
21
+ "n_fft": 1024,
22
+ "hop_size": 256,
23
+ "win_size": 1024,
24
+
25
+ "sampling_rate": 16000,
26
+
27
+ "fmin": 0,
28
+ "fmax": 8000,
29
+ "fmax_for_loss": null,
30
+
31
+ "num_workers": 4,
32
+
33
+ "dist_config": {
34
+ "dist_backend": "nccl",
35
+ "dist_url": "tcp://localhost:54321",
36
+ "world_size": 1
37
+ }
38
+ }
duration_latest_ckpt.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c3e6ac37d4f4b507e5be3010f67fcbc858e1ba20ed94b074da8b24015d6ca7c
3
+ size 22097018
hk_hifi.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f277328c8bbad7afadbdbe9ef33c4235b03b3b2505bdd06ac583cd1c4ad9787
3
+ size 55717030
lexicon.txt ADDED
The diff for this file is too large to render. See raw diff
 
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ libsndfile1-dev
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gdown
2
+ git+https://github.com/NTT123/vietTTS.git@demo
3
+ gradio==3.0.2