Spaces: Running on Zero
Commit · b3ea40b · 1 Parent(s): 91deaa2
update code for speech tokenizer
Browse files
app.py (CHANGED)
@@ -21,10 +21,10 @@ auth_token = os.environ.get("TOKEN_FROM_SECRET")
|
|
21 |
# Audio part
|
22 |
##########################################
|
23 |
from huggingface_hub import snapshot_download
|
24 |
-
snapshot_download(repo_id="Emova-ollm/emova_speech_tokenizer",
|
25 |
|
26 |
-
from
|
27 |
-
from
|
28 |
|
29 |
####################
|
30 |
# S2U
|
@@ -35,19 +35,20 @@ unit_type = '40ms_multilingual_8888'
|
|
35 |
language = 'English'
|
36 |
s2u_model_name = 'SPIRAL-FSQ-CTC'
|
37 |
|
38 |
-
ckpt_path, config_path =
|
39 |
-
s2u_model =
|
40 |
|
41 |
####################
|
42 |
# U2S
|
43 |
####################
|
44 |
-
condition2style_centroid_file = "./
|
45 |
condition2style_centroid_file_dict, condition2style_centroid_embedding_dict = load_condition_centroid(condition2style_centroid_file)
|
46 |
|
47 |
unit_type = '40ms_multilingual_8888_xujing_cosyvoice_FT'
|
48 |
language = 'Chinese'
|
49 |
-
model_config_file, model_checkpoint_file =
|
50 |
net_g, hps = load_U2S_model(model_config_file, model_checkpoint_file, unit_type)
|
|
|
51 |
|
52 |
####################
|
53 |
# task format
|
|
|
21 |
# Audio part
|
22 |
##########################################
|
23 |
from huggingface_hub import snapshot_download
|
24 |
+
snapshot_download(repo_id="Emova-ollm/emova_speech_tokenizer", token=auth_token)
|
25 |
|
26 |
+
from emova_speech_tokenizer.speech_utils import get_S2U_ckpt_config_path, load_S2U_model, s2u_extract_unit_demo
|
27 |
+
from emova_speech_tokenizer.speech_utils import load_condition_centroid, get_U2S_config_checkpoint_file, load_U2S_model, synthesis
|
28 |
|
29 |
####################
|
30 |
# S2U
|
|
|
35 |
language = 'English'
|
36 |
s2u_model_name = 'SPIRAL-FSQ-CTC'
|
37 |
|
38 |
+
ckpt_path, config_path = get_S2U_ckpt_config_path(unit_type, language)
|
39 |
+
s2u_model = load_S2U_model(ckpt_path, config_path, s2u_model_name).cuda()
|
40 |
|
41 |
####################
|
42 |
# U2S
|
43 |
####################
|
44 |
+
condition2style_centroid_file = "./speech_tokenization/condition_style_centroid/condition2style_centroid.txt"
|
45 |
condition2style_centroid_file_dict, condition2style_centroid_embedding_dict = load_condition_centroid(condition2style_centroid_file)
|
46 |
|
47 |
unit_type = '40ms_multilingual_8888_xujing_cosyvoice_FT'
|
48 |
language = 'Chinese'
|
49 |
+
model_config_file, model_checkpoint_file = get_U2S_config_checkpoint_file(unit_type, language)
|
50 |
net_g, hps = load_U2S_model(model_config_file, model_checkpoint_file, unit_type)
|
51 |
+
net_g = net_g.cuda()
|
52 |
|
53 |
####################
|
54 |
# task format
|