KaiChen1998 committed on
Commit
b3ea40b
·
1 Parent(s): 91deaa2

update code for speech tokenizer

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -21,10 +21,10 @@ auth_token = os.environ.get("TOKEN_FROM_SECRET")
21
  # Audio part
22
  ##########################################
23
  from huggingface_hub import snapshot_download
24
- snapshot_download(repo_id="Emova-ollm/emova_speech_tokenizer", local_dir='./speech', token=auth_token)
25
 
26
- from speech.speech_utils import s2u_extract_unit_demo, get_ckpt_config_path, load_model
27
- from speech.speech_utils import load_condition_centroid, get_config_checkpoint_file, load_U2S_model, synthesis
28
 
29
  ####################
30
  # S2U
@@ -35,19 +35,20 @@ unit_type = '40ms_multilingual_8888'
35
  language = 'English'
36
  s2u_model_name = 'SPIRAL-FSQ-CTC'
37
 
38
- ckpt_path, config_path = get_ckpt_config_path(unit_type, language)
39
- s2u_model = load_model(ckpt_path, config_path, s2u_model_name)
40
 
41
  ####################
42
  # U2S
43
  ####################
44
- condition2style_centroid_file = "./speech/condition_style_centroid/condition2style_centroid.txt"
45
  condition2style_centroid_file_dict, condition2style_centroid_embedding_dict = load_condition_centroid(condition2style_centroid_file)
46
 
47
  unit_type = '40ms_multilingual_8888_xujing_cosyvoice_FT'
48
  language = 'Chinese'
49
- model_config_file, model_checkpoint_file = get_config_checkpoint_file(unit_type, language)
50
  net_g, hps = load_U2S_model(model_config_file, model_checkpoint_file, unit_type)
 
51
 
52
  ####################
53
  # task format
 
21
  # Audio part
22
  ##########################################
23
  from huggingface_hub import snapshot_download
24
+ snapshot_download(repo_id="Emova-ollm/emova_speech_tokenizer", token=auth_token)
25
 
26
+ from emova_speech_tokenizer.speech_utils import get_S2U_ckpt_config_path, load_S2U_model, s2u_extract_unit_demo
27
+ from emova_speech_tokenizer.speech_utils import load_condition_centroid, get_U2S_config_checkpoint_file, load_U2S_model, synthesis
28
 
29
  ####################
30
  # S2U
 
35
  language = 'English'
36
  s2u_model_name = 'SPIRAL-FSQ-CTC'
37
 
38
+ ckpt_path, config_path = get_S2U_ckpt_config_path(unit_type, language)
39
+ s2u_model = load_S2U_model(ckpt_path, config_path, s2u_model_name).cuda()
40
 
41
  ####################
42
  # U2S
43
  ####################
44
+ condition2style_centroid_file = "./speech_tokenization/condition_style_centroid/condition2style_centroid.txt"
45
  condition2style_centroid_file_dict, condition2style_centroid_embedding_dict = load_condition_centroid(condition2style_centroid_file)
46
 
47
  unit_type = '40ms_multilingual_8888_xujing_cosyvoice_FT'
48
  language = 'Chinese'
49
+ model_config_file, model_checkpoint_file = get_U2S_config_checkpoint_file(unit_type, language)
50
  net_g, hps = load_U2S_model(model_config_file, model_checkpoint_file, unit_type)
51
+ net_g = net_g.cuda()
52
 
53
  ####################
54
  # task format