Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -129,15 +129,15 @@ chat_tokenizer_state = None
|
|
| 129 |
F5TTS_model_cfg = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
|
| 130 |
E2TTS_model_cfg = dict(dim=1024, depth=24, heads=16, ff_mult=4)
|
| 131 |
|
|
|
|
|
|
|
|
|
|
| 132 |
F5TTS_ema_model = load_custom(
|
| 133 |
-
"hf://Gregniuki/F5-tts_English_German_Polish/English/model_222600.pt", "", F5TTS_model_cfg
|
| 134 |
-
)
|
| 135 |
-
E2TTS_ema_model = load_custom(
|
| 136 |
"hf://Gregniuki/F5-tts_English_German_Polish/multi/model_300000.pt", "", F5TTS_model_cfg
|
| 137 |
)
|
| 138 |
-
E2TTS_ema_model2 = load_custom(
|
| 139 |
-
"hf://Gregniuki/F5-tts_English_German_Polish/Polish/model_500000.pt", "", F5TTS_model_cfg
|
| 140 |
-
)
|
| 141 |
|
| 142 |
|
| 143 |
|
|
@@ -222,12 +222,12 @@ def text_to_ipa(text, language='en-gb'):
|
|
| 222 |
|
| 223 |
@gpu_decorator
|
| 224 |
def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress(), language='en-gb'):
|
| 225 |
-
if exp_name == "
|
| 226 |
ema_model = F5TTS_ema_model
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
#ref_audio, ref_text = preprocess_ref_audio_text(ref_audio, ref_text, show_info=show_info)
|
| 232 |
|
| 233 |
|
|
@@ -556,7 +556,7 @@ with gr.Blocks() as app_tts:
|
|
| 556 |
ref_audio_input = gr.Audio(label="Reference Audio", type="filepath")
|
| 557 |
gen_text_input = gr.Textbox(label="Text to Generate", lines=10)
|
| 558 |
model_choice = gr.Radio(
|
| 559 |
-
choices=["
|
| 560 |
)
|
| 561 |
language_choice = gr.Dropdown(
|
| 562 |
choices=["pl", "de", "en-us", "en-gb", "uk", "ru"], label="Choose Language", value="en-gb"
|
|
@@ -753,7 +753,7 @@ with gr.Blocks() as app_emotional:
|
|
| 753 |
|
| 754 |
# Model choice
|
| 755 |
model_choice_emotional = gr.Radio(
|
| 756 |
-
choices=["
|
| 757 |
)
|
| 758 |
|
| 759 |
with gr.Accordion("Advanced Settings", open=False):
|
|
|
|
| 129 |
F5TTS_model_cfg = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
|
| 130 |
E2TTS_model_cfg = dict(dim=1024, depth=24, heads=16, ff_mult=4)
|
| 131 |
|
| 132 |
+
#F5TTS_ema_model = load_custom(
|
| 133 |
+
# "hf://Gregniuki/F5-tts_English_German_Polish/English/model_222600.pt", "", F5TTS_model_cfg
|
| 134 |
+
#)
|
| 135 |
F5TTS_ema_model = load_custom(
|
|
|
|
|
|
|
|
|
|
| 136 |
"hf://Gregniuki/F5-tts_English_German_Polish/multi/model_300000.pt", "", F5TTS_model_cfg
|
| 137 |
)
|
| 138 |
+
#E2TTS_ema_model2 = load_custom(
|
| 139 |
+
# "hf://Gregniuki/F5-tts_English_German_Polish/Polish/model_500000.pt", "", F5TTS_model_cfg
|
| 140 |
+
#)
|
| 141 |
|
| 142 |
|
| 143 |
|
|
|
|
| 222 |
|
| 223 |
@gpu_decorator
|
| 224 |
def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress(), language='en-gb'):
|
| 225 |
+
if exp_name == "Multi":
|
| 226 |
ema_model = F5TTS_ema_model
|
| 227 |
+
# elif exp_name == "Polish":
|
| 228 |
+
# ema_model = E2TTS_ema_model
|
| 229 |
+
# elif exp_name == "Deutsch":
|
| 230 |
+
# ema_model = E2TTS_ema_model2
|
| 231 |
#ref_audio, ref_text = preprocess_ref_audio_text(ref_audio, ref_text, show_info=show_info)
|
| 232 |
|
| 233 |
|
|
|
|
| 556 |
ref_audio_input = gr.Audio(label="Reference Audio", type="filepath")
|
| 557 |
gen_text_input = gr.Textbox(label="Text to Generate", lines=10)
|
| 558 |
model_choice = gr.Radio(
|
| 559 |
+
choices=["Multi"], label="Choose TTS Model", value="Multi"
|
| 560 |
)
|
| 561 |
language_choice = gr.Dropdown(
|
| 562 |
choices=["pl", "de", "en-us", "en-gb", "uk", "ru"], label="Choose Language", value="en-gb"
|
|
|
|
| 753 |
|
| 754 |
# Model choice
|
| 755 |
model_choice_emotional = gr.Radio(
|
| 756 |
+
choices=["Multi"], label="Choose TTS Model", value="Multi"
|
| 757 |
)
|
| 758 |
|
| 759 |
with gr.Accordion("Advanced Settings", open=False):
|