Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -129,15 +129,15 @@ chat_tokenizer_state = None
|
|
129 |
F5TTS_model_cfg = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
|
130 |
E2TTS_model_cfg = dict(dim=1024, depth=24, heads=16, ff_mult=4)
|
131 |
|
|
|
|
|
|
|
132 |
F5TTS_ema_model = load_custom(
|
133 |
-
"hf://Gregniuki/F5-tts_English_German_Polish/English/model_222600.pt", "", F5TTS_model_cfg
|
134 |
-
)
|
135 |
-
E2TTS_ema_model = load_custom(
|
136 |
"hf://Gregniuki/F5-tts_English_German_Polish/multi/model_300000.pt", "", F5TTS_model_cfg
|
137 |
)
|
138 |
-
E2TTS_ema_model2 = load_custom(
|
139 |
-
"hf://Gregniuki/F5-tts_English_German_Polish/Polish/model_500000.pt", "", F5TTS_model_cfg
|
140 |
-
)
|
141 |
|
142 |
|
143 |
|
@@ -222,12 +222,12 @@ def text_to_ipa(text, language='en-gb'):
|
|
222 |
|
223 |
@gpu_decorator
|
224 |
def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress(), language='en-gb'):
|
225 |
-
if exp_name == "
|
226 |
ema_model = F5TTS_ema_model
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
#ref_audio, ref_text = preprocess_ref_audio_text(ref_audio, ref_text, show_info=show_info)
|
232 |
|
233 |
|
@@ -556,7 +556,7 @@ with gr.Blocks() as app_tts:
|
|
556 |
ref_audio_input = gr.Audio(label="Reference Audio", type="filepath")
|
557 |
gen_text_input = gr.Textbox(label="Text to Generate", lines=10)
|
558 |
model_choice = gr.Radio(
|
559 |
-
choices=["
|
560 |
)
|
561 |
language_choice = gr.Dropdown(
|
562 |
choices=["pl", "de", "en-us", "en-gb", "uk", "ru"], label="Choose Language", value="en-gb"
|
@@ -753,7 +753,7 @@ with gr.Blocks() as app_emotional:
|
|
753 |
|
754 |
# Model choice
|
755 |
model_choice_emotional = gr.Radio(
|
756 |
-
choices=["
|
757 |
)
|
758 |
|
759 |
with gr.Accordion("Advanced Settings", open=False):
|
|
|
129 |
F5TTS_model_cfg = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
|
130 |
E2TTS_model_cfg = dict(dim=1024, depth=24, heads=16, ff_mult=4)
|
131 |
|
132 |
+
#F5TTS_ema_model = load_custom(
|
133 |
+
# "hf://Gregniuki/F5-tts_English_German_Polish/English/model_222600.pt", "", F5TTS_model_cfg
|
134 |
+
#)
|
135 |
F5TTS_ema_model = load_custom(
|
|
|
|
|
|
|
136 |
"hf://Gregniuki/F5-tts_English_German_Polish/multi/model_300000.pt", "", F5TTS_model_cfg
|
137 |
)
|
138 |
+
#E2TTS_ema_model2 = load_custom(
|
139 |
+
# "hf://Gregniuki/F5-tts_English_German_Polish/Polish/model_500000.pt", "", F5TTS_model_cfg
|
140 |
+
#)
|
141 |
|
142 |
|
143 |
|
|
|
222 |
|
223 |
@gpu_decorator
|
224 |
def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress(), language='en-gb'):
|
225 |
+
if exp_name == "Multi":
|
226 |
ema_model = F5TTS_ema_model
|
227 |
+
# elif exp_name == "Polish":
|
228 |
+
# ema_model = E2TTS_ema_model
|
229 |
+
# elif exp_name == "Deutsch":
|
230 |
+
# ema_model = E2TTS_ema_model2
|
231 |
#ref_audio, ref_text = preprocess_ref_audio_text(ref_audio, ref_text, show_info=show_info)
|
232 |
|
233 |
|
|
|
556 |
ref_audio_input = gr.Audio(label="Reference Audio", type="filepath")
|
557 |
gen_text_input = gr.Textbox(label="Text to Generate", lines=10)
|
558 |
model_choice = gr.Radio(
|
559 |
+
choices=["Multi"], label="Choose TTS Model", value="Multi"
|
560 |
)
|
561 |
language_choice = gr.Dropdown(
|
562 |
choices=["pl", "de", "en-us", "en-gb", "uk", "ru"], label="Choose Language", value="en-gb"
|
|
|
753 |
|
754 |
# Model choice
|
755 |
model_choice_emotional = gr.Radio(
|
756 |
+
choices=["Multi"], label="Choose TTS Model", value="Multi"
|
757 |
)
|
758 |
|
759 |
with gr.Accordion("Advanced Settings", open=False):
|