Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -85,7 +85,8 @@ ode_method = "euler"
|
|
85 |
sway_sampling_coef = -1.0
|
86 |
speed = 1
|
87 |
fix_duration = None
|
88 |
-
|
|
|
89 |
|
90 |
|
91 |
DEFAULT_TTS_MODEL = "F5-TTS"
|
@@ -262,7 +263,8 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence,
|
|
262 |
# Define weights for characters
|
263 |
punctuation_weights = {",": 0, ".": 0, " ": 0} # Add more punctuation as needed
|
264 |
progress = tqdm(gen_text_batches)
|
265 |
-
ipa_text_ref = text_to_ipa(ref_text, language=
|
|
|
266 |
print(language)
|
267 |
|
268 |
for i, gen_text in enumerate(progress):
|
@@ -444,7 +446,7 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
|
|
444 |
ref_audio,
|
445 |
chunk_length_s=15,
|
446 |
batch_size=128,
|
447 |
-
generate_kwargs={"task": "transcribe"# ,"language":
|
448 |
},
|
449 |
return_timestamps=False,
|
450 |
)["text"].strip()
|
@@ -552,6 +554,11 @@ def update_language(new_language):
|
|
552 |
language = new_language
|
553 |
return f"Language set to: {language}"
|
554 |
|
|
|
|
|
|
|
|
|
|
|
555 |
def update_speed(new_speed):
|
556 |
global speed
|
557 |
speed = new_speed
|
@@ -572,8 +579,47 @@ with gr.Blocks() as app_tts:
|
|
572 |
model_choice = gr.Radio(
|
573 |
choices=["Multi"], label="Choose TTS Model", value="Multi"
|
574 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
575 |
language_choice = gr.Dropdown(
|
576 |
-
choices=["pl", "de", "en
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
577 |
)
|
578 |
generate_btn = gr.Button("Synthesize", variant="primary")
|
579 |
with gr.Accordion("Advanced Settings", open=False):
|
@@ -605,6 +651,7 @@ with gr.Blocks() as app_tts:
|
|
605 |
)
|
606 |
speed_slider.change(update_speed, inputs=speed_slider)
|
607 |
language_choice.change(update_language, inputs=language_choice)
|
|
|
608 |
|
609 |
|
610 |
audio_output = gr.Audio(label="Synthesized Audio")
|
|
|
85 |
sway_sampling_coef = -1.0
|
86 |
speed = 1
|
87 |
fix_duration = None
|
88 |
+
ref_language = "en-us"
|
89 |
+
language = "en-us"
|
90 |
|
91 |
|
92 |
DEFAULT_TTS_MODEL = "F5-TTS"
|
|
|
263 |
# Define weights for characters
|
264 |
punctuation_weights = {",": 0, ".": 0, " ": 0} # Add more punctuation as needed
|
265 |
progress = tqdm(gen_text_batches)
|
266 |
+
ipa_text_ref = text_to_ipa(ref_text, language=ref_language)
|
267 |
+
print(ref_language)
|
268 |
print(language)
|
269 |
|
270 |
for i, gen_text in enumerate(progress):
|
|
|
446 |
ref_audio,
|
447 |
chunk_length_s=15,
|
448 |
batch_size=128,
|
449 |
+
generate_kwargs={"task": "transcribe"# ,"language": ref_language # Use the variable here
|
450 |
},
|
451 |
return_timestamps=False,
|
452 |
)["text"].strip()
|
|
|
554 |
language = new_language
|
555 |
return f"Language set to: {language}"
|
556 |
|
557 |
+
def update_language1(new_ref_language):
    """Record the language selected for the reference audio/text.

    Rebinds the module-level ``ref_language``, which the inference code
    passes to ``text_to_ipa`` when converting the reference text.

    Args:
        new_ref_language: Language code picked in the reference-language
            dropdown, e.g. ``"en-us"``.

    Returns:
        A status string naming the newly selected reference language. The
        ``language_choice1.change(...)`` hookup in this file wires no
        outputs, so the string is informational only.
    """
    global ref_language
    ref_language = new_ref_language
    # Name the *reference* language explicitly so this message cannot be
    # confused with the one update_language() returns for the synthesis
    # language.
    return f"Reference language set to: {ref_language}"
|
561 |
+
|
562 |
def update_speed(new_speed):
|
563 |
global speed
|
564 |
speed = new_speed
|
|
|
579 |
model_choice = gr.Radio(
|
580 |
choices=["Multi"], label="Choose TTS Model", value="Multi"
|
581 |
)
|
582 |
+
# Selector for the language of the reference audio/text. Its value is
# forwarded to update_language1() by the language_choice1.change hookup.
# A space is required after "#" for Markdown to treat the line as a heading.
gr.Markdown("# Select Reference Language")
language_choice1 = gr.Dropdown(
    choices=[
        "pl",  # Polish
        "de",  # German
        "en-us",  # English (US)
        "en-gb",  # English (UK)
        "uk",  # Ukrainian
        "ru",  # Russian
        "cs",  # Czech
        "sk",  # Slovak
        "bg",  # Bulgarian
        "sr",  # Serbian
        "hr",  # Croatian
        "sl",  # Slovenian
        "be",  # Belarusian
        "lt",  # Lithuanian
        "lv",  # Latvian
        "et",  # Estonian
        "fi",  # Finnish
        "hu",  # Hungarian
        "sv",  # Swedish
        "no",  # Norwegian
        "da",  # Danish
        "is",  # Icelandic
        "nl",  # Dutch
    ],
    # Distinct label: the synthesized-language dropdown below otherwise
    # carries the exact same caption.
    label="Choose Reference Language",
    value="en-us",
)
|
603 |
+
# Selector for the language of the text to synthesize. Its value is
# forwarded to update_language() by the language_choice.change hookup.
# A space is required after "#" for Markdown to treat the line as a heading.
gr.Markdown("# Select Synthesized Language")
language_choice = gr.Dropdown(
    choices=[
        "pl",  # Polish
        "de",  # German
        "en-us",  # English (US)
        "en-gb",  # English (UK)
        "uk",  # Ukrainian
        "ru",  # Russian
        "cs",  # Czech
        "sk",  # Slovak
        "bg",  # Bulgarian
        "sr",  # Serbian
        "hr",  # Croatian
        "sl",  # Slovenian
        "be",  # Belarusian
        "lt",  # Lithuanian
        "lv",  # Latvian
        "et",  # Estonian
        "fi",  # Finnish
        "hu",  # Hungarian
        "sv",  # Swedish
        "no",  # Norwegian
        "da",  # Danish
        "is",  # Icelandic
        "nl",  # Dutch
    ],
    # Distinct label: the reference-language dropdown above otherwise
    # carries the exact same caption.
    label="Choose Synthesized Language",
    value="en-us",
)
|
624 |
generate_btn = gr.Button("Synthesize", variant="primary")
|
625 |
with gr.Accordion("Advanced Settings", open=False):
|
|
|
651 |
)
|
652 |
speed_slider.change(update_speed, inputs=speed_slider)
|
653 |
language_choice.change(update_language, inputs=language_choice)
|
654 |
+
language_choice1.change(update_language1, inputs=language_choice1)
|
655 |
|
656 |
|
657 |
audio_output = gr.Audio(label="Synthesized Audio")
|