# Spaces:
# Build error
# Build error
import gradio as gr

from tts_ui.utils import *
from tts_ui.tts.auralis_tts_engine import AuralisTTSEngine
# Language codes offered as choices by the "Target Language" dropdowns below.
# "auto" is the dropdowns' default selection; presumably it lets the engine
# pick the language itself -- confirm against the TTS engine.
supported_langs: list[str] = [
    "en",
    "es",
    "fr",
    "de",
    "it",
    "pt",
    "pl",
    "tr",
    "ru",
    "nl",
    "cs",
    "ar",
    "zh-cn",
    "hu",
    "ko",
    "ja",
    "hi",
    "auto",
]
def _build_advanced_settings(*, enhance_default: bool = False) -> list:
    """Render the collapsed "Advanced settings" accordion for one tab.

    The components are created in whatever Gradio layout context is active
    at the call site, so this must be called inside a ``with gr.Blocks()``
    (or nested layout) block.

    Args:
        enhance_default: Initial state of the "Enhance Reference Speech"
            checkbox.

    Returns:
        The created input components, in the order: playback speed,
        enhance speech, temperature, top-p, top-k, repetition penalty,
        target language.
    """
    with gr.Accordion("Advanced settings", open=False):
        speed = gr.Slider(
            label="Playback speed",
            minimum=0.5,
            maximum=2.0,
            value=1.0,
            step=0.1,
        )
        enhance_speech = gr.Checkbox(
            label="Enhance Reference Speech", value=enhance_default
        )
        temperature = gr.Slider(
            label="Temperature",
            minimum=0.5,
            maximum=1.0,
            value=0.75,
            step=0.05,
        )
        top_p = gr.Slider(
            label="Top P",
            minimum=0.5,
            maximum=1.0,
            value=0.85,
            step=0.05,
        )
        top_k = gr.Slider(
            label="Top K", minimum=0, maximum=100, value=50, step=10
        )
        repetition_penalty = gr.Slider(
            label="Repetition penalty",
            minimum=1.0,
            maximum=10.0,
            value=5.0,
            step=0.5,
        )
        language = gr.Dropdown(
            label="Target Language",
            choices=supported_langs,
            value="auto",
        )
    return [
        speed,
        enhance_speech,
        temperature,
        top_p,
        top_k,
        repetition_penalty,
        language,
    ]


def _build_output_column() -> list:
    """Render the result column and return ``[audio_output, log_output]``."""
    with gr.Column():
        audio_output = gr.Audio(label="Generated Audio")
        log_output = gr.Text(label="Log Output")
    return [audio_output, log_output]


def build_gradio_ui(tts_engine: AuralisTTSEngine) -> gr.Blocks:
    """Build the Gradio UI for Auralis and return it.

    The interface has three tabs ("Text to Speech", "File to Speech" and
    "Clone With Microphone"). Each tab wires its generate button to the
    matching ``tts_engine.process_*_and_generate`` handler. This function
    only constructs the UI; it does not launch it.

    Args:
        tts_engine: Engine whose ``process_text_and_generate``,
            ``process_file_and_generate`` and ``process_mic_and_generate``
            methods are registered as click handlers. Each handler receives
            the tab's primary input, its reference audio, and then the seven
            advanced settings in the order returned by
            ``_build_advanced_settings``.

    Returns:
        The assembled ``gr.Blocks`` application.
    """
    with gr.Blocks(title="Auralis TTS UI", theme="soft") as ui:
        gr.Markdown(
            """
            # Text-to-Speech Interface
            Convert text to speech with advanced voice cloning and enhancement.
            Powered by Auralis 🌌 made by Hoon
            """
        )

        with gr.Tab("Text to Speech"):
            with gr.Row():
                with gr.Column():
                    input_text = gr.Text(
                        label="Enter Text Here",
                        placeholder="Write the text you want to convert...",
                    )
                    ref_audio_files = gr.Files(
                        label="Reference Audio Files", file_types=["audio"]
                    )
                    settings = _build_advanced_settings()
                    generate_button = gr.Button("Generate Speech")
                outputs = _build_output_column()
            generate_button.click(
                fn=tts_engine.process_text_and_generate,
                inputs=[input_text, ref_audio_files, *settings],
                outputs=outputs,
            )

        with gr.Tab("File to Speech"):
            with gr.Row():
                with gr.Column():
                    file_input = gr.File(
                        label="Text / Ebook File",
                        file_types=[".txt", ".md", ".epub"],
                    )
                    ref_audio_files_file = gr.Files(
                        label="Reference Audio Files", file_types=["audio"]
                    )
                    settings_file = _build_advanced_settings()
                    generate_button_file = gr.Button(
                        "Generate Speech from File"
                    )
                outputs_file = _build_output_column()
            generate_button_file.click(
                fn=tts_engine.process_file_and_generate,
                inputs=[file_input, ref_audio_files_file, *settings_file],
                outputs=outputs_file,
            )

        with gr.Tab("Clone With Microphone"):
            with gr.Row():
                with gr.Column():
                    input_text_mic = gr.Text(
                        label="Enter Text Here",
                        placeholder="Write the text you want to convert...",
                    )
                    mic_ref_audio = gr.Audio(
                        label="Record Reference Audio",
                        sources=["microphone"],
                    )
                    # Unlike the other tabs, reference-speech enhancement
                    # is switched on by default for microphone recordings.
                    settings_mic = _build_advanced_settings(
                        enhance_default=True
                    )
                    generate_button_mic = gr.Button("Generate Speech")
                outputs_mic = _build_output_column()
            generate_button_mic.click(
                fn=tts_engine.process_mic_and_generate,
                inputs=[input_text_mic, mic_ref_audio, *settings_mic],
                outputs=outputs_mic,
            )

    return ui