# Source: gpt-tts-ui / tts_ui / ui / __init__.py
# Author: hoonsubin
# Commit: 597e812 ("add base proj")
import gradio as gr
from tts_ui.utils import *
from tts_ui.tts.auralis_tts_engine import AuralisTTSEngine
# Language codes offered by every "Target Language" dropdown in the UI.
# "auto" is the entry the dropdowns select by default.
supported_langs: list[str] = [
    "en", "es", "fr", "de", "it", "pt",
    "pl", "tr", "ru", "nl", "cs", "ar",
    "zh-cn", "hu", "ko", "ja", "hi",
    "auto",
]
def _build_advanced_settings(enhance_default: bool = False) -> list:
    """Render the collapsed "Advanced settings" accordion shared by all tabs.

    Must be called while a Gradio layout context (for example a
    ``gr.Column``) is open, so the accordion lands inside that container.

    Args:
        enhance_default: Initial state of the "Enhance Reference Speech"
            checkbox.

    Returns:
        The created controls, in the order the generate handlers receive
        them: speed, enhance speech, temperature, top-p, top-k, repetition
        penalty, language.
    """
    with gr.Accordion("Advanced settings", open=False):
        speed = gr.Slider(
            label="Playback speed",
            minimum=0.5,
            maximum=2.0,
            value=1.0,
            step=0.1,
        )
        enhance_speech = gr.Checkbox(
            label="Enhance Reference Speech", value=enhance_default
        )
        temperature = gr.Slider(
            label="Temperature",
            minimum=0.5,
            maximum=1.0,
            value=0.75,
            step=0.05,
        )
        top_p = gr.Slider(
            label="Top P",
            minimum=0.5,
            maximum=1.0,
            value=0.85,
            step=0.05,
        )
        top_k = gr.Slider(
            label="Top K", minimum=0, maximum=100, value=50, step=10
        )
        repetition_penalty = gr.Slider(
            label="Repetition penalty",
            minimum=1.0,
            maximum=10.0,
            value=5.0,
            step=0.5,
        )
        language = gr.Dropdown(
            label="Target Language",
            choices=supported_langs,
            value="auto",
        )
    return [
        speed,
        enhance_speech,
        temperature,
        top_p,
        top_k,
        repetition_penalty,
        language,
    ]


def _build_output_column() -> list:
    """Render the result column and return ``[audio_output, log_output]``."""
    with gr.Column():
        audio_output = gr.Audio(label="Generated Audio")
        log_output = gr.Text(label="Log Output")
    return [audio_output, log_output]


def build_gradio_ui(tts_engine: AuralisTTSEngine) -> gr.Blocks:
    """Build the Gradio UI for Auralis and return it.

    The UI is only constructed here; this function does not call
    ``launch()``, so the caller decides when and how to serve it.

    Three tabs are created, each wired to one ``tts_engine`` method:

    * "Text to Speech"        -> ``process_text_and_generate``
    * "File to Speech"        -> ``process_file_and_generate``
    * "Clone With Microphone" -> ``process_mic_and_generate``

    Every handler is called with the tab's primary input, the reference
    audio, then the seven advanced settings (speed, enhance speech,
    temperature, top-p, top-k, repetition penalty, language), and its
    results are shown in the tab's audio player and log text box.

    Args:
        tts_engine: Engine whose ``process_*_and_generate`` methods serve as
            the click handlers.

    Returns:
        The assembled ``gr.Blocks`` application.
    """
    with gr.Blocks(title="Auralis TTS UI", theme="soft") as ui:
        gr.Markdown(
            """
            # Text-to-Speech Interface
            Convert text to speech with advanced voice cloning and enhancement.
            Powered by Auralis 🌌 made by Hoon
            """
        )

        with gr.Tab("Text to Speech"):
            with gr.Row():
                with gr.Column():
                    input_text = gr.Text(
                        label="Enter Text Here",
                        placeholder="Write the text you want to convert...",
                    )
                    ref_audio_files = gr.Files(
                        label="Reference Audio Files", file_types=["audio"]
                    )
                    settings = _build_advanced_settings()
                    generate_button = gr.Button("Generate Speech")
                outputs = _build_output_column()
            generate_button.click(
                fn=tts_engine.process_text_and_generate,
                inputs=[input_text, ref_audio_files, *settings],
                outputs=outputs,
            )

        with gr.Tab("File to Speech"):
            with gr.Row():
                with gr.Column():
                    file_input = gr.File(
                        label="Text / Ebook File",
                        file_types=[".txt", ".md", ".epub"],
                    )
                    ref_audio_files_file = gr.Files(
                        label="Reference Audio Files", file_types=["audio"]
                    )
                    settings_file = _build_advanced_settings()
                    generate_button_file = gr.Button(
                        "Generate Speech from File"
                    )
                outputs_file = _build_output_column()
            generate_button_file.click(
                fn=tts_engine.process_file_and_generate,
                inputs=[file_input, ref_audio_files_file, *settings_file],
                outputs=outputs_file,
            )

        with gr.Tab("Clone With Microphone"):
            with gr.Row():
                with gr.Column():
                    input_text_mic = gr.Text(
                        label="Enter Text Here",
                        placeholder="Write the text you want to convert...",
                    )
                    mic_ref_audio = gr.Audio(
                        label="Record Reference Audio", sources=["microphone"]
                    )
                    # Unlike the other tabs, reference-speech enhancement is
                    # switched on by default for microphone recordings.
                    settings_mic = _build_advanced_settings(
                        enhance_default=True
                    )
                    generate_button_mic = gr.Button("Generate Speech")
                outputs_mic = _build_output_column()
            generate_button_mic.click(
                fn=tts_engine.process_mic_and_generate,
                inputs=[input_text_mic, mic_ref_audio, *settings_mic],
                outputs=outputs_mic,
            )

    return ui