# Hugging Face Space (status when this page was captured: Sleeping)
# This Gradio app generates audio from text. It currently uses a placeholder sine-wave generator in place of a real text-to-speech model, and exposes configurable generation parameters in the UI.
import gradio as gr | |
import numpy as np | |
# Define a function that takes text plus generation parameters and returns audio (only `duration` currently affects the output).
def text_to_audio(text, use_sampling=True, top_k=250, top_p=0.0, temperature=1.0, duration=10.0, cfg_coef=3.0):
    """Return a placeholder audio clip for ``text``.

    This is a stand-in for a real text-to-speech model: it ignores the text
    and every sampling parameter and simply synthesizes a 440 Hz sine tone
    of the requested length.

    Args:
        text: Input text. Currently unused.
        use_sampling: Accepted for interface compatibility; currently unused.
        top_k: Accepted for interface compatibility; currently unused.
        top_p: Accepted for interface compatibility; currently unused.
        temperature: Accepted for interface compatibility; currently unused.
        duration: Length of the generated clip in seconds. May be a float
            (the UI slider supplies floats).
        cfg_coef: Accepted for interface compatibility; currently unused.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``sample_rate`` is 44100 and
        ``samples`` is a 1-D ``float32`` array with peak amplitude 0.5.
    """
    # For demonstration purposes, we generate a simple sine wave.
    # In a real application, you would use a text-to-speech model here.
    sr = 44100  # Sample rate in Hz
    frequency = 440  # Frequency in Hz (A4 note)

    # The sample count must be an integer: `duration` arrives as a float, and
    # NumPy rejects a float element count with a TypeError. Clamp at zero so a
    # non-positive duration yields an empty clip instead of an error.
    num_samples = max(0, int(round(sr * duration)))

    # Timestamps of each sample, spaced exactly 1/sr seconds apart.
    t = np.arange(num_samples, dtype=np.float64) / sr
    audio = (0.5 * np.sin(2 * np.pi * frequency * t)).astype(np.float32)
    return (sr, audio)
# Create a Gradio interface that passes the textbox, checkbox, and slider values to the text_to_audio function and sends the result to an audio component.
# NOTE(review): the original indentation was lost, so the nesting of
# components inside each Row below is reconstructed from statement order --
# confirm against the intended layout.
with gr.Blocks() as demo:
    # Row 1: the prompt and the sampling toggle.
    with gr.Row():
        text_input = gr.Textbox(label="Input Text")
        use_sampling_checkbox = gr.Checkbox(label="Use Sampling", value=True)

    # Row 2: top-k / top-p controls.
    with gr.Row():
        top_k_slider = gr.Slider(1, 1000, value=250, label="Top K")
        top_p_slider = gr.Slider(0.0, 1.0, value=0.0, label="Top P")

    # Row 3: temperature, clip duration, and CFG coefficient.
    with gr.Row():
        temperature_slider = gr.Slider(0.1, 10.0, value=1.0, label="Temperature")
        duration_slider = gr.Slider(1.0, 60.0, value=10.0, label="Duration (s)")
        cfg_coef_slider = gr.Slider(0.1, 10.0, value=3.0, label="CFG Coefficient")

    # type="numpy" matches the (sample_rate, ndarray) tuple that
    # text_to_audio returns.
    audio_output = gr.Audio(label="Generated Audio", type="numpy")

    # Regenerate audio whenever the text changes. Only the textbox has a
    # listener: moving a slider or toggling the checkbox does not trigger
    # regeneration until the text is edited again.
    text_input.change(
        fn=text_to_audio,
        inputs=[
            text_input,
            use_sampling_checkbox,
            top_k_slider,
            top_p_slider,
            temperature_slider,
            duration_slider,
            cfg_coef_slider,
        ],
        outputs=audio_output,
    )

# Launch the interface.
demo.launch(show_error=True)