audiogen-3 / app.py
Gertie01's picture
initial commit
082dbde verified
# This Gradio app demonstrates a text-to-audio interface with configurable generation parameters. The audio is currently a placeholder sine wave; no text-to-speech model is wired in yet.
import gradio as gr
import numpy as np
# Define a function that takes text and generates audio with configurable parameters.
def text_to_audio(text, use_sampling=True, top_k=250, top_p=0.0, temperature=1.0, duration=10.0, cfg_coef=3.0):
    """Return a placeholder audio clip for the given text.

    No speech model is used here: the result is always a 440 Hz sine tone
    whose length is controlled by ``duration``. ``text`` and the sampling
    parameters are accepted so the signature lines up with the UI inputs,
    but they do not influence the generated samples.

    Args:
        text: Prompt text (currently unused).
        use_sampling: Sampling toggle (currently unused).
        top_k: Top-k setting (currently unused).
        top_p: Top-p setting (currently unused).
        temperature: Temperature setting (currently unused).
        duration: Clip length in seconds, int or float. Values <= 0
            produce an empty clip.
        cfg_coef: CFG coefficient (currently unused).

    Returns:
        A ``(sample_rate, samples)`` tuple where ``sample_rate`` is 44100
        and ``samples`` is a 1-D ``float32`` array with amplitude 0.5.
    """
    sr = 44100  # Sample rate in Hz
    frequency = 440  # Tone frequency in Hz (A4 note)

    # The sample count must be an integer: ``duration`` defaults to a float
    # (10.0), and NumPy refuses a float ``num`` in ``np.linspace``. Clamp at
    # zero so a non-positive duration yields an empty clip instead of raising.
    num_samples = max(int(round(sr * duration)), 0)

    # Build the time axis from sample indices so the spacing is exactly 1/sr.
    t = np.arange(num_samples, dtype=np.float64) / sr
    audio = (0.5 * np.sin(2 * np.pi * frequency * t)).astype(np.float32)
    return (sr, audio)
# Build the UI: a prompt box plus generation controls. Editing the prompt
# calls text_to_audio and shows the result in the audio component.
# NOTE(review): the row grouping below is reconstructed from the order of the
# `with gr.Row():` statements; confirm it matches the intended layout.
with gr.Blocks() as demo:
    # Row 1: prompt text and the sampling toggle.
    with gr.Row():
        text_input = gr.Textbox(label="Input Text")
        use_sampling_checkbox = gr.Checkbox(label="Use Sampling", value=True)

    # Row 2: top-k / top-p controls.
    with gr.Row():
        top_k_slider = gr.Slider(minimum=1, maximum=1000, value=250, label="Top K")
        top_p_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, label="Top P")

    # Row 3: temperature, clip duration and CFG coefficient.
    with gr.Row():
        temperature_slider = gr.Slider(minimum=0.1, maximum=10.0, value=1.0, label="Temperature")
        duration_slider = gr.Slider(minimum=1.0, maximum=60.0, value=10.0, label="Duration (s)")
        cfg_coef_slider = gr.Slider(minimum=0.1, maximum=10.0, value=3.0, label="CFG Coefficient")

    audio_output = gr.Audio(label="Generated Audio", type="numpy")

    # Component order here must match text_to_audio's positional parameters.
    generation_inputs = [
        text_input,
        use_sampling_checkbox,
        top_k_slider,
        top_p_slider,
        temperature_slider,
        duration_slider,
        cfg_coef_slider,
    ]

    # Only changes to the prompt text trigger generation.
    text_input.change(
        fn=text_to_audio,
        inputs=generation_inputs,
        outputs=audio_output,
    )

# Start the app; show_error=True is passed through to Gradio's launch().
demo.launch(show_error=True)