Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import soundfile as sf | |
| import numpy as np | |
| from kittentts import KittenTTS | |
| import os | |
# Load the TTS model once at import time; generate_speech reuses this instance.
model = KittenTTS("KittenML/kitten-tts-nano-0.1")

# Voice identifiers offered in the UI, ordered as
# expr-voice-2-m, expr-voice-2-f, expr-voice-3-m, ... expr-voice-5-f.
AVAILABLE_VOICES = [
    f"expr-voice-{number}-{gender}"
    for number in (2, 3, 4, 5)
    for gender in ("m", "f")
]
def generate_speech(text, voice, progress=gr.Progress()):
    """
    Generate speech from text using KittenTTS.

    Args:
        text: Text to synthesize. ``None`` and whitespace-only input are
            rejected with a status message instead of reaching the model.
        voice: Voice identifier passed through to ``model.generate``.
        progress: Gradio progress tracker used to report coarse stages.

    Returns:
        A ``(audio, status)`` pair. On success ``audio`` is a
        ``(sample_rate, samples)`` tuple, which is the form ``gr.Audio``
        accepts for numpy output; on empty input or failure it is ``None``.
        ``status`` is always a human-readable message.
    """
    # Matches the "Sample Rate: 24kHz" stated in the demo footer.
    sample_rate = 24000

    # Guard against None as well as blank text so .strip() cannot raise.
    if not text or not text.strip():
        return None, "Please enter some text to generate speech."

    try:
        progress(0.3, desc="Loading model...")
        progress(0.6, desc="Generating speech...")
        audio = np.asarray(model.generate(text, voice=voice))

        progress(0.9, desc="Processing audio...")
        if audio.size == 0:
            return None, "❌ Error generating speech: model returned no audio"

        # Downmix to mono; assumes a (samples, channels) layout when the
        # model returns more than one dimension -- TODO confirm.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        # Peak-normalize, leaving all-zero (silent) output untouched.
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak

        progress(1.0, desc="Complete!")
        return (
            (sample_rate, audio),
            f"✅ Successfully generated speech with voice: {voice}",
        )
    except Exception as e:
        # UI boundary: report the failure in the status box rather than
        # letting the request crash.
        return None, f"❌ Error generating speech: {str(e)}"
def create_demo():
    """
    Create the Gradio demo interface.

    Lays out a text box, voice dropdown and generate button next to a voice
    reference panel, followed by the audio/status outputs, example prompts
    and a footer. Both the button click and pressing Enter in the text box
    call ``generate_speech``.

    Returns:
        The constructed ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """
    # Custom CSS for better styling
    css = """
    .gradio-container {
        max-width: 800px !important;
        margin: auto !important;
    }
    .main-header {
        text-align: center;
        margin-bottom: 2rem;
    }
    .voice-selector {
        margin: 1rem 0;
    }
    .output-audio {
        margin-top: 1rem;
    }
    """

    # Derive the voice summary from AVAILABLE_VOICES so the UI text cannot
    # drift from the dropdown choices. Gender is encoded in the -m/-f suffix.
    male_voices = [v for v in AVAILABLE_VOICES if v.endswith("-m")]
    female_voices = [v for v in AVAILABLE_VOICES if v.endswith("-f")]
    voice_info = (
        f"Choose from {len(AVAILABLE_VOICES)} different voices "
        f"({len(male_voices)} male, {len(female_voices)} female)"
    )

    with gr.Blocks(css=css, title="KittenTTS - High Quality Text-to-Speech") as demo:
        # Header
        gr.HTML("""
        <div class="main-header">
            <h1>🎤 KittenTTS</h1>
            <p><em>High Quality Text-to-Speech Generation</em></p>
            <p>Generate natural-sounding speech from text using the KittenTTS model</p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # Text input
                text_input = gr.Textbox(
                    label="Enter your text",
                    placeholder="Type or paste your text here...",
                    lines=4,
                    max_lines=10,
                )
                # Voice selection
                voice_dropdown = gr.Dropdown(
                    choices=AVAILABLE_VOICES,
                    value=AVAILABLE_VOICES[1],  # Default to female voice
                    label="Select Voice",
                    info=voice_info,
                )
                # Generate button
                generate_btn = gr.Button(
                    "🎵 Generate Speech",
                    variant="primary",
                    size="lg",
                )

            with gr.Column(scale=1):
                # Voice info
                gr.HTML(f"""
                <div style="background: #f0f0f0; padding: 1rem; border-radius: 8px;">
                    <h3>Available Voices:</h3>
                    <ul>
                        <li><strong>Male voices:</strong> {", ".join(male_voices)}</li>
                        <li><strong>Female voices:</strong> {", ".join(female_voices)}</li>
                    </ul>
                </div>
                """)

        # Output section
        with gr.Row():
            with gr.Column():
                # Audio output
                audio_output = gr.Audio(
                    label="Generated Audio",
                    type="numpy",
                )
                # Status message
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                )

        # Example texts
        gr.Examples(
            examples=[
                ["Hello! This is a demonstration of the KittenTTS model.", "expr-voice-2-f"],
                ["The quick brown fox jumps over the lazy dog.", "expr-voice-2-m"],
                ["Welcome to our high-quality text-to-speech system.", "expr-voice-3-f"],
                ["This model works without requiring a GPU.", "expr-voice-3-m"],
            ],
            inputs=[text_input, voice_dropdown],
        )

        # Footer
        gr.HTML("""
        <div style="text-align: center; margin-top: 2rem; padding: 1rem; background: #f9f9f9; border-radius: 8px;">
            <p><strong>KittenTTS</strong> - Powered by <a href="https://huggingface.co/KittenML/kitten-tts-nano-0.1" target="_blank">KittenML/kitten-tts-nano-0.1</a></p>
            <p>Model: KittenTTS Nano v0.1 | Sample Rate: 24kHz</p>
        </div>
        """)

        # The button click and pressing Enter in the text box run the same
        # handler with the same inputs and outputs.
        for register in (generate_btn.click, text_input.submit):
            register(
                fn=generate_speech,
                inputs=[text_input, voice_dropdown],
                outputs=[audio_output, status_output],
            )

    return demo
# Create and launch the demo
if __name__ == "__main__":
    demo = create_demo()
    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces
        server_port=7860,
        # No share tunnel: Gradio share links are unsupported on Hugging Face
        # Spaces, and on a local run they would expose the app through a
        # public URL by default.
        share=False,
        debug=False,
    )