# 🎤 KittenTTS

import gradio as gr
import soundfile as sf
import numpy as np
from kittentts import KittenTTS

# Instantiate the TTS model once at import time; generate_speech() reuses
# this single module-level instance for every request.
_MODEL_NAME = "KittenML/kitten-tts-nano-0.1"
model = KittenTTS(_MODEL_NAME)

# Voice identifiers offered in the dropdown: numbers 2 through 5, each with an
# "m" and an "f" variant, ordered number-first with "m" before "f".
AVAILABLE_VOICES = [
    f"expr-voice-{number}-{variant}"
    for number in range(2, 6)
    for variant in ("m", "f")
]

# Sample rate (Hz) of the audio produced by KittenTTS. gr.Audio cannot play a
# bare sample array; it needs the rate alongside the samples.
KITTEN_TTS_SAMPLE_RATE = 24000


def generate_speech(text, voice):
    """Generate speech from text using KittenTTS.

    Args:
        text: The text to synthesize. ``None``, empty, or whitespace-only
            input is rejected with a prompt message instead of being sent
            to the model.
        voice: Voice identifier forwarded to ``model.generate``.

    Returns:
        A ``(audio, status)`` pair for the Gradio outputs. On success
        ``audio`` is ``(KITTEN_TTS_SAMPLE_RATE, samples)`` where ``samples``
        is a 1-D, peak-normalized NumPy array. On rejected input or any
        failure ``audio`` is ``None`` and ``status`` explains why.
    """
    # Guard against None as well as blank strings so .strip() cannot raise.
    if not text or not text.strip():
        return None, "Please enter some text to generate speech."

    try:
        audio = np.asarray(model.generate(text, voice=voice))

        # Collapse multi-channel output to mono by averaging across axis 1.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        # An empty result cannot be normalized or played; report it clearly
        # instead of surfacing an opaque NumPy reduction error.
        if audio.size == 0:
            return None, "❌ Error generating speech: model returned no audio"

        # Peak-normalize; skip all-zero (silent) output to avoid dividing by 0.
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak

        return (
            (KITTEN_TTS_SAMPLE_RATE, audio),
            f"✅ Successfully generated speech with voice: {voice}",
        )

    except Exception as e:
        # UI boundary: show the failure in the status box rather than crashing
        # the request handler.
        return None, f"❌ Error generating speech: {str(e)}"

# Build the Gradio UI: banner on top, inputs next to a voice reference panel,
# and the audio/status results underneath.
with gr.Blocks(title="KittenTTS - High Quality Text-to-Speech") as demo:

    # Centered page banner.
    gr.HTML(value="""
    <div style="text-align: center; margin-bottom: 2rem;">
        <h1>🎤 KittenTTS</h1>
        <p><em>High Quality Text-to-Speech Generation</em></p>
    </div>
    """)

    with gr.Row():
        # First column: text entry, voice picker and the trigger button.
        with gr.Column():
            text_input = gr.Textbox(
                lines=4,
                label="Enter your text",
                placeholder="Type or paste your text here...",
            )
            voice_dropdown = gr.Dropdown(
                label="Select Voice",
                choices=AVAILABLE_VOICES,
                value=AVAILABLE_VOICES[1],  # preselect the second listed voice
            )
            generate_btn = gr.Button(value="🎵 Generate Speech", variant="primary")

        # Second column: static list of the voice ids grouped by gender.
        with gr.Column():
            gr.HTML(value="""
            <div style="background: #f0f0f0; padding: 1rem; border-radius: 8px;">
                <h3>Available Voices:</h3>
                <ul>
                    <li><strong>Male:</strong> expr-voice-2-m, expr-voice-3-m, expr-voice-4-m, expr-voice-5-m</li>
                    <li><strong>Female:</strong> expr-voice-2-f, expr-voice-3-f, expr-voice-4-f, expr-voice-5-f</li>
                </ul>
            </div>
            """)

    # Result widgets: the audio player and a read-only status line.
    audio_output = gr.Audio(label="Generated Audio")
    status_output = gr.Textbox(interactive=False, label="Status")

    # The button click and the textbox's submit event both run the same
    # handler with identical input/output wiring.
    _generate_event = {
        "fn": generate_speech,
        "inputs": [text_input, voice_dropdown],
        "outputs": [audio_output, status_output],
    }
    generate_btn.click(**_generate_event)
    text_input.submit(**_generate_event)

# Start the demo server only when this file is run as a script, not on import.
if __name__ == "__main__":
    _launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**_launch_options)