# 🎤 AI Text-to-Speech Chatbot

"""
AI Text-to-Speech Chatbot - Gradio Version for Hugging Face Spaces
"""

import gradio as gr
import tempfile
import uuid
import os
import re
import base64
import io
import soundfile as sf

# Global TTS model instance
model = None

def initialize_model():
    """Load the KittenTTS model into the module-level ``model`` variable.

    The ``kittentts`` import is performed inside the ``try`` so that a
    missing or broken package is reported as a failed initialization rather
    than raising out of this function.

    Returns:
        bool: True when the model was created and the success message was
        printed, False when any exception was raised along the way.
    """
    global model
    loaded = False
    try:
        from kittentts import KittenTTS

        model = KittenTTS("KittenML/kitten-tts-nano-0.1")
        print("✅ Model initialized successfully")
        loaded = True
    except Exception as init_error:
        # Startup must not crash: report the problem and signal failure.
        print(f"❌ Model initialization failed: {init_error}")
    return loaded

def get_available_voices():
    """Return the voices offered by the loaded model.

    Returns:
        The model's ``available_voices`` value when a model is loaded and
        that value is truthy; otherwise the single-entry fallback list
        ``["expr-voice-5-m"]`` (no model, empty voice list, or any error
        while reading the attribute).
    """
    fallback = ["expr-voice-5-m"]
    if not model:
        return fallback

    try:
        # An empty/falsy voice list is treated the same as "no voices".
        found = model.available_voices or fallback
    except Exception:
        found = fallback
    return found

def sanitize_text(text):
    """Clean and sanitize input text.

    Characters other than word characters, whitespace and the punctuation
    ``. , ! ? ; : ' " ( ) -`` are removed, then every run of whitespace is
    collapsed to a single space and the ends are trimmed.

    Args:
        text (str): Raw input text. ``None`` or an empty string yields ``""``.

    Returns:
        str: The cleaned text; may be empty when nothing usable remains.
    """
    if not text:
        return ""
    # Remove potentially problematic characters first ...
    kept = re.sub(r'[^\w\s.,!?;:\'"()-]', '', text)
    # ... and normalize whitespace afterwards, so the gaps left behind by
    # removed characters do not survive as double or edge spaces.
    return re.sub(r'\s+', ' ', kept).strip()

def generate_speech(text, voice, speed):
    """
    Generate speech from text using KittenTTS and save it as a WAV file.

    The text is validated (non-empty, at most 500 characters), sanitized and
    passed to the model. If generation fails for a text longer than 100
    characters, one retry is made with the first 100 characters plus "...".
    The audio is written to a uniquely named file in the system temp
    directory at a 24000 Hz sample rate.

    Args:
        text (str): Text to convert to speech
        voice (str): Voice to use for generation
        speed (float): Speed of speech generation

    Returns:
        tuple: (audio_file_path, status_message). ``audio_file_path`` is
        None whenever validation or generation fails; this function reports
        failures through ``status_message`` instead of raising.
    """
    if not model:
        return None, "❌ TTS model not available"

    # Guard against a missing value as well as blank input, so a None text
    # produces the normal validation message instead of an AttributeError.
    if not text or not text.strip():
        return None, "❌ Please enter some text to generate speech"

    if len(text) > 500:
        return None, "❌ Text too long. Maximum 500 characters allowed"

    try:
        # Clean text
        processed_text = sanitize_text(text)

        # Sanitizing can strip everything (e.g. symbol-only input); do not
        # hand an empty string to the model.
        if not processed_text.strip():
            return None, "❌ Text contains no speakable characters"

        # Generate audio with fallback handling
        truncated = False
        try:
            audio_data = model.generate(processed_text, voice=voice, speed=speed)
        except Exception:
            # Short texts have no fallback: re-raise with the original
            # traceback so the outer handler reports the real error.
            if len(processed_text) <= 100:
                raise
            # Fallback: try with truncated text
            processed_text = processed_text[:100] + "..."
            audio_data = model.generate(processed_text, voice=voice, speed=speed)
            truncated = True

        # Save to temporary file
        temp_dir = tempfile.gettempdir()
        unique_filename = f"kitten_tts_{uuid.uuid4()}.wav"
        output_path = os.path.join(temp_dir, unique_filename)

        sf.write(output_path, audio_data, 24000)

        if truncated:
            # Be honest with the user that only a prefix was synthesized.
            return output_path, (
                "⚠️ Speech generated from the first 100 characters only "
                "(full text could not be synthesized)"
            )
        return output_path, "✅ Speech generated successfully!"

    except Exception as e:
        return None, f"❌ Generation failed: {str(e)}"

# Initialize model on startup.  The boolean result is intentionally not
# checked: when loading fails, `model` stays None, the app still starts, and
# generate_speech reports "TTS model not available" on each request.
initialize_model()

# Get available voices.  This must run after initialize_model(); with no
# model loaded it yields the fallback list ["expr-voice-5-m"].
available_voices = get_available_voices()

# Create Gradio interface
with gr.Blocks(
    title="AI Text-to-Speech Chatbot",
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
        neutral_hue="slate"
    ),
    css="""
    .gradio-container {
        max-width: 1200px !important;
        margin: auto !important;
    }
    .main-header {
        text-align: center;
        margin-bottom: 2rem;
    }
    .feature-grid {
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
        gap: 1rem;
        margin: 1rem 0;
    }
    .feature-card {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 1rem;
        border-radius: 10px;
        text-align: center;
    }
    """
) as app:
    
    # Header
    gr.HTML("""
    <div class="main-header">
        <h1>🎤 AI Text-to-Speech Chatbot</h1>
        <p>Transform any text into natural, high-quality speech using advanced AI</p>
    </div>
    """)
    
    # Features section
    gr.HTML("""
    <div class="feature-grid">
        <div class="feature-card">
            <h3>🎭 Multiple Voices</h3>
            <p>8 different voice options</p>
        </div>
        <div class="feature-card">
            <h3>⚡ Speed Control</h3>
            <p>Adjust from 0.5x to 2.0x</p>
        </div>
        <div class="feature-card">
            <h3>🎵 High Quality</h3>
            <p>24kHz WAV output</p>
        </div>
        <div class="feature-card">
            <h3>📱 Mobile Ready</h3>
            <p>Works on all devices</p>
        </div>
    </div>
    """)
    
    with gr.Row():
        with gr.Column(scale=2):
            # Input section
            gr.Markdown("## 📝 Enter Your Text")
            
            text_input = gr.Textbox(
                label="Text to Convert",
                placeholder="Enter the text you want to convert to speech... (max 500 characters)",
                lines=4,
                max_lines=8
            )
            
            with gr.Row():
                voice_dropdown = gr.Dropdown(
                    choices=available_voices,
                    value=available_voices[0] if available_voices else "expr-voice-5-m",
                    label="🎭 Voice Selection",
                    info="Choose the voice for speech generation"
                )
                
                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    step=0.1,
                    value=1.25,
                    label="⚡ Speech Speed",
                    info="Adjust the speed of speech (0.5x to 2.0x)"
                )
            
            generate_btn = gr.Button(
                "🎵 Generate Speech", 
                variant="primary", 
                size="lg"
            )
            
        with gr.Column(scale=1):
            # Output section
            gr.Markdown("## 🔊 Generated Audio")
            
            status_output = gr.Textbox(
                label="Status",
                value="Ready to generate speech",
                interactive=False
            )
            
            audio_output = gr.Audio(
                label="Generated Speech",
                type="filepath",
                interactive=False
            )
    
    # Example texts section
    gr.Markdown("## 🚀 Quick Examples")
    gr.Markdown("Try these example texts:")
    gr.Markdown("- Hello! Welcome to AI Text-to-Speech. I can convert any text into natural speech.")
    gr.Markdown("- This system uses advanced neural networks to generate high-quality voice synthesis.")
    gr.Markdown("- Once upon a time, there was a magical voice that could bring any story to life.")
    gr.Markdown("- The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.")
    
    # Information section
    with gr.Accordion("ℹ️ About This App", open=False):
        gr.Markdown("""
        ### 🛠️ Technical Details
        - **Model**: KittenTTS nano (high-quality, fast)
        - **Output**: 24kHz WAV audio files
        - **Voices**: 8 different voice options
        - **Speed**: Adjustable from 0.5x to 2.0x
        
        ### 🎯 How to Use
        1. Enter your text (up to 500 characters)
        2. Select a voice from the dropdown
        3. Adjust the speech speed if needed
        4. Click "Generate Speech"
        5. Listen to the generated audio
        6. Download the audio file if needed
        
        ### 🔗 Source Code
        Available on GitHub: [ai-tts-chatbot](https://github.com/your-username/ai-tts-chatbot)
        
        ### 📄 License
        MIT License - Free to use and modify
        """)
    
    # Event handlers
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output, status_output]
    )
    
    # Auto-generate on Enter key
    text_input.submit(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output, status_output]
    )

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    # Listen on every network interface, port 7860, and surface handler
    # errors in the UI.
    # NOTE(review): share=True requests a public share link — confirm this is
    # wanted when the app is hosted on Hugging Face Spaces.
    launch_settings = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "show_error": True,
    }
    queued_app = app.queue(default_concurrency_limit=10)
    queued_app.launch(**launch_settings)