# Spaces: laxminarasimha6/ai-tts-generator - Running - File size: 8,487 Bytes - a2b7ad5
"""

AI Text-to-Speech Chatbot - Gradio Version for Hugging Face Spaces

"""

import gradio as gr
import tempfile
import uuid
import os
import re
import base64
import io
import soundfile as sf

# Global TTS model instance.
# Starts as None and is only rebound by initialize_model() when loading
# succeeds; the functions below treat a falsy value as "model unavailable".
model = None

def initialize_model():
    """Load the KittenTTS model into the module-level ``model`` variable.

    The ``kittentts`` import lives inside the ``try`` so that a missing
    package is reported the same way as any other load failure.

    Returns:
        bool: True when the model was created and the success message was
        printed, False when anything inside the ``try`` block raised.
    """
    global model
    succeeded = False
    try:
        from kittentts import KittenTTS as tts_class
        model = tts_class("KittenML/kitten-tts-nano-0.1")
        print("✅ Model initialized successfully")
        succeeded = True
    except Exception as err:
        # Report the failure on stdout and signal it through the return value.
        print(f"❌ Model initialization failed: {err}")
    return succeeded

def get_available_voices():
    """Return the voices reported by the model, or a one-item fallback list.

    The fallback ``["expr-voice-5-m"]`` is returned when the global ``model``
    is falsy, when ``model.available_voices`` is falsy, or when reading or
    testing it raises.
    """
    default_voices = ["expr-voice-5-m"]
    if model:
        try:
            reported = model.available_voices
            if reported:
                return reported
        except Exception:
            # Any failure while querying the model degrades to the fallback.
            return default_voices
    return default_voices

def sanitize_text(text):
    """Clean user text before it is handed to the TTS model.

    Characters other than word characters, whitespace and the punctuation
    ``. , ! ? ; : ' " ( ) -`` are removed first; whitespace is collapsed and
    trimmed afterwards. Stripping characters before normalising whitespace
    guarantees that a removed character cannot leave a double space or a
    leading/trailing space behind (e.g. ``"a @ b"`` becomes ``"a b"``).

    Args:
        text (str): Raw input text. ``None`` or an empty string yields ``""``.

    Returns:
        str: The cleaned text with single spaces and no surrounding whitespace.
    """
    if not text:
        return ""
    # Drop unsupported characters first ...
    kept = re.sub(r'[^\w\s.,!?;:\'"()-]', '', text)
    # ... then collapse whatever whitespace runs remain and trim the ends.
    return re.sub(r'\s+', ' ', kept).strip()

# Limits and audio format used by generate_speech.
_MAX_INPUT_CHARS = 500   # longest raw input accepted
_FALLBACK_CHARS = 100    # length the text is cut to when the first attempt fails
_SAMPLE_RATE_HZ = 24000  # sample rate passed to sf.write for the WAV file


def generate_speech(text, voice, speed):
    """Generate speech from text using KittenTTS.

    Args:
        text (str): Text to convert to speech. ``None``, blank text, and text
            that is empty after sanitisation are rejected with a status
            message instead of being sent to the model.
        voice (str): Voice to use for generation.
        speed (float): Speed of speech generation.

    Returns:
        tuple: ``(audio_file_path, status_message)``. ``audio_file_path`` is
        the path of a newly written WAV file in the system temp directory, or
        ``None`` when nothing was generated. This function does not raise for
        generation errors; they are reported through ``status_message``.
    """
    if not model:
        return None, "❌ TTS model not available"

    # Guard against None as well as blank input so .strip() cannot fail.
    if not text or not text.strip():
        return None, "❌ Please enter some text to generate speech"

    if len(text) > _MAX_INPUT_CHARS:
        return None, f"❌ Text too long. Maximum {_MAX_INPUT_CHARS} characters allowed"

    try:
        processed_text = sanitize_text(text)

        # Input made only of unsupported characters sanitises to nothing;
        # report it as missing text rather than calling the model with "".
        if not processed_text.strip():
            return None, "❌ Please enter some text to generate speech"

        try:
            audio_data = model.generate(processed_text, voice=voice, speed=speed)
        except Exception:
            # Fallback: retry once with truncated text. Short text has nothing
            # to truncate, so the original error is re-raised unchanged.
            if len(processed_text) <= _FALLBACK_CHARS:
                raise
            processed_text = processed_text[:_FALLBACK_CHARS] + "..."
            audio_data = model.generate(processed_text, voice=voice, speed=speed)

        # A UUID in the file name keeps separate requests from overwriting
        # each other's output.
        output_path = os.path.join(
            tempfile.gettempdir(), f"kitten_tts_{uuid.uuid4()}.wav"
        )
        sf.write(output_path, audio_data, _SAMPLE_RATE_HZ)

        return output_path, "✅ Speech generated successfully!"

    except Exception as e:
        return None, f"❌ Generation failed: {str(e)}"

# Initialize model on startup.
# The boolean result is ignored here: if loading fails, `model` stays None and
# generate_speech() reports that the TTS model is not available.
initialize_model()

# Get available voices once, at import time; this list populates the voice
# dropdown in the interface below.
available_voices = get_available_voices()

# Create Gradio interface
# The static theme, CSS, HTML and Markdown are named first so that the layout
# code further down reads as page structure only.
_page_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="purple",
    neutral_hue="slate",
)

_page_css = """

    .gradio-container {

        max-width: 1200px !important;

        margin: auto !important;

    }

    .main-header {

        text-align: center;

        margin-bottom: 2rem;

    }

    .feature-grid {

        display: grid;

        grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));

        gap: 1rem;

        margin: 1rem 0;

    }

    .feature-card {

        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);

        color: white;

        padding: 1rem;

        border-radius: 10px;

        text-align: center;

    }

    """

_header_html = """

    <div class="main-header">

        <h1>🎤 AI Text-to-Speech Chatbot</h1>

        <p>Transform any text into natural, high-quality speech using advanced AI</p>

    </div>

    """

_features_html = """

    <div class="feature-grid">

        <div class="feature-card">

            <h3>🎭 Multiple Voices</h3>

            <p>8 different voice options</p>

        </div>

        <div class="feature-card">

            <h3>⚡ Speed Control</h3>

            <p>Adjust from 0.5x to 2.0x</p>

        </div>

        <div class="feature-card">

            <h3>🎵 High Quality</h3>

            <p>24kHz WAV output</p>

        </div>

        <div class="feature-card">

            <h3>📱 Mobile Ready</h3>

            <p>Works on all devices</p>

        </div>

    </div>

    """

_about_markdown = """

        ### 🛠️ Technical Details

        - **Model**: KittenTTS nano (high-quality, fast)

        - **Output**: 24kHz WAV audio files

        - **Voices**: 8 different voice options

        - **Speed**: Adjustable from 0.5x to 2.0x

        

        ### 🎯 How to Use

        1. Enter your text (up to 500 characters)

        2. Select a voice from the dropdown

        3. Adjust the speech speed if needed

        4. Click "Generate Speech"

        5. Listen to the generated audio

        6. Download the audio file if needed

        

        ### 🔗 Source Code

        Available on GitHub: [ai-tts-chatbot](https://github.com/your-username/ai-tts-chatbot)

        

        ### 📄 License

        MIT License - Free to use and modify

        """

with gr.Blocks(title="AI Text-to-Speech Chatbot", theme=_page_theme, css=_page_css) as app:

    # Page banner followed by the feature cards
    gr.HTML(_header_html)
    gr.HTML(_features_html)

    with gr.Row():
        # Left (wider) column: text entry, voice/speed controls, trigger button
        with gr.Column(scale=2):
            gr.Markdown("## 📝 Enter Your Text")

            text_input = gr.Textbox(
                label="Text to Convert",
                placeholder="Enter the text you want to convert to speech... (max 500 characters)",
                lines=4,
                max_lines=8,
            )

            with gr.Row():
                # Preselect the first known voice; the literal is a safety net
                # for an empty voice list.
                _default_voice = available_voices[0] if available_voices else "expr-voice-5-m"
                voice_dropdown = gr.Dropdown(
                    choices=available_voices,
                    value=_default_voice,
                    label="🎭 Voice Selection",
                    info="Choose the voice for speech generation",
                )

                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    step=0.1,
                    value=1.25,
                    label="⚡ Speech Speed",
                    info="Adjust the speed of speech (0.5x to 2.0x)",
                )

            generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")

        # Right (narrower) column: status line and audio player
        with gr.Column(scale=1):
            gr.Markdown("## 🔊 Generated Audio")

            status_output = gr.Textbox(
                label="Status",
                value="Ready to generate speech",
                interactive=False,
            )

            audio_output = gr.Audio(
                label="Generated Speech",
                type="filepath",
                interactive=False,
            )

    # Sample sentences, each rendered as its own Markdown bullet
    gr.Markdown("## 🚀 Quick Examples")
    gr.Markdown("Try these example texts:")
    for _example in (
        "Hello! Welcome to AI Text-to-Speech. I can convert any text into natural speech.",
        "This system uses advanced neural networks to generate high-quality voice synthesis.",
        "Once upon a time, there was a magical voice that could bring any story to life.",
        "The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.",
    ):
        gr.Markdown(f"- {_example}")

    # Collapsed-by-default information panel
    with gr.Accordion("ℹ️ About This App", open=False):
        gr.Markdown(_about_markdown)

    # The button click and pressing Enter in the text box run the same handler
    # with the same inputs and outputs; the click handler is registered first.
    for _register in (generate_btn.click, text_input.submit):
        _register(
            fn=generate_speech,
            inputs=[text_input, voice_dropdown, speed_slider],
            outputs=[audio_output, status_output],
        )

# Launch the app
if __name__ == "__main__":
    # Hugging Face Spaces exposes SPACE_ID in the environment and already
    # serves the app publicly; Gradio does not support share links there and
    # only emits a warning. Request a share link only when running elsewhere,
    # which keeps the previous behaviour for local runs.
    running_on_spaces = bool(os.environ.get("SPACE_ID"))
    app.queue(default_concurrency_limit=10).launch(
        server_name="0.0.0.0",  # listen on all interfaces
        server_port=7860,
        share=not running_on_spaces,
        show_error=True,
    )