🎤 AI Text-to-Speech Chatbot
Transform any text into natural, high-quality speech using advanced AI
""" AI Text-to-Speech Chatbot - Gradio Version for Hugging Face Spaces """ import gradio as gr import tempfile import uuid import os import re import base64 import io import soundfile as sf # Global TTS model instance model = None def initialize_model(): """Initialize the KittenTTS model""" global model try: from kittentts import KittenTTS model = KittenTTS("KittenML/kitten-tts-nano-0.1") print("✅ Model initialized successfully") return True except Exception as e: print(f"❌ Model initialization failed: {e}") return False def get_available_voices(): """Get available voices from the model""" if not model: return ["expr-voice-5-m"] try: voices = model.available_voices return voices if voices else ["expr-voice-5-m"] except Exception: return ["expr-voice-5-m"] def sanitize_text(text): """Clean and sanitize input text""" # Normalize whitespace cleaned = re.sub(r'\s+', ' ', text.strip()) # Remove potentially problematic characters cleaned = re.sub(r'[^\w\s.,!?;:\'"()-]', '', cleaned) return cleaned def generate_speech(text, voice, speed): """ Generate speech from text using KittenTTS Args: text (str): Text to convert to speech voice (str): Voice to use for generation speed (float): Speed of speech generation Returns: tuple: (audio_file_path, status_message) """ if not model: return None, "❌ TTS model not available" if not text.strip(): return None, "❌ Please enter some text to generate speech" if len(text) > 500: return None, "❌ Text too long. Maximum 500 characters allowed" try: # Clean text processed_text = sanitize_text(text) # Generate audio with fallback handling try: audio_data = model.generate(processed_text, voice=voice, speed=speed) except Exception as generation_error: # Fallback: try with truncated text if len(processed_text) > 100: processed_text = processed_text[:100] + "..." audio_data = model.generate(processed_text, voice=voice, speed=speed) else: raise generation_error # Save to temporary file temp_dir = tempfile.gettempdir() unique_filename = f"kitten_tts_{uuid.uuid4()}.wav" output_path = os.path.join(temp_dir, unique_filename) sf.write(output_path, audio_data, 24000) return output_path, "✅ Speech generated successfully!" except Exception as e: return None, f"❌ Generation failed: {str(e)}" # Initialize model on startup initialize_model() # Get available voices available_voices = get_available_voices() # Create Gradio interface with gr.Blocks( title="AI Text-to-Speech Chatbot", theme=gr.themes.Soft( primary_hue="blue", secondary_hue="purple", neutral_hue="slate" ), css=""" .gradio-container { max-width: 1200px !important; margin: auto !important; } .main-header { text-align: center; margin-bottom: 2rem; } .feature-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 1rem; margin: 1rem 0; } .feature-card { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 1rem; border-radius: 10px; text-align: center; } """ ) as app: # Header gr.HTML("""
Transform any text into natural, high-quality speech using advanced AI
8 different voice options
Adjust from 0.5x to 2.0x
24kHz WAV output
Works on all devices