""" AI Text-to-Speech Chatbot - Gradio Version for Hugging Face Spaces """ import gradio as gr import tempfile import uuid import os import re import base64 import io import soundfile as sf # Global TTS model instance model = None def initialize_model(): """Initialize the KittenTTS model""" global model try: from kittentts import KittenTTS model = KittenTTS("KittenML/kitten-tts-nano-0.1") print("✅ Model initialized successfully") return True except Exception as e: print(f"❌ Model initialization failed: {e}") return False def get_available_voices(): """Get available voices from the model""" if not model: return ["expr-voice-5-m"] try: voices = model.available_voices return voices if voices else ["expr-voice-5-m"] except Exception: return ["expr-voice-5-m"] def sanitize_text(text): """Clean and sanitize input text""" # Normalize whitespace cleaned = re.sub(r'\s+', ' ', text.strip()) # Remove potentially problematic characters cleaned = re.sub(r'[^\w\s.,!?;:\'"()-]', '', cleaned) return cleaned def generate_speech(text, voice, speed): """ Generate speech from text using KittenTTS Args: text (str): Text to convert to speech voice (str): Voice to use for generation speed (float): Speed of speech generation Returns: tuple: (audio_file_path, status_message) """ if not model: return None, "❌ TTS model not available" if not text.strip(): return None, "❌ Please enter some text to generate speech" if len(text) > 500: return None, "❌ Text too long. Maximum 500 characters allowed" try: # Clean text processed_text = sanitize_text(text) # Generate audio with fallback handling try: audio_data = model.generate(processed_text, voice=voice, speed=speed) except Exception as generation_error: # Fallback: try with truncated text if len(processed_text) > 100: processed_text = processed_text[:100] + "..." audio_data = model.generate(processed_text, voice=voice, speed=speed) else: raise generation_error # Save to temporary file temp_dir = tempfile.gettempdir() unique_filename = f"kitten_tts_{uuid.uuid4()}.wav" output_path = os.path.join(temp_dir, unique_filename) sf.write(output_path, audio_data, 24000) return output_path, "✅ Speech generated successfully!" except Exception as e: return None, f"❌ Generation failed: {str(e)}" # Initialize model on startup initialize_model() # Get available voices available_voices = get_available_voices() # Create Gradio interface with gr.Blocks( title="AI Text-to-Speech Chatbot", theme=gr.themes.Soft( primary_hue="blue", secondary_hue="purple", neutral_hue="slate" ), css=""" .gradio-container { max-width: 1200px !important; margin: auto !important; } .main-header { text-align: center; margin-bottom: 2rem; } .feature-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 1rem; margin: 1rem 0; } .feature-card { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 1rem; border-radius: 10px; text-align: center; } """ ) as app: # Header gr.HTML("""

🎤 AI Text-to-Speech Chatbot

Transform any text into natural, high-quality speech using advanced AI

""") # Features section gr.HTML("""

🎭 Multiple Voices

8 different voice options

⚡ Speed Control

Adjust from 0.5x to 2.0x

🎵 High Quality

24kHz WAV output

📱 Mobile Ready

Works on all devices

""") with gr.Row(): with gr.Column(scale=2): # Input section gr.Markdown("## 📝 Enter Your Text") text_input = gr.Textbox( label="Text to Convert", placeholder="Enter the text you want to convert to speech... (max 500 characters)", lines=4, max_lines=8 ) with gr.Row(): voice_dropdown = gr.Dropdown( choices=available_voices, value=available_voices[0] if available_voices else "expr-voice-5-m", label="🎭 Voice Selection", info="Choose the voice for speech generation" ) speed_slider = gr.Slider( minimum=0.5, maximum=2.0, step=0.1, value=1.25, label="⚡ Speech Speed", info="Adjust the speed of speech (0.5x to 2.0x)" ) generate_btn = gr.Button( "🎵 Generate Speech", variant="primary", size="lg" ) with gr.Column(scale=1): # Output section gr.Markdown("## 🔊 Generated Audio") status_output = gr.Textbox( label="Status", value="Ready to generate speech", interactive=False ) audio_output = gr.Audio( label="Generated Speech", type="filepath", interactive=False ) # Example texts section gr.Markdown("## 🚀 Quick Examples") gr.Markdown("Try these example texts:") gr.Markdown("- Hello! Welcome to AI Text-to-Speech. I can convert any text into natural speech.") gr.Markdown("- This system uses advanced neural networks to generate high-quality voice synthesis.") gr.Markdown("- Once upon a time, there was a magical voice that could bring any story to life.") gr.Markdown("- The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.") # Information section with gr.Accordion("ℹ️ About This App", open=False): gr.Markdown(""" ### 🛠️ Technical Details - **Model**: KittenTTS nano (high-quality, fast) - **Output**: 24kHz WAV audio files - **Voices**: 8 different voice options - **Speed**: Adjustable from 0.5x to 2.0x ### 🎯 How to Use 1. Enter your text (up to 500 characters) 2. Select a voice from the dropdown 3. Adjust the speech speed if needed 4. Click "Generate Speech" 5. Listen to the generated audio 6. Download the audio file if needed ### 🔗 Source Code Available on GitHub: [ai-tts-chatbot](https://github.com/your-username/ai-tts-chatbot) ### 📄 License MIT License - Free to use and modify """) # Event handlers generate_btn.click( fn=generate_speech, inputs=[text_input, voice_dropdown, speed_slider], outputs=[audio_output, status_output] ) # Auto-generate on Enter key text_input.submit( fn=generate_speech, inputs=[text_input, voice_dropdown, speed_slider], outputs=[audio_output, status_output] ) # Launch the app if __name__ == "__main__": app.queue(default_concurrency_limit=10).launch( server_name="0.0.0.0", server_port=7860, share=True, show_error=True )