Spaces:

laxminarasimha6
/

ai-tts-generator

Running

App Files Files Community

laxminarasimha6 committed 29 days ago

Commit

a2b7ad5

verified ·

1 Parent(s): b568184

Upload 3 files

Browse files

Files changed (3) hide show

README.md +64 -0
app.py +269 -0
requirements.txt +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,64 @@

+---
+title: AI Text-to-Speech Chatbot
+emoji: 🎤
+colorFrom: blue
+colorTo: purple
+sdk: gradio
+sdk_version: 4.44.1
+app_file: app.py
+pinned: false
+license: mit
+short_description: Convert text to natural speech with AI
+tags:
+  - text-to-speech
+  - tts
+  - voice-synthesis
+  - audio
+  - chatbot
+  - kittentts
+---
+# 🎤 AI Text-to-Speech Chatbot
+Convert any text into natural, high-quality speech using advanced AI models. Features multiple voices, speed control, and a modern dark interface.
+## ✨ Features
+- **8 Voice Options** - Male and female voices available
+- **Speed Control** - Adjust from 0.5x to 2.0x (default 1.25x)
+- **Modern Interface** - Clean, responsive dark theme
+- **Audio Controls** - Play, pause, seek, and download
+- **Mobile Responsive** - Works perfectly on all devices
+- **Quick Examples** - Instant demo texts
+## 🎯 How to Use
+1. **Enter your text** (up to 500 characters)
+2. **Choose a voice** from the dropdown
+3. **Adjust speed** with the slider
+4. **Generate speech** and wait for processing
+5. **Listen and enjoy** - play the generated audio in the built-in player
+6. **Download** your audio file as needed
+## 🛠️ Technical Details
+- **Model**: KittenTTS nano (high-quality, fast)
+- **Output**: 24kHz WAV audio files
+- **Interface**: Gradio web interface
+- **Voices**: 8 different voice options (male/female)
+## 🚀 Try It Now
+Just start typing in the text area above and click "Generate Speech"!
+## 📱 Browser Support
+Works on all modern browsers including Chrome, Firefox, Safari, and Edge.
+## 🔗 Source Code
+Available on GitHub: [ai-tts-chatbot](https://github.com/your-username/ai-tts-chatbot)
+---
+**Ready to give your text a voice? Start typing above! ✨**

app.py ADDED Viewed

	@@ -0,0 +1,269 @@

+"""
+AI Text-to-Speech Chatbot - Gradio Version for Hugging Face Spaces
+"""
+import gradio as gr
+import tempfile
+import uuid
+import os
+import re
+import base64
+import io
+import soundfile as sf
+# Global TTS model instance
+model = None
+def initialize_model():
+    """Create the KittenTTS model and store it in the module-level ``model``.
+
+    ``kittentts`` is imported inside the ``try`` so that an import failure is
+    handled the same way as a failure while constructing the model: it is
+    printed and reported through the return value instead of propagating.
+
+    Returns:
+        bool: True if the model was created, False if any step raised.
+    """
+    global model
+    try:
+        from kittentts import KittenTTS
+        model = KittenTTS("KittenML/kitten-tts-nano-0.1")
+        print("✅ Model initialized successfully")
+    except Exception as load_error:
+        # Failure path: ``model`` keeps whatever value it had before the call
+        print(f"❌ Model initialization failed: {load_error}")
+        return False
+    return True
+def get_available_voices():
+    """Return the voices exposed by the loaded model, or a one-item default.
+
+    The default list ``["expr-voice-5-m"]`` is returned when no model is
+    loaded, when the model reports no voices, or when reading
+    ``model.available_voices`` raises.
+
+    Returns:
+        The model's ``available_voices`` value when it is truthy, otherwise
+        the default list.
+    """
+    fallback_voices = ["expr-voice-5-m"]
+    if not model:
+        return fallback_voices
+    try:
+        # ``or`` substitutes the default when the model reports nothing
+        return model.available_voices or fallback_voices
+    except Exception:
+        return fallback_voices
+def sanitize_text(text):
+    """Strip unsupported characters from ``text`` and normalize whitespace.
+
+    Everything except word characters, whitespace and the punctuation
+    ``. , ! ? ; : ' " ( ) -`` is removed first. Whitespace is collapsed and
+    trimmed afterwards, so removing a character never leaves doubled,
+    leading or trailing spaces behind (``"a & b"`` becomes ``"a b"``).
+
+    Args:
+        text (str): Raw input text.
+
+    Returns:
+        str: The cleaned text; empty if nothing usable remains.
+    """
+    # Remove potentially problematic characters
+    filtered = re.sub(r'[^\w\s.,!?;:\'"()-]', '', text)
+    # Normalize whitespace last: each whitespace run becomes a single space
+    # and the ends are trimmed
+    return re.sub(r'\s+', ' ', filtered).strip()
+def generate_speech(text, voice, speed):
+    """
+    Generate speech from text using KittenTTS
+
+    The text is validated, sanitized and passed to ``model.generate``. If
+    generation fails and the sanitized text is longer than 100 characters,
+    one retry is made with the first 100 characters plus "..."; in that case
+    the status message says the audio is truncated instead of reporting
+    plain success.
+
+    Args:
+        text (str): Text to convert to speech
+        voice (str): Voice to use for generation
+        speed (float): Speed of speech generation
+    Returns:
+        tuple: (audio_file_path, status_message); audio_file_path is None
+        whenever no audio was produced.
+    """
+    if not model:
+        return None, "❌ TTS model not available"
+    # Guard against None as well as empty / whitespace-only input; calling
+    # .strip() on None would raise outside the try block below
+    if not text or not text.strip():
+        return None, "❌ Please enter some text to generate speech"
+    if len(text) > 500:
+        return None, "❌ Text too long. Maximum 500 characters allowed"
+    try:
+        # Clean text
+        processed_text = sanitize_text(text)
+        # Sanitizing can remove every character (e.g. symbol-only input);
+        # do not send empty text to the model
+        if not processed_text.strip():
+            return None, "❌ Text contains no supported characters"
+        status_message = "✅ Speech generated successfully!"
+        # Generate audio with fallback handling
+        try:
+            audio_data = model.generate(processed_text, voice=voice, speed=speed)
+        except Exception:
+            # Short text has no fallback: re-raise for the outer handler
+            if len(processed_text) <= 100:
+                raise
+            # Fallback: try with truncated text
+            processed_text = processed_text[:100] + "..."
+            audio_data = model.generate(processed_text, voice=voice, speed=speed)
+            # Tell the user the audio does not cover the whole input
+            status_message = (
+                "⚠️ Speech generated from the first 100 characters only; "
+                "the full text could not be processed"
+            )
+        # Save to a uniquely named file in the system temp directory
+        temp_dir = tempfile.gettempdir()
+        unique_filename = f"kitten_tts_{uuid.uuid4()}.wav"
+        output_path = os.path.join(temp_dir, unique_filename)
+        # 24000 is the sample rate passed to soundfile for the WAV file
+        sf.write(output_path, audio_data, 24000)
+        return output_path, status_message
+    except Exception as e:
+        return None, f"❌ Generation failed: {str(e)}"
+# Initialize model on startup. The boolean result is not checked here:
+# generate_speech reports "TTS model not available" when loading failed.
+initialize_model()
+# Get available voices (falls back to ["expr-voice-5-m"] without a model)
+available_voices = get_available_voices()
+# Create Gradio interface: page title, Soft theme colors and custom CSS
+with gr.Blocks(
+    title="AI Text-to-Speech Chatbot",
+    theme=gr.themes.Soft(
+        primary_hue="blue",
+        secondary_hue="purple",
+        neutral_hue="slate"
+    ),
+    # The CSS classes below are used by the gr.HTML header and feature cards
+    css="""
+    .gradio-container {
+        max-width: 1200px !important;
+        margin: auto !important;
+    }
+    .main-header {
+        text-align: center;
+        margin-bottom: 2rem;
+    }
+    .feature-grid {
+        display: grid;
+        grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
+        gap: 1rem;
+        margin: 1rem 0;
+    }
+    .feature-card {
+        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+        color: white;
+        padding: 1rem;
+        border-radius: 10px;
+        text-align: center;
+    }
+    """
+) as app:
+    # Header
+    gr.HTML("""
+    <div class="main-header">
+        <h1>🎤 AI Text-to-Speech Chatbot</h1>
+        <p>Transform any text into natural, high-quality speech using advanced AI</p>
+    </div>
+    """)
+    # Features section (static HTML).
+    # NOTE(review): "8 different voice options" is hard-coded here, while the
+    # dropdown below is filled from available_voices — confirm they agree.
+    gr.HTML("""
+    <div class="feature-grid">
+        <div class="feature-card">
+            <h3>🎭 Multiple Voices</h3>
+            <p>8 different voice options</p>
+        </div>
+        <div class="feature-card">
+            <h3>⚡ Speed Control</h3>
+            <p>Adjust from 0.5x to 2.0x</p>
+        </div>
+        <div class="feature-card">
+            <h3>🎵 High Quality</h3>
+            <p>24kHz WAV output</p>
+        </div>
+        <div class="feature-card">
+            <h3>📱 Mobile Ready</h3>
+            <p>Works on all devices</p>
+        </div>
+    </div>
+    """)
+    # Two-column layout: inputs (scale=2) on the left, outputs (scale=1) on the right
+    with gr.Row():
+        with gr.Column(scale=2):
+            # Input section
+            gr.Markdown("## 📝 Enter Your Text")
+            # The 500-character limit named in the placeholder is enforced
+            # in generate_speech, not by this component
+            text_input = gr.Textbox(
+                label="Text to Convert",
+                placeholder="Enter the text you want to convert to speech... (max 500 characters)",
+                lines=4,
+                max_lines=8
+            )
+            with gr.Row():
+                # Preselect the first available voice
+                voice_dropdown = gr.Dropdown(
+                    choices=available_voices,
+                    value=available_voices[0] if available_voices else "expr-voice-5-m",
+                    label="🎭 Voice Selection",
+                    info="Choose the voice for speech generation"
+                )
+                # The slider value is passed to generate_speech as `speed`
+                speed_slider = gr.Slider(
+                    minimum=0.5,
+                    maximum=2.0,
+                    step=0.1,
+                    value=1.25,
+                    label="⚡ Speech Speed",
+                    info="Adjust the speed of speech (0.5x to 2.0x)"
+                )
+            generate_btn = gr.Button(
+                "🎵 Generate Speech",
+                variant="primary",
+                size="lg"
+            )
+        with gr.Column(scale=1):
+            # Output section
+            gr.Markdown("## 🔊 Generated Audio")
+            # Read-only box showing the status message from generate_speech
+            status_output = gr.Textbox(
+                label="Status",
+                value="Ready to generate speech",
+                interactive=False
+            )
+            # type="filepath" matches the file path returned by generate_speech
+            audio_output = gr.Audio(
+                label="Generated Speech",
+                type="filepath",
+                interactive=False
+            )
+    # Example texts section: plain Markdown bullets with no event handlers,
+    # so they are not wired to the text box
+    gr.Markdown("## 🚀 Quick Examples")
+    gr.Markdown("Try these example texts:")
+    gr.Markdown("- Hello! Welcome to AI Text-to-Speech. I can convert any text into natural speech.")
+    gr.Markdown("- This system uses advanced neural networks to generate high-quality voice synthesis.")
+    gr.Markdown("- Once upon a time, there was a magical voice that could bring any story to life.")
+    gr.Markdown("- The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.")
+    # Information section (collapsed by default)
+    with gr.Accordion("ℹ️ About This App", open=False):
+        gr.Markdown("""
+        ### 🛠️ Technical Details
+        - **Model**: KittenTTS nano (high-quality, fast)
+        - **Output**: 24kHz WAV audio files
+        - **Voices**: 8 different voice options
+        - **Speed**: Adjustable from 0.5x to 2.0x
+        ### 🎯 How to Use
+        1. Enter your text (up to 500 characters)
+        2. Select a voice from the dropdown
+        3. Adjust the speech speed if needed
+        4. Click "Generate Speech"
+        5. Listen to the generated audio
+        6. Download the audio file if needed
+        ### 🔗 Source Code
+        Available on GitHub: [ai-tts-chatbot](https://github.com/your-username/ai-tts-chatbot)
+        ### 📄 License
+        MIT License - Free to use and modify
+        """)
+    # Event handlers: the button click and the text box submit event both
+    # call generate_speech with the same inputs and outputs
+    generate_btn.click(
+        fn=generate_speech,
+        inputs=[text_input, voice_dropdown, speed_slider],
+        outputs=[audio_output, status_output]
+    )
+    # Auto-generate on Enter key
+    text_input.submit(
+        fn=generate_speech,
+        inputs=[text_input, voice_dropdown, speed_slider],
+        outputs=[audio_output, status_output]
+    )
+# Launch the app when run as a script.
+# share=True was removed: a Hugging Face Space is already served publicly and
+# Gradio does not support share links there, while on a local run it would
+# expose the machine through a public tunnel.
+if __name__ == "__main__":
+    app.queue(default_concurrency_limit=10).launch(
+        # Listen on all interfaces, on port 7860
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl
+gradio==4.44.1
+soundfile==0.13.1