# Hugging Face file-page header (page residue, not part of the program):
# uploaded by laxminarasimha6 - "Upload 3 files" - commit a2b7ad5 (verified) - 8.49 kB
"""
AI Text-to-Speech Chatbot - Gradio Version for Hugging Face Spaces
"""
import gradio as gr
import tempfile
import uuid
import os
import re
import base64
import io
import soundfile as sf
# Global TTS model instance. Starts as None; initialize_model() assigns the
# KittenTTS object on success and leaves this untouched on failure.
model = None
def initialize_model():
    """Load the KittenTTS model into the module-level ``model`` global.

    The ``kittentts`` import is done inside the function, under the ``try``,
    so a missing or broken installation is reported as a failed
    initialization rather than an unhandled import error.

    Returns:
        bool: True when the model object was created; False when importing
        or constructing it raised. On failure the error is printed and
        ``model`` is left unchanged.
    """
    global model
    try:
        from kittentts import KittenTTS

        model = KittenTTS("KittenML/kitten-tts-nano-0.1")
    except Exception as e:
        # Startup boundary: report the problem and let the caller decide
        # what to do with a False result instead of crashing.
        print(f"❌ Model initialization failed: {e}")
        return False
    else:
        print("✅ Model initialized successfully")
        return True
def get_available_voices():
    """Return the voices reported by the loaded model, or a default list.

    The one-item default ``["expr-voice-5-m"]`` is returned when no model is
    loaded, when the model reports no voices, or when reading
    ``model.available_voices`` raises.
    """
    fallback = ["expr-voice-5-m"]
    if not model:
        return fallback
    try:
        reported = model.available_voices
        return reported or fallback
    except Exception:
        return fallback
def sanitize_text(text):
    """Remove unsupported characters from *text* and normalize its whitespace.

    Word characters, whitespace and the punctuation ``. , ! ? ; : ' " ( ) -``
    are kept; every other character is dropped. Runs of whitespace are then
    collapsed to a single space and leading/trailing whitespace is removed.

    Args:
        text (str): Raw input text.

    Returns:
        str: The cleaned text; may be empty when nothing survives.
    """
    # Remove potentially problematic characters first: deleting a character
    # that sat between two spaces would otherwise leave a double space (or a
    # leading/trailing space) behind after normalization.
    kept = re.sub(r'[^\w\s.,!?;:\'"()-]', '', text)
    # Normalize whitespace on the already-filtered text.
    return re.sub(r'\s+', ' ', kept).strip()
def generate_speech(text, voice, speed):
    """
    Generate speech from text using KittenTTS

    Validation failures and generation errors are reported through the
    returned status message; this function does not raise for them.

    Args:
        text (str): Text to convert to speech (at most 500 characters)
        voice (str): Voice to use for generation
        speed (float): Speed of speech generation

    Returns:
        tuple: (audio_file_path, status_message); audio_file_path is None
        whenever no audio file was written.
    """
    if not model:
        return None, "❌ TTS model not available"
    # Guard against None as well as blank input before calling str methods.
    if not text or not text.strip():
        return None, "❌ Please enter some text to generate speech"
    if len(text) > 500:
        return None, "❌ Text too long. Maximum 500 characters allowed"
    try:
        # Clean text
        processed_text = sanitize_text(text)
        # Input made only of filtered-out characters leaves nothing to speak;
        # stop here rather than handing the model an empty string.
        if not processed_text.strip():
            return None, "❌ Please enter some text to generate speech"
        # Generate audio with fallback handling
        try:
            audio_data = model.generate(processed_text, voice=voice, speed=speed)
        except Exception:
            # Short text has no fallback: re-raise the original error with
            # its traceback so the outer handler reports it.
            if len(processed_text) <= 100:
                raise
            # Fallback: retry once with the text truncated to 100 characters.
            processed_text = processed_text[:100] + "..."
            audio_data = model.generate(processed_text, voice=voice, speed=speed)
        # Save to a uniquely named file in the system temp directory
        unique_filename = f"kitten_tts_{uuid.uuid4()}.wav"
        output_path = os.path.join(tempfile.gettempdir(), unique_filename)
        # 24000 is the sample rate written to the WAV file.
        sf.write(output_path, audio_data, 24000)
        return output_path, "✅ Speech generated successfully!"
    except Exception as e:
        return None, f"❌ Generation failed: {str(e)}"
# Initialize model on startup. This runs when the module is loaded; the
# boolean result is not checked here, so a failed load leaves `model` as None.
initialize_model()
# Get available voices (a one-item default list is returned when no model is loaded)
available_voices = get_available_voices()
# Create Gradio interface
with gr.Blocks(
title="AI Text-to-Speech Chatbot",
theme=gr.themes.Soft(
primary_hue="blue",
secondary_hue="purple",
neutral_hue="slate"
),
css="""
.gradio-container {
max-width: 1200px !important;
margin: auto !important;
}
.main-header {
text-align: center;
margin-bottom: 2rem;
}
.feature-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 1rem;
margin: 1rem 0;
}
.feature-card {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 1rem;
border-radius: 10px;
text-align: center;
}
"""
) as app:
# Header
gr.HTML("""
<div class="main-header">
<h1>🎀 AI Text-to-Speech Chatbot</h1>
<p>Transform any text into natural, high-quality speech using advanced AI</p>
</div>
""")
# Features section
gr.HTML("""
<div class="feature-grid">
<div class="feature-card">
<h3>🎭 Multiple Voices</h3>
<p>8 different voice options</p>
</div>
<div class="feature-card">
<h3>⚑ Speed Control</h3>
<p>Adjust from 0.5x to 2.0x</p>
</div>
<div class="feature-card">
<h3>🎡 High Quality</h3>
<p>24kHz WAV output</p>
</div>
<div class="feature-card">
<h3>πŸ“± Mobile Ready</h3>
<p>Works on all devices</p>
</div>
</div>
""")
with gr.Row():
with gr.Column(scale=2):
# Input section
gr.Markdown("## πŸ“ Enter Your Text")
text_input = gr.Textbox(
label="Text to Convert",
placeholder="Enter the text you want to convert to speech... (max 500 characters)",
lines=4,
max_lines=8
)
with gr.Row():
voice_dropdown = gr.Dropdown(
choices=available_voices,
value=available_voices[0] if available_voices else "expr-voice-5-m",
label="🎭 Voice Selection",
info="Choose the voice for speech generation"
)
speed_slider = gr.Slider(
minimum=0.5,
maximum=2.0,
step=0.1,
value=1.25,
label="⚑ Speech Speed",
info="Adjust the speed of speech (0.5x to 2.0x)"
)
generate_btn = gr.Button(
"🎡 Generate Speech",
variant="primary",
size="lg"
)
with gr.Column(scale=1):
# Output section
gr.Markdown("## πŸ”Š Generated Audio")
status_output = gr.Textbox(
label="Status",
value="Ready to generate speech",
interactive=False
)
audio_output = gr.Audio(
label="Generated Speech",
type="filepath",
interactive=False
)
# Example texts section
gr.Markdown("## πŸš€ Quick Examples")
gr.Markdown("Try these example texts:")
gr.Markdown("- Hello! Welcome to AI Text-to-Speech. I can convert any text into natural speech.")
gr.Markdown("- This system uses advanced neural networks to generate high-quality voice synthesis.")
gr.Markdown("- Once upon a time, there was a magical voice that could bring any story to life.")
gr.Markdown("- The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.")
# Information section
with gr.Accordion("ℹ️ About This App", open=False):
gr.Markdown("""
### πŸ› οΈ Technical Details
- **Model**: KittenTTS nano (high-quality, fast)
- **Output**: 24kHz WAV audio files
- **Voices**: 8 different voice options
- **Speed**: Adjustable from 0.5x to 2.0x
### 🎯 How to Use
1. Enter your text (up to 500 characters)
2. Select a voice from the dropdown
3. Adjust the speech speed if needed
4. Click "Generate Speech"
5. Listen to the generated audio
6. Download the audio file if needed
### πŸ”— Source Code
Available on GitHub: [ai-tts-chatbot](https://github.com/your-username/ai-tts-chatbot)
### πŸ“„ License
MIT License - Free to use and modify
""")
# Event handlers
generate_btn.click(
fn=generate_speech,
inputs=[text_input, voice_dropdown, speed_slider],
outputs=[audio_output, status_output]
)
# Auto-generate on Enter key
text_input.submit(
fn=generate_speech,
inputs=[text_input, voice_dropdown, speed_slider],
outputs=[audio_output, status_output]
)
# Launch the app (only when executed as a script, not when imported)
if __name__ == "__main__":
    app.queue(default_concurrency_limit=10).launch(
        # Listen on all interfaces, port 7860.
        server_name="0.0.0.0",
        server_port=7860,
        # No share tunnel: Gradio does not support share links on Hugging
        # Face Spaces, and on a local run share=True would expose the app
        # through a public URL.
        share=False,
        show_error=True
    )