|
"""
|
|
AI Text-to-Speech Chatbot - Gradio Version for Hugging Face Spaces
|
|
"""
|
|
|
|
import gradio as gr
|
|
import tempfile
|
|
import uuid
|
|
import os
|
|
import re
|
|
import base64
|
|
import io
|
|
import soundfile as sf
|
|
|
|
|
|
# Shared KittenTTS instance used by the functions below; it stays None until
# initialize_model() succeeds in constructing the model.
model = None
|
|
|
|
def initialize_model():
    """Load the KittenTTS model into the module-level ``model`` global.

    The ``kittentts`` import is deferred into the function so that a missing
    or broken installation is reported as a failed initialization instead of
    raising while the module is being imported.

    Returns:
        bool: True when the model object was created; False when the import
        or the model construction raised. On failure the error is printed and
        ``model`` is left unchanged.
    """
    global model

    try:
        from kittentts import KittenTTS

        model = KittenTTS("KittenML/kitten-tts-nano-0.1")
    except Exception as e:
        # Deliberately broad: any failure here (missing package, download or
        # load error) should leave the app running without a model.
        print(f"❌ Model initialization failed: {e}")
        return False

    print("✅ Model initialized successfully")
    return True
|
|
|
|
def get_available_voices():
    """Return the voices reported by the loaded model.

    Falls back to a single-entry list containing ``"expr-voice-5-m"`` when no
    model is loaded, when the model reports no voices, or when reading the
    voice list raises.
    """
    default_voices = ["expr-voice-5-m"]

    if not model:
        return default_voices

    try:
        # `or` keeps the truthiness test inside the try block, so an object
        # whose truth value cannot be evaluated also ends up on the fallback.
        return model.available_voices or default_voices
    except Exception:
        return default_voices
|
|
|
|
# Anything that is not a word character, whitespace, or basic punctuation.
_DISALLOWED_CHARS = re.compile(r'[^\w\s.,!?;:\'"()-]')
# One or more consecutive whitespace characters (spaces, tabs, newlines).
_WHITESPACE_RUN = re.compile(r'\s+')


def sanitize_text(text):
    """Clean and sanitize input text.

    Characters other than word characters, whitespace and the punctuation
    ``. , ! ? ; : ' " ( ) -`` are removed, every run of whitespace is
    collapsed to a single space, and the result is trimmed.

    Disallowed characters are removed *before* whitespace is normalized so
    that dropping a symbol surrounded by spaces cannot leave a double space
    or a leading/trailing space behind.

    Args:
        text (str): Raw user input. ``None`` or an empty string yields ``""``.

    Returns:
        str: The cleaned text, possibly empty.
    """
    if not text:
        return ""

    without_symbols = _DISALLOWED_CHARS.sub('', text)
    return _WHITESPACE_RUN.sub(' ', without_symbols).strip()
|
|
|
|
def generate_speech(text, voice, speed):
    """Generate a WAV file from text using the module-level KittenTTS model.

    Args:
        text (str): Text to convert to speech (at most 500 characters).
        voice (str): Voice identifier passed through to the model.
        speed (float): Speech speed passed through to the model.

    Returns:
        tuple: ``(audio_file_path, status_message)``. ``audio_file_path`` is
        ``None`` whenever no audio was produced, in which case
        ``status_message`` explains why. No exception escapes to the caller.
    """
    max_chars = 500      # upper bound on the raw input length
    retry_chars = 100    # length the text is cut to when the first attempt fails
    sample_rate = 24000  # sample rate the WAV file is written with

    if not model:
        return None, "❌ TTS model not available"

    # Covers a missing value (None) as well as blank text; calling .strip()
    # on None here would raise before the try block below could handle it.
    if not text or not text.strip():
        return None, "❌ Please enter some text to generate speech"

    if len(text) > max_chars:
        return None, f"❌ Text too long. Maximum {max_chars} characters allowed"

    try:
        processed_text = sanitize_text(text)

        # Input consisting only of stripped characters (e.g. symbols) leaves
        # nothing to synthesize, so treat it like empty input.
        if not processed_text.strip():
            return None, "❌ Please enter some text to generate speech"

        was_shortened = False
        try:
            audio_data = model.generate(processed_text, voice=voice, speed=speed)
        except Exception:
            # Retry once with a shortened text; short inputs have nothing to
            # shorten, so their error goes to the outer handler.
            if len(processed_text) <= retry_chars:
                raise
            processed_text = processed_text[:retry_chars] + "..."
            audio_data = model.generate(processed_text, voice=voice, speed=speed)
            was_shortened = True

        # Unique name so simultaneous requests never write to the same file.
        # The file is handed to the caller by path; this function does not
        # delete it afterwards.
        output_path = os.path.join(
            tempfile.gettempdir(), f"kitten_tts_{uuid.uuid4()}.wav"
        )
        sf.write(output_path, audio_data, sample_rate)

        if was_shortened:
            return output_path, (
                f"✅ Speech generated (text was shortened to the first "
                f"{retry_chars} characters)"
            )
        return output_path, "✅ Speech generated successfully!"

    except Exception as e:
        return None, f"❌ Generation failed: {e}"
|
|
|
|
|
|
# Load the TTS model once when the module is imported. The return value is
# ignored: on failure `model` stays None and generate_speech reports the
# model as unavailable instead of raising.
initialize_model()

# Voices offered in the UI dropdown; get_available_voices() falls back to a
# single default voice when the model could not be loaded.
available_voices = get_available_voices()
|
|
|
|
|
|
# Voice-count text shown in the feature card and the About section. It is
# derived from the voices actually offered in the dropdown, so the page stays
# truthful when the model exposes a different number of voices or when only
# the single fallback voice is available.
voice_count = len(available_voices)
voice_summary = (
    "1 voice option"
    if voice_count == 1
    else f"{voice_count} different voice options"
)

with gr.Blocks(
    title="AI Text-to-Speech Chatbot",
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
        neutral_hue="slate",
    ),
    css="""
    .gradio-container {
        max-width: 1200px !important;
        margin: auto !important;
    }
    .main-header {
        text-align: center;
        margin-bottom: 2rem;
    }
    .feature-grid {
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
        gap: 1rem;
        margin: 1rem 0;
    }
    .feature-card {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 1rem;
        border-radius: 10px;
        text-align: center;
    }
    """,
) as app:

    # Page header.
    gr.HTML("""
    <div class="main-header">
        <h1>🎤 AI Text-to-Speech Chatbot</h1>
        <p>Transform any text into natural, high-quality speech using advanced AI</p>
    </div>
    """)

    # Feature overview cards (styled by the .feature-grid / .feature-card CSS).
    gr.HTML(f"""
    <div class="feature-grid">
        <div class="feature-card">
            <h3>🎭 Multiple Voices</h3>
            <p>{voice_summary}</p>
        </div>
        <div class="feature-card">
            <h3>⚡ Speed Control</h3>
            <p>Adjust from 0.5x to 2.0x</p>
        </div>
        <div class="feature-card">
            <h3>🎵 High Quality</h3>
            <p>24kHz WAV output</p>
        </div>
        <div class="feature-card">
            <h3>📱 Mobile Ready</h3>
            <p>Works on all devices</p>
        </div>
    </div>
    """)

    with gr.Row():
        # Left column: text entry and generation controls.
        with gr.Column(scale=2):
            gr.Markdown("## 📝 Enter Your Text")

            text_input = gr.Textbox(
                label="Text to Convert",
                placeholder="Enter the text you want to convert to speech... (max 500 characters)",
                lines=4,
                max_lines=8,
            )

            with gr.Row():
                voice_dropdown = gr.Dropdown(
                    choices=available_voices,
                    value=available_voices[0] if available_voices else "expr-voice-5-m",
                    label="🎭 Voice Selection",
                    info="Choose the voice for speech generation",
                )

                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    step=0.1,
                    value=1.25,
                    label="⚡ Speech Speed",
                    info="Adjust the speed of speech (0.5x to 2.0x)",
                )

            generate_btn = gr.Button(
                "🎵 Generate Speech",
                variant="primary",
                size="lg",
            )

        # Right column: status line and audio player for the result.
        with gr.Column(scale=1):
            gr.Markdown("## 🔊 Generated Audio")

            status_output = gr.Textbox(
                label="Status",
                value="Ready to generate speech",
                interactive=False,
            )

            audio_output = gr.Audio(
                label="Generated Speech",
                type="filepath",
                interactive=False,
            )

    # Sample sentences the user can copy into the text box.
    gr.Markdown("## 📚 Quick Examples")
    gr.Markdown("Try these example texts:")
    gr.Markdown("- Hello! Welcome to AI Text-to-Speech. I can convert any text into natural speech.")
    gr.Markdown("- This system uses advanced neural networks to generate high-quality voice synthesis.")
    gr.Markdown("- Once upon a time, there was a magical voice that could bring any story to life.")
    gr.Markdown("- The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.")

    with gr.Accordion("ℹ️ About This App", open=False):
        gr.Markdown(f"""
        ### 🛠️ Technical Details
        - **Model**: KittenTTS nano (high-quality, fast)
        - **Output**: 24kHz WAV audio files
        - **Voices**: {voice_summary}
        - **Speed**: Adjustable from 0.5x to 2.0x

        ### 🎯 How to Use
        1. Enter your text (up to 500 characters)
        2. Select a voice from the dropdown
        3. Adjust the speech speed if needed
        4. Click "Generate Speech"
        5. Listen to the generated audio
        6. Download the audio file if needed

        ### 🔗 Source Code
        Available on GitHub: [ai-tts-chatbot](https://github.com/your-username/ai-tts-chatbot)

        ### 📄 License
        MIT License - Free to use and modify
        """)

    # The button click and the textbox submit event run the same handler with
    # the same inputs and outputs, so the wiring is declared once and reused.
    speech_event = {
        "fn": generate_speech,
        "inputs": [text_input, voice_dropdown, speed_slider],
        "outputs": [audio_output, status_output],
    }
    generate_btn.click(**speech_event)
    text_input.submit(**speech_event)
|
|
|
|
|
|
if __name__ == "__main__":
    # Server settings: listen on every network interface at port 7860,
    # request a public share link, and show handler errors in the browser.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "show_error": True,
    }

    # Turn on the request queue (default concurrency limit of 10) before
    # starting the server.
    queued_app = app.queue(default_concurrency_limit=10)
    queued_app.launch(**launch_options)