Spaces:

laxminarasimha6
/

ai-tts-generator

Running

App Files Files Community

ai-tts-generator / app.py

laxminarasimha6

Upload 3 files

a2b7ad5 verified about 1 month ago

raw

history blame

8.49 kB

	"""
	AI Text-to-Speech Chatbot - Gradio Version for Hugging Face Spaces
	"""

	import gradio as gr
	import tempfile
	import uuid
	import os
	import re
	import base64
	import io
	import soundfile as sf

	# Module-wide handle to the TTS model; stays None until a load succeeds
	model = None

	def initialize_model():
	    """Try to load the KittenTTS model into the module-level ``model``.

	    The ``kittentts`` import happens inside the function, so a missing or
	    broken package is reported as a failed initialization instead of
	    raising at import time.

	    Returns:
	        bool: True when the model was created, False when any exception
	        occurred (the exception is printed, not raised).
	    """
	    global model
	    loaded = False
	    try:
	        from kittentts import KittenTTS
	        model = KittenTTS("KittenML/kitten-tts-nano-0.1")
	        print("✅ Model initialized successfully")
	        loaded = True
	    except Exception as exc:
	        print(f"❌ Model initialization failed: {exc}")
	    return loaded

	def get_available_voices():
	    """Return the voice names exposed by the loaded model.

	    Reads ``model.available_voices`` and returns its contents as a new
	    list. The single default voice ``["expr-voice-5-m"]`` is returned
	    instead when no model is loaded, when the model reports no voices, or
	    when reading the attribute raises.

	    Returns:
	        list: Voice identifiers; never empty.
	    """
	    fallback = ["expr-voice-5-m"]

	    # Compare with None explicitly: a loaded model object is allowed to be
	    # falsy (e.g. if it defines __len__/__bool__) and must still be queried.
	    if model is None:
	        return fallback

	    try:
	        voices = model.available_voices
	        if not voices:
	            return fallback
	        # A bare string would otherwise be split into characters by list().
	        if isinstance(voices, str):
	            return [voices]
	        # Copy so callers always get a list and cannot mutate the model's
	        # own container.
	        return list(voices)
	    except Exception:
	        # Best effort: any failure while querying the model yields the default.
	        return fallback

	def sanitize_text(text):
	    """Clean and sanitize input text before it is sent to the TTS model.

	    Characters other than word characters, whitespace and the punctuation
	    ``. , ! ? ; : ' " ( ) -`` are removed, then every run of whitespace is
	    collapsed to a single space and the result is trimmed.

	    Args:
	        text (str): Raw user text; ``None`` or an empty string yields "".

	    Returns:
	        str: The cleaned text, with no leading, trailing or repeated
	        whitespace.
	    """
	    if not text:
	        return ""
	    # Remove unsupported characters first: doing it after whitespace
	    # normalization would leave double or edge spaces where a symbol stood
	    # between spaces (e.g. "a @ b" -> "a  b").
	    kept = re.sub(r'[^\w\s.,!?;:\'"()-]', '', text)
	    # Normalize whitespace last so the result is always fully collapsed.
	    return re.sub(r'\s+', ' ', kept).strip()

	def generate_speech(text, voice, speed):
	    """
	    Generate speech from text using KittenTTS and save it as a WAV file.

	    Args:
	        text (str): Text to convert to speech; ``None`` is treated as empty.
	        voice (str): Voice passed through to ``model.generate``.
	        speed (float): Speed passed through to ``model.generate``.

	    Returns:
	        tuple: (audio_file_path, status_message). ``audio_file_path`` is
	        ``None`` whenever no audio was produced; failures are reported via
	        ``status_message`` instead of being raised.
	    """
	    if model is None:
	        return None, "❌ TTS model not available"

	    # Guard against None as well as blank input so the call never fails
	    # with an AttributeError on text.strip().
	    if text is None or not text.strip():
	        return None, "❌ Please enter some text to generate speech"

	    if len(text) > 500:
	        return None, "❌ Text too long. Maximum 500 characters allowed"

	    try:
	        processed_text = sanitize_text(text)

	        # Input made only of stripped characters (e.g. emoji) leaves nothing
	        # to speak; stop here rather than sending empty text to the model.
	        if not processed_text.strip():
	            return None, "❌ No speakable text left after cleaning the input"

	        status = "✅ Speech generated successfully!"

	        # Generate audio with fallback handling
	        try:
	            audio_data = model.generate(processed_text, voice=voice, speed=speed)
	        except Exception as generation_error:
	            # Short text cannot be shortened further: let the outer handler
	            # report the original error.
	            if len(processed_text) <= 100:
	                raise
	            # Fallback: retry once with truncated text, and make the
	            # truncation visible in the log and in the status message.
	            print(f"⚠️ Generation failed, retrying with truncated text: {generation_error}")
	            processed_text = processed_text[:100] + "..."
	            audio_data = model.generate(processed_text, voice=voice, speed=speed)
	            status = "✅ Speech generated successfully! (text was truncated to 100 characters)"

	        # Save under a unique name in the system temp directory
	        temp_dir = tempfile.gettempdir()
	        unique_filename = f"kitten_tts_{uuid.uuid4()}.wav"
	        output_path = os.path.join(temp_dir, unique_filename)

	        # 24000 is the sample rate written to the WAV file
	        sf.write(output_path, audio_data, 24000)

	        return output_path, status

	    except Exception as e:
	        return None, f"❌ Generation failed: {str(e)}"

	# Attempt to load the TTS model once, when the module is executed.
	# initialize_model() prints the outcome and returns a bool; the return
	# value is not checked here, so the UI is built even if loading failed.
	initialize_model()

	# Query the voice list after the load attempt so it reflects the loaded
	# model; get_available_voices() returns a default voice when none is loaded.
	available_voices = get_available_voices()

	# Build the Gradio Blocks UI: theme, custom CSS, layout and event wiring
	with gr.Blocks(
	    title="AI Text-to-Speech Chatbot",
	    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="purple", neutral_hue="slate"),
	    css="""
	.gradio-container {
	max-width: 1200px !important;
	margin: auto !important;
	}
	.main-header {
	text-align: center;
	margin-bottom: 2rem;
	}
	.feature-grid {
	display: grid;
	grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
	gap: 1rem;
	margin: 1rem 0;
	}
	.feature-card {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	padding: 1rem;
	border-radius: 10px;
	text-align: center;
	}
	"""
	) as app:

	    # Page banner, styled by the .main-header CSS rule
	    gr.HTML("""
	<div class="main-header">
	<h1>🎤 AI Text-to-Speech Chatbot</h1>
	<p>Transform any text into natural, high-quality speech using advanced AI</p>
	</div>
	""")

	    # Static feature cards, laid out by the .feature-grid CSS rule
	    gr.HTML("""
	<div class="feature-grid">
	<div class="feature-card">
	<h3>🎭 Multiple Voices</h3>
	<p>8 different voice options</p>
	</div>
	<div class="feature-card">
	<h3>⚡ Speed Control</h3>
	<p>Adjust from 0.5x to 2.0x</p>
	</div>
	<div class="feature-card">
	<h3>🎵 High Quality</h3>
	<p>24kHz WAV output</p>
	</div>
	<div class="feature-card">
	<h3>📱 Mobile Ready</h3>
	<p>Works on all devices</p>
	</div>
	</div>
	""")

	    with gr.Row():
	        # Left (wider) column: text entry and generation controls
	        with gr.Column(scale=2):
	            gr.Markdown("## 📝 Enter Your Text")

	            text_input = gr.Textbox(
	                label="Text to Convert",
	                placeholder="Enter the text you want to convert to speech... (max 500 characters)",
	                lines=4,
	                max_lines=8,
	            )

	            # Voice and speed controls share one row
	            with gr.Row():
	                voice_dropdown = gr.Dropdown(
	                    choices=available_voices,
	                    value=available_voices[0] if available_voices else "expr-voice-5-m",
	                    label="🎭 Voice Selection",
	                    info="Choose the voice for speech generation",
	                )

	                speed_slider = gr.Slider(
	                    minimum=0.5,
	                    maximum=2.0,
	                    step=0.1,
	                    value=1.25,
	                    label="⚡ Speech Speed",
	                    info="Adjust the speed of speech (0.5x to 2.0x)",
	                )

	            generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")

	        # Right (narrower) column: status line and audio player
	        with gr.Column(scale=1):
	            gr.Markdown("## 🔊 Generated Audio")

	            status_output = gr.Textbox(label="Status", value="Ready to generate speech", interactive=False)

	            audio_output = gr.Audio(label="Generated Speech", type="filepath", interactive=False)

	    # Example texts, rendered as plain Markdown (one component per line)
	    gr.Markdown("## 🚀 Quick Examples")
	    gr.Markdown("Try these example texts:")
	    for example_line in (
	        "- Hello! Welcome to AI Text-to-Speech. I can convert any text into natural speech.",
	        "- This system uses advanced neural networks to generate high-quality voice synthesis.",
	        "- Once upon a time, there was a magical voice that could bring any story to life.",
	        "- The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.",
	    ):
	        gr.Markdown(example_line)

	    # Collapsed-by-default accordion with usage and project information
	    with gr.Accordion("ℹ️ About This App", open=False):
	        gr.Markdown("""
	### 🛠️ Technical Details
	- Model: KittenTTS nano (high-quality, fast)
	- Output: 24kHz WAV audio files
	- Voices: 8 different voice options
	- Speed: Adjustable from 0.5x to 2.0x

	### 🎯 How to Use
	1. Enter your text (up to 500 characters)
	2. Select a voice from the dropdown
	3. Adjust the speech speed if needed
	4. Click "Generate Speech"
	5. Listen to the generated audio
	6. Download the audio file if needed

	### 🔗 Source Code
	Available on GitHub: [ai-tts-chatbot](https://github.com/your-username/ai-tts-chatbot)

	### 📄 License
	MIT License - Free to use and modify
	""")

	    # Wire both triggers (button click, then textbox submit) to the same
	    # handler with the same inputs and outputs
	    for register_event in (generate_btn.click, text_input.submit):
	        register_event(
	            fn=generate_speech,
	            inputs=[text_input, voice_dropdown, speed_slider],
	            outputs=[audio_output, status_output],
	        )

	# Start the server only when this file is run as a script
	if __name__ == "__main__":
	    # Enable request queuing with a default concurrency limit of 10
	    queued_app = app.queue(default_concurrency_limit=10)
	    # Listen on all interfaces, port 7860.
	    # NOTE(review): share=True requests a public share link; this is
	    # reportedly unsupported on Hugging Face Spaces - confirm whether it
	    # is wanted here.
	    queued_app.launch(server_name="0.0.0.0", server_port=7860, share=True, show_error=True)