laxminarasimha6 committed on
Commit
a2b7ad5
Β·
verified Β·
1 Parent(s): b568184

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +64 -0
  2. app.py +269 -0
  3. requirements.txt +3 -0
README.md ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: AI Text-to-Speech Chatbot
3
+ emoji: 🎤
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.44.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ short_description: Convert text to natural speech with AI
12
+ tags:
13
+ - text-to-speech
14
+ - tts
15
+ - voice-synthesis
16
+ - audio
17
+ - chatbot
18
+ - kittentts
19
+ ---
20
+
21
+ # 🎤 AI Text-to-Speech Chatbot
22
+
23
+ Convert any text into natural, high-quality speech using advanced AI models. Features multiple voices, speed control, and a modern dark interface.
24
+
25
+ ## ✨ Features
26
+
27
+ - **8 Voice Options** - Male and female voices available
28
+ - **Speed Control** - Adjust from 0.5x to 2.0x (default 1.25x)
29
+ - **Modern Interface** - Clean, responsive dark theme
30
+ - **Audio Controls** - Play, pause, seek, and download
31
+ - **Mobile Responsive** - Works perfectly on all devices
32
+ - **Quick Examples** - Instant demo texts
33
+
34
+ ## 🎯 How to Use
35
+
36
+ 1. **Enter your text** (up to 500 characters)
37
+ 2. **Choose a voice** from the dropdown
38
+ 3. **Adjust speed** with the slider
39
+ 4. **Generate speech** and wait for processing
40
+ 5. **Listen and enjoy** - audio plays automatically
41
+ 6. **Download** your audio file as needed
42
+
43
+ ## 🛠️ Technical Details
44
+
45
+ - **Model**: KittenTTS nano (high-quality, fast)
46
+ - **Output**: 24kHz WAV audio files
47
+ - **Interface**: Gradio web interface
48
+ - **Voices**: 8 different voice options (male/female)
49
+
50
+ ## 🚀 Try It Now
51
+
52
+ Just start typing in the text area above and click "Generate Speech"!
53
+
54
+ ## 📱 Browser Support
55
+
56
+ Works on all modern browsers including Chrome, Firefox, Safari, and Edge.
57
+
58
+ ## 🔗 Source Code
59
+
60
+ Available on GitHub: [ai-tts-chatbot](https://github.com/your-username/ai-tts-chatbot)
61
+
62
+ ---
63
+
64
+ **Ready to give your text a voice? Start typing above! ✨**
app.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AI Text-to-Speech Chatbot - Gradio Version for Hugging Face Spaces
3
+ """
4
+
5
+ import gradio as gr
6
+ import tempfile
7
+ import uuid
8
+ import os
9
+ import re
10
+ import base64
11
+ import io
12
+ import soundfile as sf
13
+
14
# Global TTS model instance; stays None until initialize_model() succeeds.
model = None


def initialize_model():
    """Load the KittenTTS model into the module-level ``model`` global.

    ``kittentts`` is imported lazily inside the function so that a missing or
    broken installation is reported as a failed initialization instead of
    crashing while the module is being imported.

    Returns:
        bool: True when the model object was created, False when the import
        or the constructor raised (the error is printed and ``model`` keeps
        its previous value).
    """
    global model
    try:
        from kittentts import KittenTTS
        model = KittenTTS("KittenML/kitten-tts-nano-0.1")
    except Exception as e:
        # Deliberately broad: any failure here must degrade to "no model"
        # so the UI can still start and show a helpful status message.
        print(f"❌ Model initialization failed: {e}")
        return False
    else:
        print("✅ Model initialized successfully")
        return True
28
+
29
def get_available_voices():
    """Return the voice names offered by the loaded model.

    Falls back to a single built-in voice name when no model is loaded, when
    reading ``model.available_voices`` raises, or when it yields nothing.
    """
    fallback = ["expr-voice-5-m"]

    if model:
        try:
            found = model.available_voices
        except Exception:
            return fallback
        if found:
            return found

    return fallback
39
+
40
def sanitize_text(text):
    """Clean user input before it is handed to the TTS model.

    Characters other than word characters, whitespace and basic punctuation
    (``. , ! ? ; : ' " ( ) -``) are dropped, then every run of whitespace is
    collapsed to a single space and the ends are trimmed.

    Stripping happens *before* whitespace normalization: removing a symbol
    that sits between two spaces (``"a @ b"``) would otherwise leave a double
    space behind, and a leading or trailing symbol would leave stray padding.

    Args:
        text (str): Raw input text; ``None`` is treated as empty.

    Returns:
        str: The sanitized text (possibly empty).
    """
    if not text:
        return ""
    # Remove potentially problematic characters first...
    stripped = re.sub(r'[^\w\s.,!?;:\'"()-]', '', text)
    # ...then normalize whatever whitespace is left.
    return re.sub(r'\s+', ' ', stripped).strip()
47
+
48
def generate_speech(text, voice, speed):
    """Generate speech from text using KittenTTS.

    The text is validated (non-empty, at most 500 raw characters), sanitized
    with ``sanitize_text`` and passed to the global ``model``. If generation
    fails for a sanitized text longer than 100 characters, it is retried once
    with the first 100 characters followed by ``"..."``; the status message
    then says so instead of reporting a plain success.

    Args:
        text (str): Text to convert to speech (``None`` is treated as empty).
        voice (str): Voice to use for generation.
        speed (float): Speed of speech generation.

    Returns:
        tuple: ``(audio_file_path, status_message)``. ``audio_file_path`` is
        the path of a WAV file written to the system temp directory, or
        ``None`` when validation or generation failed.
    """
    if not model:
        return None, "❌ TTS model not available"

    if not text or not text.strip():
        return None, "❌ Please enter some text to generate speech"

    if len(text) > 500:
        return None, "❌ Text too long. Maximum 500 characters allowed"

    try:
        # Clean text; input made only of unsupported symbols becomes empty
        # here and must not be sent to the model.
        processed_text = sanitize_text(text)
        if not processed_text:
            return None, "❌ Please enter some text to generate speech"

        # Generate audio with fallback handling
        truncated = False
        try:
            audio_data = model.generate(processed_text, voice=voice, speed=speed)
        except Exception:
            # Short inputs have no shorter form to retry with: re-raise so
            # the outer handler reports the original error.
            if len(processed_text) <= 100:
                raise
            # Fallback: try with truncated text
            processed_text = processed_text[:100] + "..."
            audio_data = model.generate(processed_text, voice=voice, speed=speed)
            truncated = True

        # Save to a uniquely named file in the temp directory so concurrent
        # requests never overwrite each other's output.
        output_path = os.path.join(
            tempfile.gettempdir(), f"kitten_tts_{uuid.uuid4()}.wav"
        )
        sf.write(output_path, audio_data, 24000)

    except Exception as e:
        return None, f"❌ Generation failed: {str(e)}"

    if truncated:
        return output_path, "✅ Speech generated (text was truncated to 100 characters)"
    return output_path, "✅ Speech generated successfully!"
95
+
96
# Initialize model on startup (a failure leaves `model` as None; the UI still
# starts and generate_speech reports the model as unavailable).
initialize_model()

# Get available voices (falls back to a single default voice without a model)
available_voices = get_available_voices()

# Create Gradio interface
with gr.Blocks(
    title="AI Text-to-Speech Chatbot",
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
        neutral_hue="slate"
    ),
    css="""
    .gradio-container {
        max-width: 1200px !important;
        margin: auto !important;
    }
    .main-header {
        text-align: center;
        margin-bottom: 2rem;
    }
    .feature-grid {
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
        gap: 1rem;
        margin: 1rem 0;
    }
    .feature-card {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 1rem;
        border-radius: 10px;
        text-align: center;
    }
    """
) as app:

    # Header
    gr.HTML("""
    <div class="main-header">
        <h1>🎤 AI Text-to-Speech Chatbot</h1>
        <p>Transform any text into natural, high-quality speech using advanced AI</p>
    </div>
    """)

    # Features section
    gr.HTML("""
    <div class="feature-grid">
        <div class="feature-card">
            <h3>🎭 Multiple Voices</h3>
            <p>8 different voice options</p>
        </div>
        <div class="feature-card">
            <h3>⚡ Speed Control</h3>
            <p>Adjust from 0.5x to 2.0x</p>
        </div>
        <div class="feature-card">
            <h3>🎵 High Quality</h3>
            <p>24kHz WAV output</p>
        </div>
        <div class="feature-card">
            <h3>📱 Mobile Ready</h3>
            <p>Works on all devices</p>
        </div>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=2):
            # Input section
            gr.Markdown("## 📝 Enter Your Text")

            text_input = gr.Textbox(
                label="Text to Convert",
                placeholder="Enter the text you want to convert to speech... (max 500 characters)",
                lines=4,
                max_lines=8
            )

            with gr.Row():
                voice_dropdown = gr.Dropdown(
                    choices=available_voices,
                    value=available_voices[0] if available_voices else "expr-voice-5-m",
                    label="🎭 Voice Selection",
                    info="Choose the voice for speech generation"
                )

                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    step=0.1,
                    value=1.25,
                    label="⚡ Speech Speed",
                    info="Adjust the speed of speech (0.5x to 2.0x)"
                )

            generate_btn = gr.Button(
                "🎵 Generate Speech",
                variant="primary",
                size="lg"
            )

        with gr.Column(scale=1):
            # Output section
            gr.Markdown("## 🔊 Generated Audio")

            status_output = gr.Textbox(
                label="Status",
                value="Ready to generate speech",
                interactive=False
            )

            audio_output = gr.Audio(
                label="Generated Speech",
                type="filepath",
                interactive=False
            )

    # Example texts section
    gr.Markdown("## 🚀 Quick Examples")
    gr.Markdown("Try these example texts:")
    gr.Markdown("- Hello! Welcome to AI Text-to-Speech. I can convert any text into natural speech.")
    gr.Markdown("- This system uses advanced neural networks to generate high-quality voice synthesis.")
    gr.Markdown("- Once upon a time, there was a magical voice that could bring any story to life.")
    gr.Markdown("- The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.")

    # Information section
    with gr.Accordion("ℹ️ About This App", open=False):
        gr.Markdown("""
        ### 🛠️ Technical Details
        - **Model**: KittenTTS nano (high-quality, fast)
        - **Output**: 24kHz WAV audio files
        - **Voices**: 8 different voice options
        - **Speed**: Adjustable from 0.5x to 2.0x

        ### 🎯 How to Use
        1. Enter your text (up to 500 characters)
        2. Select a voice from the dropdown
        3. Adjust the speech speed if needed
        4. Click "Generate Speech"
        5. Listen to the generated audio
        6. Download the audio file if needed

        ### 🔗 Source Code
        Available on GitHub: [ai-tts-chatbot](https://github.com/your-username/ai-tts-chatbot)

        ### 📄 License
        MIT License - Free to use and modify
        """)

    # Event handlers: the button and the Enter key run the same generation
    # function with the same inputs and outputs.
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output, status_output]
    )

    # Auto-generate on Enter key
    text_input.submit(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output, status_output]
    )
261
+
262
# Launch the app
if __name__ == "__main__":
    # `share` is intentionally not forced to True: share links are not
    # supported on Hugging Face Spaces, and on a local run a hard-coded
    # share=True would expose the server through a public tunnel. Leaving it
    # unset keeps Gradio's default (which can be overridden with the
    # GRADIO_SHARE environment variable).
    app.queue(default_concurrency_limit=10).launch(
        server_name="0.0.0.0",  # listen on all interfaces (needed in containers)
        server_port=7860,
        show_error=True
    )
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl
2
+ gradio==4.44.1
3
+ soundfile==0.13.1