Rogerjs committed on
Commit
4479222
·
verified ·
1 Parent(s): 27e6d88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -1
app.py CHANGED
@@ -223,4 +223,114 @@ class VoiceSynthesizer:
223
 
224
  return filepath, None
225
 
226
- # Rest of the code remains the same...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
  return filepath, None
225
 
226
def create_interface():
    """Build the Gradio Blocks UI for voice synthesis.

    The layout has two columns: the left one captures a reference voice
    (microphone or upload) and hands it to
    ``VoiceSynthesizer.process_reference_audio``; the right one takes text,
    a TTS model choice and a per-model voice preset, and forwards them to
    ``VoiceSynthesizer.generate_speech``.

    Returns:
        The assembled ``gr.Blocks`` object. It is not launched here.
    """
    synthesizer = VoiceSynthesizer()

    # Dropdown label -> model name handed to synthesizer.generate_speech.
    # Insertion order doubles as the order of the dropdown entries.
    model_map = {
        "bark (Suno AI)": "bark",
        "speecht5 (Microsoft)": "speecht5",
    }
    bark_voice_choices = [
        "v2/en_speaker_6 (Female)",
        "v2/en_speaker_3 (Male)",
        "v2/en_speaker_9 (Neutral)",
    ]
    speecht5_voice_choices = ["Default Speaker"]

    with gr.Blocks() as interface:
        gr.Markdown("# 🎙️ Advanced Voice Synthesis")

        with gr.Row():
            # Left column: reference voice capture.
            with gr.Column():
                gr.Markdown("## 1. Capture Reference Voice")
                reference_audio = gr.Audio(
                    sources=["microphone", "upload"],
                    type="numpy",
                )
                process_ref_btn = gr.Button("Process Reference Voice")
                process_ref_output = gr.Textbox(
                    label="Reference Voice Processing",
                )

            # Right column: text input, model/preset selection, results.
            with gr.Column():
                gr.Markdown("## 2. Generate Speech")
                text_input = gr.Textbox(label="Enter Text to Speak")

                model_dropdown = gr.Dropdown(
                    choices=list(model_map),
                    label="Select TTS Model",
                    value="bark (Suno AI)",
                )

                # Only one preset dropdown is visible at a time; Bark is the
                # initially selected model, so its presets start out visible.
                with gr.Row():
                    bark_preset = gr.Dropdown(
                        choices=bark_voice_choices,
                        label="Bark Voice Preset",
                        visible=True,
                    )
                    speecht5_preset = gr.Dropdown(
                        choices=speecht5_voice_choices,
                        label="SpeechT5 Speaker",
                        visible=False,
                    )

                generate_btn = gr.Button("Generate Speech")
                audio_output = gr.Audio(label="Generated Speech")
                error_output = gr.Textbox(label="Errors", visible=True)

        # Reference audio -> synthesizer -> status textbox.
        process_ref_btn.click(
            fn=synthesizer.process_reference_audio,
            inputs=reference_audio,
            outputs=process_ref_output,
        )

        def update_model_visibility(model):
            """Show the preset dropdown of the chosen model and hide the other."""
            bark_selected = "bark" in model.lower()
            return {
                bark_preset: gr.update(visible=bark_selected),
                speecht5_preset: gr.update(visible=not bark_selected),
            }

        model_dropdown.change(
            fn=update_model_visibility,
            inputs=model_dropdown,
            outputs=[bark_preset, speecht5_preset],
        )

        def generate_speech_wrapper(text, model, bark_preset, speecht5_preset):
            """Pick the preset for the selected model and call the synthesizer.

            ``model`` is the dropdown label; it is translated to the model
            name through ``model_map`` before being passed on.
            """
            if "bark" in model:
                preset = bark_preset
            else:
                preset = speecht5_preset

            return synthesizer.generate_speech(
                text,
                model_name=model_map[model],
                voice_preset=preset,
            )

        generate_btn.click(
            fn=generate_speech_wrapper,
            inputs=[text_input, model_dropdown, bark_preset, speecht5_preset],
            outputs=[audio_output, error_output],
        )

    return interface
327
+
328
if __name__ == "__main__":
    # Launch settings kept together so they can be reviewed at a glance.
    launch_options = {
        "share": False,
        "debug": True,
        "show_error": True,
        "server_name": '0.0.0.0',
        "server_port": 7860,
    }

    interface = create_interface()
    interface.launch(**launch_options)