Hematej committed on
Commit
3348872
·
verified ·
1 Parent(s): d06f9f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -59
app.py CHANGED
@@ -1,76 +1,66 @@
1
  import gradio as gr
 
2
  from TTS.api import TTS
3
- from pydub import AudioSegment
4
- import tempfile
5
  import os
 
6
 
7
# Accept the XTTS license automatically by setting the agreement flag
# before the model object is constructed.
os.environ["COQUI_TOS_AGREED"] = "1"

# Load XTTS v2 once at import time and keep it on the CPU.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")
tts.to("cpu")
13
 
14
def convert_audio_to_wav(audio_file_path):
    """Convert a speaker sample to a 16-bit, mono, 22.05 kHz WAV file.

    Args:
        audio_file_path: Path to the voice sample, in any format that
            ``AudioSegment.from_file`` can decode.

    Returns:
        Path to a newly created temporary ``.wav`` file. The file is not
        deleted automatically; the caller is responsible for removing it.

    Raises:
        RuntimeError: If the sample is shorter than 2 seconds, or if decoding
            or exporting fails. The underlying exception is attached as
            ``__cause__``.
    """
    try:
        sound = AudioSegment.from_file(audio_file_path)

        # len() of the segment is in milliseconds; convert to seconds.
        duration_seconds = len(sound) / 1000.0
        if duration_seconds < 2:
            raise ValueError("Voice sample is too short. Please use at least 2 seconds of clear speech.")

        # 16-bit (2-byte samples), mono, 22.05 kHz.
        sound = sound.set_frame_rate(22050).set_channels(1).set_sample_width(2)

        # Reserve a unique path and close the handle straight away, so the
        # descriptor is not leaked and export() can reopen the file itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
            wav_path = temp_wav.name

        try:
            sound.export(wav_path, format="wav")
        except Exception:
            # Do not leave an empty or partial temp file behind on failure.
            if os.path.exists(wav_path):
                os.remove(wav_path)
            raise

        return wav_path
    except Exception as e:
        # Single error type for callers; keep the original cause chained.
        raise RuntimeError(f"Audio conversion failed: {e}") from e
27
 
28
def clone_voice(text, speaker_audio):
    """Generate speech for ``text`` in the voice of ``speaker_audio``.

    Args:
        text: Text to speak; must be non-blank and at most 500 characters.
        speaker_audio: Path to the voice sample, or an object exposing the
            path as ``.name``.

    Returns:
        Path to the generated WAV file.

    Raises:
        gr.Error: If an input is missing or too long, or if conversion or
            synthesis fails. The output component is an audio player, so a
            returned message string would be treated as a file path;
            raising ``gr.Error`` surfaces the message to the user instead.
    """
    if not speaker_audio:
        raise gr.Error("⚠️ Please upload a voice sample.")

    # `not text` also covers None, which has no .strip().
    if not text or not text.strip():
        raise gr.Error("⚠️ Text input is empty. Please enter something to speak.")

    if len(text) > 500:
        raise gr.Error("⚠️ Text is too long. Please enter 500 characters or fewer.")

    speaker_wav = None
    try:
        speaker_path = speaker_audio if isinstance(speaker_audio, str) else speaker_audio.name
        speaker_wav = convert_audio_to_wav(speaker_path)

        # Reserve a unique output path; close the handle so it is not leaked.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as out_file:
            output_path = out_file.name

        print(f"[INFO] Generating voice from: {speaker_wav}")
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_wav,
            language="en",
            file_path=output_path,
        )
    except Exception as e:
        print(f"[ERROR] {e}")
        raise gr.Error(f"❌ Error: {e}") from e
    finally:
        # The converted sample is only needed during synthesis.
        if speaker_wav and os.path.exists(speaker_wav):
            os.remove(speaker_wav)

    # Treat a missing or tiny (<= 1000 bytes) file as a failed generation.
    if not (os.path.exists(output_path) and os.path.getsize(output_path) > 1000):
        raise gr.Error("❌ Generation failed: Output audio file is empty.")

    return output_path
60
 
61
# Gradio UI: text box and voice sample side by side, with the output player
# and the generate button below them.
with gr.Blocks(title="XTTS v2 Voice Cloner") as demo:
    gr.Markdown("## 🎙️ XTTS v2 - Multilingual Voice Cloner (CPU-friendly)")
    gr.Markdown("Upload a short voice sample and enter text to generate speech in the same voice.")

    with gr.Row():
        text_input = gr.Textbox(label="Text to Speak", placeholder="Enter up to 500 characters...", max_lines=5)
        audio_input = gr.Audio(label="Voice Sample (MP3 or WAV)", type="filepath")

    output_audio = gr.Audio(label="🗣️ Generated Voice", type="filepath")
    generate_btn = gr.Button("🔊 Generate Voice")

    # Event wiring has to happen inside the Blocks context.
    generate_btn.click(fn=clone_voice, inputs=[text_input, audio_input], outputs=output_audio)

# Launch with a public share link.
demo.launch(share=True)
 
1
  import gradio as gr
2
+ import torch
3
  from TTS.api import TTS
 
 
4
  import os
5
+ import soundfile as sf
6
 
 
7
  os.environ["COQUI_TOS_AGREED"] = "1"
8
 
9
# Device selection: use the GPU only when torch reports a CUDA device.
use_gpu = torch.cuda.is_available()

# Load XTTS v2 once at import time. Any failure is logged and leaves `tts`
# set to None, so later code can check for it instead of crashing.
try:
    tts = TTS(
        model_name="tts_models/multilingual/multi-dataset/xtts_v2",
        gpu=use_gpu,
        progress_bar=True,
    )
    # Sanity check: the loaded object must expose synthesizer.tts_model.
    if not (hasattr(tts, "synthesizer") and hasattr(tts.synthesizer, "tts_model")):
        raise RuntimeError("XTTS model failed to load correctly.")
    print(f"[INFO] XTTS model loaded successfully. GPU enabled: {use_gpu}")
except Exception as err:
    print(f"[ERROR] Failed to initialize XTTS model: {err}")
    tts = None
 
 
 
 
21
 
22
def clone(text, audio):
    """Synthesize ``text`` in the voice of the reference recording ``audio``.

    Args:
        text: Text to speak.
        audio: Filesystem path to the reference voice recording.

    Returns:
        Path to the generated WAV file.

    Raises:
        gr.Error: If the model failed to load, an input is missing, the
            audio path is invalid, or synthesis fails. The interface has a
            single audio output, so a returned ``(None, message)`` tuple
            cannot be displayed; raising ``gr.Error`` shows the message.
    """
    # Imported locally: this function is the only user of tempfile.
    import tempfile

    if tts is None:
        raise gr.Error("⚠ XTTS model failed to load.")

    if not text or not audio:
        raise gr.Error("⚠ Error: Missing text or audio input.")

    # A bool is never a str, so the str check alone rejects it.
    if not isinstance(audio, str) or not os.path.exists(audio):
        raise gr.Error("⚠ Error: Invalid audio input format.")

    # One output file per request, so concurrent calls do not overwrite
    # each other's result.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as out_file:
        output_path = out_file.name

    try:
        tts.tts_to_file(text=text, speaker_wav=audio, language="en", file_path=output_path)

        # Make sure something was written before handing the path to Gradio.
        if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
            raise RuntimeError("output file is missing or empty")

        # Re-encode the file through soundfile, as the original code did.
        # NOTE(review): presumably normalizes the WAV encoding - confirm
        # whether it is still needed.
        audio_data, samplerate = sf.read(output_path)
        sf.write(output_path, audio_data, samplerate)
    except Exception as e:
        print(f"[ERROR] XTTS Processing Error: {str(e)}")
        # Do not leave a partial output file behind.
        if os.path.exists(output_path):
            os.remove(output_path)
        raise gr.Error("⚠ Error: XTTS failed to generate audio.") from e

    return output_path
 
 
 
 
 
 
 
51
 
52
# Gradio UI: a text box and a reference-audio upload feed `clone`, and the
# result is shown in a single audio player.
iface = gr.Interface(
    fn=clone,
    inputs=[
        gr.Textbox(label="Text"),
        gr.Audio(type="filepath", label="Voice reference audio file"),
    ],
    outputs=gr.Audio(type="filepath"),
    # The original title opened with ' and closed with ", a syntax error.
    title="Voice Clone",
    flagging_mode="never",
    cache_examples=False,
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
)

iface.launch()