Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -1,76 +1,66 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
from TTS.api import TTS
|
3 |
-
from pydub import AudioSegment
|
4 |
-
import tempfile
|
5 |
import os
|
|
|
6 |
|
7 |
-
# ✅ Accept XTTS License Automatically
|
8 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
9 |
|
10 |
-
#
|
11 |
-
|
12 |
-
tts.to("cpu")
|
13 |
|
14 |
-
# β
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
sound.export(temp_wav.name, format="wav")
|
24 |
-
return temp_wav.name
|
25 |
-
except Exception as e:
|
26 |
-
raise RuntimeError(f"Audio conversion failed: {str(e)}")
|
27 |
|
28 |
-
# β
|
29 |
-
def
|
30 |
-
if
|
31 |
-
return "
|
32 |
|
33 |
-
if
|
34 |
-
return "
|
35 |
-
|
36 |
-
if len(text) > 500:
|
37 |
-
return "β οΈ Text is too long. Please enter 500 characters or fewer."
|
38 |
|
39 |
try:
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
speaker_wav=speaker_wav,
|
48 |
-
language="en",
|
49 |
-
file_path=output_path
|
50 |
-
)
|
51 |
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
return "β Generation failed: Output audio file is empty."
|
56 |
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
with gr.Row():
|
67 |
-
text_input = gr.Textbox(label="Text to Speak", placeholder="Enter up to 500 characters...", max_lines=5)
|
68 |
-
audio_input = gr.Audio(label="Voice Sample (MP3 or WAV)", type="filepath")
|
69 |
-
|
70 |
-
output_audio = gr.Audio(label="π£οΈ Generated Voice", type="filepath")
|
71 |
-
generate_btn = gr.Button("π Generate Voice")
|
72 |
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
-
|
76 |
-
demo.launch(share=True)
|
|
|
1 |
import gradio as gr
|
2 |
+
import torch
|
3 |
from TTS.api import TTS
|
|
|
|
|
4 |
import os
|
5 |
+
import soundfile as sf
|
6 |
|
|
|
7 |
# Accept the XTTS license automatically.
os.environ["COQUI_TOS_AGREED"] = "1"

# Smart device detection
use_gpu = torch.cuda.is_available()

# ✅ XTTS Model Initialization with Proper Error Handling
# On any failure `tts` is left as None so the app can still start; clone()
# checks for that and reports the problem instead of crashing.
try:
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=True)
    # Place the model explicitly rather than through the constructor's `gpu` flag.
    tts.to("cuda" if use_gpu else "cpu")
    if not hasattr(tts, "synthesizer") or not hasattr(tts.synthesizer, "tts_model"):
        raise RuntimeError("XTTS model failed to load correctly.")
    print(f"[INFO] XTTS model loaded successfully. GPU enabled: {use_gpu}")
except Exception as e:
    print(f"[ERROR] Failed to initialize XTTS model: {e}")
    tts = None  # Prevents further crashes
|
|
|
|
|
|
|
|
|
21 |
|
22 |
+
# ✅ Fixed clone() Function
def clone(text, audio):
    """Speak ``text`` in the voice of the reference recording ``audio``.

    Args:
        text: The text to synthesize.
        audio: Filesystem path of the reference voice sample, as delivered by
            the ``gr.Audio(type='filepath')`` input.

    Returns:
        Path of the generated WAV file, the single value expected by the
        interface's ``gr.Audio`` output.

    Raises:
        gr.Error: If the model is not loaded, an input is missing or invalid,
            or synthesis fails. Raising (rather than returning a
            ``(None, message)`` tuple) keeps the return value compatible with
            the single audio output while still surfacing the message.
    """
    if tts is None:
        raise gr.Error("❌ XTTS model failed to load.")

    if not text or not audio:
        raise gr.Error("❌ Error: Missing text or audio input.")

    # ✅ Validate audio input (done outside the try so the message below is
    # not swallowed by the generic handler).
    if not isinstance(audio, str) or not os.path.exists(audio):
        raise gr.Error("❌ Error: Invalid audio input format.")

    # NOTE(review): this fixed path is shared by every request; concurrent
    # calls would overwrite each other's output.
    output_path = "./output.wav"

    try:
        # ✅ XTTS Processing with Error Handling
        tts.tts_to_file(text=text, speaker_wav=audio, language="en", file_path=output_path)

        # ✅ Ensure output file is valid before passing to Gradio
        if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
            raise RuntimeError("output file is missing or empty")

        # ✅ Convert output file format: re-write the file through soundfile
        # (presumably to normalize the WAV encoding for Gradio; TODO confirm
        # this round-trip is still required).
        audio_data, samplerate = sf.read(output_path)
        sf.write(output_path, audio_data, samplerate)
    except Exception as e:
        print(f"[ERROR] XTTS Processing Error: {e}")
        raise gr.Error("❌ Error: XTTS failed to generate audio.") from e

    return output_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
+
# ✅ Fixed Gradio Setup
# One text box and one reference voice clip in, one generated audio clip out.
iface = gr.Interface(
    fn=clone,
    inputs=[
        gr.Textbox(label="Text"),
        gr.Audio(type="filepath", label="Voice reference audio file"),
    ],
    outputs=gr.Audio(type="filepath"),
    # The original literal opened with ' and closed with ", a SyntaxError.
    title="Voice Clone",
    flagging_mode="never",
    cache_examples=False,
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
)

iface.launch()
|
|