Spaces:

Hematej
/

conqui-tts2

Build error

App Files Files Community

Hematej committed on Jun 7

Commit

3348872

verified ·

1 Parent(s): d06f9f3

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -59

app.py CHANGED Viewed

@@ -1,76 +1,66 @@
 import gradio as gr
 from TTS.api import TTS
-from pydub import AudioSegment
-import tempfile
 import os
-# ✅ Accept XTTS License Automatically
 os.environ["COQUI_TOS_AGREED"] = "1"
-# ✅ Load XTTS v2 (CPU-friendly)
-tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
-tts.to("cpu")
-# ✅ Convert speaker audio to XTTS-compatible WAV (16-bit, mono, 22.05 kHz)
-def convert_audio_to_wav(audio_file_path):
-    try:
-        sound = AudioSegment.from_file(audio_file_path)
-        duration_seconds = len(sound) / 1000.0
-        if duration_seconds < 2:
-            raise ValueError("Voice sample is too short. Please use at least 2 seconds of clear speech.")
-        sound = sound.set_frame_rate(22050).set_channels(1).set_sample_width(2)  # 16-bit mono 22.05kHz
-        temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
-        sound.export(temp_wav.name, format="wav")
-        return temp_wav.name
-    except Exception as e:
-        raise RuntimeError(f"Audio conversion failed: {str(e)}")
-# ✅ Clone voice
-def clone_voice(text, speaker_audio):
-    if not speaker_audio:
-        return "⚠️ Please upload a voice sample."
-    if len(text.strip()) == 0:
-        return "⚠️ Text input is empty. Please enter something to speak."
-    if len(text) > 500:
-        return "⚠️ Text is too long. Please enter 500 characters or fewer."
     try:
-        speaker_path = speaker_audio if isinstance(speaker_audio, str) else speaker_audio.name
-        speaker_wav = convert_audio_to_wav(speaker_path)
-        output_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
-        print(f"[INFO] Generating voice from: {speaker_wav}")
-        tts.tts_to_file(
-            text=text,
-            speaker_wav=speaker_wav,
-            language="en",
-            file_path=output_path
-        )
-        if os.path.exists(output_path) and os.path.getsize(output_path) > 1000:
-            return output_path
-        else:
-            return "❌ Generation failed: Output audio file is empty."
-    except Exception as e:
-        print(f"[ERROR] {str(e)}")
-        return f"❌ Error: {str(e)}"
-# ✅ Gradio UI
-with gr.Blocks(title="XTTS v2 Voice Cloner") as demo:
-    gr.Markdown("## 🎙️ XTTS v2 - Multilingual Voice Cloner (CPU-friendly)")
-    gr.Markdown("Upload a short voice sample and enter text to generate speech in the same voice.")
-    with gr.Row():
-        text_input = gr.Textbox(label="Text to Speak", placeholder="Enter up to 500 characters...", max_lines=5)
-        audio_input = gr.Audio(label="Voice Sample (MP3 or WAV)", type="filepath")
-    output_audio = gr.Audio(label="🗣️ Generated Voice", type="filepath")
-    generate_btn = gr.Button("🔊 Generate Voice")
-    generate_btn.click(fn=clone_voice, inputs=[text_input, audio_input], outputs=output_audio)
-# ✅ Launch with public link
-demo.launch(share=True)

 import gradio as gr
+import torch
 from TTS.api import TTS
 import os
+import soundfile as sf
 os.environ["COQUI_TOS_AGREED"] = "1"
+# Pick the device: gpu=True is passed to TTS only when torch reports CUDA.
+use_gpu = torch.cuda.is_available()
+# Load XTTS v2 once at import time; any failure is logged and leaves tts as None.
+try:
+    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=use_gpu, progress_bar=True)
+    if not (hasattr(tts, "synthesizer") and hasattr(tts.synthesizer, "tts_model")):
+        raise RuntimeError("XTTS model failed to load correctly.")
+    print(f"[INFO] XTTS model loaded successfully. GPU enabled: {use_gpu}")
+except Exception as exc:
+    print(f"[ERROR] Failed to initialize XTTS model: {str(exc)}")
+    tts = None  # sentinel: clone() checks for None before synthesizing
+# ✅ Fixed clone() Function
+def clone(text, audio):
+    """Speak `text` in the voice of the reference clip at path `audio`.
+
+    Returns the output WAV path. Failures raise gr.Error: the interface has a
+    single audio output, so a (None, message) tuple is not a valid return.
+    """
+    if tts is None:
+        raise gr.Error("⚠ XTTS model failed to load.")
+    if not text or not audio:
+        raise gr.Error("⚠ Error: Missing text or audio input.")
+    if not isinstance(audio, str) or not os.path.exists(audio):
+        raise gr.Error("⚠ Error: Invalid audio input format.")
+    output_path = "./output.wav"
     try:
+        tts.tts_to_file(text=text, speaker_wav=audio, language="en", file_path=output_path)
+        # Rewrite the file through soundfile; a missing or empty output fails here.
+        audio_data, samplerate = sf.read(output_path)
+        sf.write(output_path, audio_data, samplerate)
+    except Exception as e:
+        print(f"[ERROR] XTTS Processing Error: {str(e)}")
+        raise gr.Error("⚠ Error: XTTS failed to generate audio.") from e
+    return output_path
+# Gradio UI: text plus a reference clip in, one synthesized audio file out.
+iface = gr.Interface(
+    fn=clone,
+    inputs=[
+        gr.Textbox(label='Text'),
+        gr.Audio(type='filepath', label='Voice reference audio file')
+    ],
+    outputs=gr.Audio(type='filepath'),
+    title='Voice Clone',
+    flagging_mode="never",
+    cache_examples=False,
+    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
+)
+iface.launch()