Spaces:

Hematej
/

conqui-TTS-Voice-cloning

Running

File size: 3,988 Bytes

d620cb2
f86f7a4
dad241e
f86f7a4
ed5d162
7c05b07
479df86
 
f86f7a4
479df86
ad241fb
 
f86f7a4
ed5d162
 
 
 
 
 
 
 
fdd8ee2
ed5d162
 
 
 
 
 
 
 
479df86
fdd8ee2
f86f7a4
ed5d162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f86f7a4
fdd8ee2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed5d162
fdd8ee2
 
ed5d162
fdd8ee2
ed5d162

import gradio as gr
import torch
from TTS.api import TTS
import os
import time # Import time module for measuring performance

# Agree to Coqui Terms of Service.
# Set before the model is constructed so the flag is already in the
# environment when the TTS library runs.
os.environ["COQUI_TOS_AGREED"] = "1"

# Set device: prefer CUDA when torch reports it as available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Running on: {device}")

# --- Add check for model path before initialization ---
# Purely informational: the result only selects which message is logged and
# does not influence what TTS() does below.
# NOTE(review): assumes the library stores XTTS v2 under this per-user
# directory -- confirm for the deployment platform.
model_path = os.path.expanduser("~/.local/share/tts/tts_models--multilingual--multi-dataset--xtts_v2")
if not os.path.exists(model_path):
    print(f"Model files not found at {model_path}. Initial download will occur.")
else:
    print(f"Model files found at {model_path}. Skipping initial download.")


# Initialize TTS model (XTTS v2)
print("Initializing TTS model...")
try:
    # Derive the GPU flag from `device` so the logged device and the one the
    # model is asked to use cannot disagree.
    tts = TTS(
        model_name="tts_models/multilingual/multi-dataset/xtts_v2",
        progress_bar=False,
        gpu=device.type == "cuda",
    )
    print("TTS model initialized successfully.")
except Exception as e:
    print(f"Error initializing TTS model: {e}")
    print("This might indicate issues with model files or environment setup.")
    # Stop with a non-zero status so a supervisor sees the start-up as failed.
    # The bare exit() helper is injected by the `site` module (not guaranteed
    # to exist) and would report success (status 0).
    raise SystemExit(1)

# Define clone function
def clone(text, audio):
    """Synthesize ``text`` in the voice of the reference recording ``audio``.

    Args:
        text: The text to speak. ``None`` is treated as an empty string.
        audio: Filesystem path of the reference voice sample, or ``None``
            when no sample was supplied.

    Returns:
        ``"./output.wav"`` when a non-empty output file was produced,
        otherwise ``None`` (blank text, missing reference audio, or an
        empty/missing output file). A ``gr.Warning`` is issued on each
        ``None`` path.

    Raises:
        gr.Error: If the TTS call itself raises; the original exception is
            chained as the cause.
    """
    output_path = "./output.wav"
    text = text or ""  # guard the slicing/len() below against None
    print(f"Received request: Text='{text[:50]}...' (length {len(text)}), Audio='{audio}'")  # Log input

    if not text.strip():
        print("Error: No text provided.")
        gr.Warning("Please enter some text to synthesize.")
        return None

    # `audio` is None when nothing was uploaded; os.path.exists(None) would
    # raise TypeError, so test for emptiness first.
    if not audio or not os.path.exists(audio):
        print(f"Error: Reference audio file not found at {audio}")
        gr.Warning("Reference audio file not found. Please ensure the path is correct.")
        return None

    # Remove any result left behind by an earlier request so the existence
    # check after synthesis reflects this run only.
    try:
        os.remove(output_path)
    except FileNotFoundError:
        pass

    start_time = time.time()
    try:
        print("Starting audio generation...")
        tts.tts_to_file(text=text, speaker_wav=audio, language="en", file_path=output_path)
    except Exception as e:
        print(f"Error during audio generation: {e}")
        # gr.Error is an exception class: it has to be raised, not merely
        # constructed, for the failure to propagate to the caller.
        raise gr.Error(f"An error occurred during audio generation: {e}") from e
    end_time = time.time()
    print(f"Audio generated in {end_time - start_time:.2f} seconds.")

    if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
        print("Output file './output.wav' created successfully.")
        return output_path

    print("Warning: Output file is empty or not created.")
    gr.Warning("Audio generation completed, but output file might be empty or not created.")
    return None  # Indicate failure to Gradio

# Build the Gradio Interface
# Components are created up front, in the same order as before
# (text input, reference-audio input, audio output), then handed to Interface.
prompt_box = gr.Textbox(label='Text')
reference_clip = gr.Audio(type='filepath', label='Voice reference audio file')
result_player = gr.Audio(type='filepath')

# Description text passed to the interface; kept verbatim, including its
# leading indentation.
about_text = """
    by [Tony Assi](https://www.tonyassi.com/)
    This space uses the `xtts_v2` model. **Non-commercial use only**. [Coqui Public Model License](https://coqui.ai/cpml)
    Please ❤️ this Space. <a href="mailto:[email protected]">Email me</a>.
    """

teal_theme = gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate")

# Each example row is [text, path to a reference audio file], matching the
# order of the two inputs above.
sample_rows = [
    ["Hey! It's me Dorthy, from the Wizard of Oz. Type in whatever you'd like me to say.", "./audio/Wizard-of-Oz-Dorthy.wav"],
    ["It's me Vito Corleone, from the Godfather. Type in whatever you'd like me to say.", "./audio/Godfather.wav"],
    ["Hey, it's me Paris Hilton. Type in whatever you'd like me to say.", "./audio/Paris-Hilton.mp3"],
    ["Hey, it's me Megan Fox from Transformers. Type in whatever you'd like me to say.", "./audio/Megan-Fox.mp3"],
    ["Hey there, it's me Jeff Goldblum. Type in whatever you'd like me to say.", "./audio/Jeff-Goldblum.mp3"],
    ["Hey there, it's me Heath Ledger as the Joker. Type in whatever you'd like me to say.", "./audio/Heath-Ledger.mp3"],
]

iface = gr.Interface(
    fn=clone,
    inputs=[prompt_box, reference_clip],
    outputs=result_player,
    title='Voice Clone',
    description=about_text,
    theme=teal_theme,
    examples=sample_rows,
    # Example caching is switched off; the original note says this is to
    # prevent a FileNotFoundError.
    cache_examples=False,
)

print("\nLaunching Gradio interface...")
iface.launch()
# NOTE(review): if launch() blocks while the server is running, this message
# is only printed once it returns -- confirm the intended timing.
print("Gradio interface launched.")