File size: 3,988 Bytes
d620cb2
f86f7a4
dad241e
f86f7a4
ed5d162
7c05b07
479df86
 
f86f7a4
479df86
ad241fb
 
f86f7a4
ed5d162
 
 
 
 
 
 
 
fdd8ee2
ed5d162
 
 
 
 
 
 
 
479df86
fdd8ee2
f86f7a4
ed5d162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f86f7a4
fdd8ee2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed5d162
fdd8ee2
 
ed5d162
fdd8ee2
ed5d162
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import gradio as gr
import torch
from TTS.api import TTS
import os
import time # Import time module for measuring performance

# Agree to Coqui Terms of Service.
# NOTE(review): presumably this keeps the model download from prompting
# interactively for licence acceptance - confirm against the TTS package.
os.environ["COQUI_TOS_AGREED"] = "1"

# Set device. CUDA availability is queried once and reused below so the
# logged device and the flag handed to TTS can never disagree.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")
print(f"Running on: {device}")

# --- Add check for model path before initialization ---
# Informational only: nothing below depends on the outcome of this check.
# NOTE(review): the path is hard-coded to one per-user location; verify it
# matches where the TTS package actually stores models on this host.
model_path = os.path.expanduser("~/.local/share/tts/tts_models--multilingual--multi-dataset--xtts_v2")
if not os.path.exists(model_path):
    print(f"Model files not found at {model_path}. Initial download will occur.")
else:
    print(f"Model files found at {model_path}. Skipping initial download.")


# Initialize TTS model (XTTS v2)
print("Initializing TTS model...")
try:
    tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=False, gpu=use_gpu)
    print("TTS model initialized successfully.")
except Exception as e:
    print(f"Error initializing TTS model: {e}")
    print("This might indicate issues with model files or environment setup.")
    # The app is useless without the model, so stop here. SystemExit is raised
    # directly (instead of calling the interactive-only `exit()` helper) and
    # with a non-zero status so a supervisor can tell startup failed.
    raise SystemExit(1) from e

# Define clone function
def clone(text, audio):
    """Speak ``text`` in the voice of the reference recording ``audio``.

    Synthesis uses the module-level ``tts`` model with the language fixed to
    English, and writes the result to ``./output.wav``.

    Args:
        text: The text to synthesize. ``None`` or whitespace-only text is
            rejected with a UI warning.
        audio: Filesystem path to the reference voice recording. May be
            ``None`` (presumably when no audio was supplied in the UI), in
            which case the request is rejected with a UI warning.

    Returns:
        ``"./output.wav"`` when a non-empty output file exists after
        synthesis; ``None`` when the inputs were rejected or the output file
        is missing or empty.

    Raises:
        gr.Error: If synthesis or the output-file check raises. The original
            exception is chained as the cause.
    """
    # Normalise so the logging and validation below cannot fail on None.
    text = text or ""
    print(f"Received request: Text='{text[:50]}...' (length {len(text)}), Audio='{audio}'") # Log input

    if not text.strip():
        print("Error: No text provided.")
        gr.Warning("No text provided. Please enter the text you want spoken.")
        return None

    # `not audio` guards against None/"" - os.path.exists(None) raises
    # TypeError rather than returning False.
    if not audio or not os.path.exists(audio):
        print(f"Error: Reference audio file not found at {audio}")
        gr.Warning("Reference audio file not found. Please ensure the path is correct.")
        return None # Nothing to play back

    output_path = "./output.wav"
    start_time = time.time()
    try:
        print("Starting audio generation...")
        tts.tts_to_file(text=text, speaker_wav=audio, language="en", file_path=output_path)
        end_time = time.time()
        print(f"Audio generated in {end_time - start_time:.2f} seconds.")

        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
            print("Output file './output.wav' created successfully.")
            return output_path

        print("Warning: Output file is empty or not created.")
        gr.Warning("Audio generation completed, but output file might be empty or not created.")
        return None # Indicate failure to Gradio
    except Exception as e:
        print(f"Error during audio generation: {e}")
        # gr.Error is an exception type: it only reaches the user when raised.
        # Merely constructing it (as before) silently discarded the message.
        raise gr.Error(f"An error occurred during audio generation: {e}") from e
# Build the Gradio Interface
# Blurb shown under the title (Markdown with a little inline HTML).
_DESCRIPTION = """
    by [Tony Assi](https://www.tonyassi.com/)
    This space uses the `xtts_v2` model. **Non-commercial use only**. [Coqui Public Model License](https://coqui.ai/cpml)
    Please ❤️ this Space. <a href="mailto:[email protected]">Email me</a>.
    """

# Each example row is [text prompt, reference audio path], matching the
# order of the interface inputs.
_EXAMPLES = [
    ["Hey! It's me Dorthy, from the Wizard of Oz. Type in whatever you'd like me to say.", "./audio/Wizard-of-Oz-Dorthy.wav"],
    ["It's me Vito Corleone, from the Godfather. Type in whatever you'd like me to say.", "./audio/Godfather.wav"],
    ["Hey, it's me Paris Hilton. Type in whatever you'd like me to say.", "./audio/Paris-Hilton.mp3"],
    ["Hey, it's me Megan Fox from Transformers. Type in whatever you'd like me to say.", "./audio/Megan-Fox.mp3"],
    ["Hey there, it's me Jeff Goldblum. Type in whatever you'd like me to say.", "./audio/Jeff-Goldblum.mp3"],
    ["Hey there, it's me Heath Ledger as the Joker. Type in whatever you'd like me to say.", "./audio/Heath-Ledger.mp3"],
]

# Components are created in the same order as before: text input, reference
# audio input, generated audio output, then the theme.
_text_input = gr.Textbox(label='Text')
_voice_input = gr.Audio(type='filepath', label='Voice reference audio file')
_audio_output = gr.Audio(type='filepath')
_theme = gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate")

# Wire `clone` into a two-input / one-output web UI.
iface = gr.Interface(
    fn=clone,
    inputs=[_text_input, _voice_input],
    outputs=_audio_output,
    title='Voice Clone',
    description=_DESCRIPTION,
    theme=_theme,
    examples=_EXAMPLES,
    # Example caching stays off; per the original author's note this avoids a
    # FileNotFoundError.
    cache_examples=False,
)

print("\nLaunching Gradio interface...")
iface.launch()
# NOTE(review): if launch() blocks until the server stops, this message is
# only printed at shutdown - confirm whether that is intended.
print("Gradio interface launched.")