import gradio as gr
import torch
from TTS.api import TTS
import os
import time

# Accept the Coqui Public Model License non-interactively (required for XTTS v2).
os.environ["COQUI_TOS_AGREED"] = "1"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Running on: {device}")

# Coqui TTS caches downloaded models under ~/.local/share/tts on Linux; if the
# XTTS v2 files are not there yet, the TTS() call below downloads them first.
model_path = os.path.expanduser("~/.local/share/tts/tts_models--multilingual--multi-dataset--xtts_v2")
if not os.path.exists(model_path):
    print(f"Model files not found at {model_path}. Initial download will occur.")
else:
    print(f"Model files found at {model_path}. Skipping initial download.")

print("Initializing TTS model...") |
|
try: |
|
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=False, gpu=torch.cuda.is_available()) |
|
print("TTS model initialized successfully.") |
|
except Exception as e: |
|
print(f"Error initializing TTS model: {e}") |
|
print("This might indicate issues with model files or environment setup.") |
|
exit() |
|
|
|
|
|
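# NOTE (a sketch, not required): recent Coqui TTS releases prefer moving the
# wrapper with .to(device) instead of the gpu= flag; the flag is kept above for
# compatibility. Uncomment the line below if your TTS version warns about gpu=.
# tts.to(device)
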
def clone(text, audio):
    """Generate speech in the voice of the reference audio clip using XTTS v2."""
    print(f"Received request: Text='{text[:50]}...' (length {len(text)}), Audio='{audio}'")

    # Gradio passes None when no reference audio was provided.
    if audio is None or not os.path.exists(audio):
        print(f"Error: Reference audio file not found at {audio}")
        gr.Warning("Reference audio file not found. Please ensure the path is correct.")
        return None

    start_time = time.time()
    try:
        print("Starting audio generation...")
        tts.tts_to_file(text=text, speaker_wav=audio, language="en", file_path="./output.wav")
        end_time = time.time()
        print(f"Audio generated in {end_time - start_time:.2f} seconds.")

        if os.path.exists("./output.wav") and os.path.getsize("./output.wav") > 0:
            print("Output file './output.wav' created successfully.")
            return "./output.wav"
        else:
            print("Warning: Output file is empty or not created.")
            gr.Warning("Audio generation completed, but the output file is empty or missing.")
            return None
    except Exception as e:
        print(f"Error during audio generation: {e}")
        # gr.Error must be raised (not just constructed) to show up in the UI.
        raise gr.Error(f"An error occurred during audio generation: {e}")

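# Quick local smoke test (a sketch, assuming one of the bundled reference clips
# listed in the examples below, e.g. ./audio/Godfather.wav, sits next to this script):
# clone("Testing the voice clone pipeline.", "./audio/Godfather.wav")
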
iface = gr.Interface(
    fn=clone,
    inputs=[
        gr.Textbox(label='Text'),
        gr.Audio(type='filepath', label='Voice reference audio file')
    ],
    outputs=gr.Audio(type='filepath'),
    title='Voice Clone',
    description="""
by [Tony Assi](https://www.tonyassi.com/)

This Space uses the `xtts_v2` model. **Non-commercial use only**. [Coqui Public Model License](https://coqui.ai/cpml)

Please ❤️ this Space. <a href="mailto:[email protected]">Email me</a>.
""",
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
    examples=[
        ["Hey! It's me Dorothy, from the Wizard of Oz. Type in whatever you'd like me to say.", "./audio/Wizard-of-Oz-Dorthy.wav"],
        ["It's me Vito Corleone, from the Godfather. Type in whatever you'd like me to say.", "./audio/Godfather.wav"],
        ["Hey, it's me Paris Hilton. Type in whatever you'd like me to say.", "./audio/Paris-Hilton.mp3"],
        ["Hey, it's me Megan Fox from Transformers. Type in whatever you'd like me to say.", "./audio/Megan-Fox.mp3"],
        ["Hey there, it's me Jeff Goldblum. Type in whatever you'd like me to say.", "./audio/Jeff-Goldblum.mp3"],
        ["Hey there, it's me Heath Ledger as the Joker. Type in whatever you'd like me to say.", "./audio/Heath-Ledger.mp3"]
    ],
    cache_examples=False
)

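# Optional: if several users may hit this app at once, enabling Gradio's request
# queue avoids overlapping writes to ./output.wav (a sketch; tune concurrency
# settings for your hardware):
# iface.queue()
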
print("\nLaunching Gradio interface...") |
|
iface.launch() |
|
print("Gradio interface launched.") |