Spaces:
Running
Running
File size: 2,794 Bytes
d0b6563 9776097 bdeb120 17855f6 3251e7e 156316e fef87f0 fa8f0f6 9776097 029f491 9776097 fef87f0 af37368 9776097 90a8602 9776097 6bbdf7b 9776097 d3ac099 db19809 9776097 db19809 9776097 bdeb120 db19809 9776097 90a8602 17855f6 db19809 3a4982a 3251e7e fef87f0 9776097 9d42120 3a4982a 9d42120 1c717f9 f82b319 3a4982a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import gradio as gr
import os
import requests
import torch
import zipfile
from TTS.api import TTS
from pydub import AudioSegment
# Download the unidic dictionary at startup (original note: needed for mecab).
# NOTE(review): this shells out to whatever `python` is on PATH and ignores
# the exit status — confirm that is the interpreter running this app.
os.system('python -m unidic download')
# Must be set before the model is loaded below.
# Presumably accepts the Coqui terms of service non-interactively — confirm.
os.environ["COQUI_TOS_AGREED"] = "1"
# Model identifier handed to TTS() below.
MODEL_PATH = "tts_models/multilingual/multi-dataset/xtts_v2"
# Language codes offered in the UI dropdown and passed through to synthesis.
LANGUAGES = ["en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn", "ja", "hu", "ko", "hi"]
# File extensions accepted for the reference audio (upload filter and URL check).
AUDIO_FORMATS = [".wav", ".mp3", ".flac", ".mp4"]
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Load the model once at import time and move it to the selected device;
# every request reuses this single instance.
tts = TTS(MODEL_PATH).to(device)
def download_audio_file(url):
    """Download an audio file from *url* into the current working directory.

    The file name is taken from the last component of the URL path, so a
    query string or fragment does not interfere with the extension check.

    Args:
        url: HTTP(S) address of the audio file.

    Returns:
        The local file name on success, or ``None`` if the request failed
        (connection error, timeout, or a non-2xx HTTP status).

    Raises:
        ValueError: If the file extension is not listed in ``AUDIO_FORMATS``.
    """
    from urllib.parse import urlparse

    timeout_seconds = 30
    chunk_size = 64 * 1024
    partial_path = None
    try:
        # stream=True defers the body download until iter_content() is
        # consumed; the context manager releases the connection afterwards.
        with requests.get(url, stream=True, timeout=timeout_seconds) as response:
            # Without this an HTTP error page would be saved as "audio".
            response.raise_for_status()

            file_name = os.path.basename(urlparse(url).path)
            file_extension = os.path.splitext(file_name)[-1].lower()
            if file_extension not in AUDIO_FORMATS:
                raise ValueError(f"Invalid audio file format: {file_extension}")

            partial_path = file_name
            with open(file_name, "wb") as f:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    f.write(chunk)
        return file_name
    except requests.exceptions.RequestException as e:
        print(f"Error downloading audio file: {e}")
        # Do not leave a truncated file behind if the transfer broke midway.
        if partial_path is not None and os.path.exists(partial_path):
            os.remove(partial_path)
        return None
def convert_to_wav(input_audio_file):
    """Return a path to a WAV version of *input_audio_file*.

    A file that already has a ``.wav`` extension (case-insensitive) is
    returned unchanged. Any other file is decoded and re-exported as WAV
    into a freshly created temporary file, and the original input file is
    deleted once the conversion has succeeded.

    Args:
        input_audio_file: Path to the source audio file.

    Returns:
        The input path itself for WAV input, otherwise the path of the newly
        written WAV file. The caller owns the returned file.
    """
    import tempfile

    file_extension = os.path.splitext(input_audio_file)[-1].lower()
    if file_extension == ".wav":
        return input_audio_file

    # A unique file per call: a fixed name such as "temp.wav" would be
    # overwritten whenever two conversions overlap.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        audio = AudioSegment.from_file(input_audio_file)
        audio.export(wav_path, format="wav")
    except Exception:
        # Conversion failed: drop the empty placeholder and keep the input.
        os.remove(wav_path)
        raise

    os.remove(input_audio_file)
    return wav_path
def synthesize_text(text, input_audio_file, language):
    """Synthesize *text* in the voice of the reference audio.

    The reference audio is converted to WAV first if necessary, then passed
    to the module-level ``tts`` model as the speaker sample.

    Args:
        text: Text to speak.
        input_audio_file: Path to the reference (speaker) audio file.
        language: Language code passed through to the model.

    Returns:
        Path of the generated WAV file. Each call writes to its own
        temporary file, so overlapping calls cannot overwrite one another's
        result; the caller owns the returned file.
    """
    import tempfile

    speaker_wav = convert_to_wav(input_audio_file)

    fd, output_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_wav,
            language=language,
            file_path=output_path,
        )
    except Exception:
        # Synthesis failed: do not leave an empty output file behind.
        if os.path.exists(output_path):
            os.remove(output_path)
        raise
    finally:
        # A path different from the input is an intermediate file produced
        # by convert_to_wav; it is no longer needed once synthesis is done.
        if speaker_wav != input_audio_file and os.path.exists(speaker_wav):
            os.remove(speaker_wav)
    return output_path
def clone(text, input_file, language, url=None, use_url=False):
    """Gradio callback: speak *text* in the voice of a reference recording.

    The reference audio comes either from an uploaded file or, when
    *use_url* is set, from a file downloaded from *url*.

    Args:
        text: Text to synthesize.
        input_file: Uploaded reference audio; either a path string or an
            object exposing the path as ``.name``. Ignored when *use_url*
            is true.
        language: Language code for synthesis.
        url: Address of the reference audio; used only when *use_url* is true.
        use_url: Select the URL instead of the uploaded file.

    Returns:
        Path to the generated audio file, or ``None`` when the selected
        input is missing or the download failed.
    """
    if use_url:
        # An empty textbox arrives as "" rather than None, so test for
        # emptiness (ignoring surrounding whitespace), not identity.
        if isinstance(url, str):
            url = url.strip()
        if not url:
            return None
        input_audio_file = download_audio_file(url)
        if input_audio_file is None:
            return None
    else:
        if input_file is None:
            return None
        # Accept both a file-like wrapper (path in .name) and a plain path.
        input_audio_file = getattr(input_file, "name", input_file)

    return synthesize_text(text, input_audio_file, language)
# Build the web UI. The input components map positionally onto the parameters
# of clone(): text, input_file, language, url, use_url.
iface = gr.Interface(
    fn=clone,
    inputs=[
        "text",
        gr.File(label="Input File", file_types=AUDIO_FORMATS),
        gr.Dropdown(choices=LANGUAGES, label="Language"),
        gr.Text(label="URL"),
        gr.Checkbox(label="Use URL", value=False),
    ],
    outputs=gr.Audio(type='filepath'),
    title='Voice Clone',
    description=""" by [Angetyde](https://youtube.com/@Angetyde?si=7nusP31nTumIkPTF) and [Tony Assi](https://www.tonyassi.com/ ) use this colab with caution <3. """,
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
)
# Start the server; share=True additionally requests a public share link.
iface.launch(share=True)