File size: 2,794 Bytes
d0b6563
9776097
 
bdeb120
17855f6
3251e7e
156316e
fef87f0
fa8f0f6
 
 
 
9776097
 
 
029f491
9776097
fef87f0
 
 
 
af37368
 
9776097
 
90a8602
 
 
 
 
9776097
6bbdf7b
9776097
 
 
 
 
 
 
 
 
 
 
 
 
d3ac099
db19809
9776097
db19809
9776097
bdeb120
db19809
9776097
 
 
 
 
 
 
 
 
90a8602
17855f6
db19809
3a4982a
3251e7e
fef87f0
9776097
9d42120
3a4982a
 
9d42120
1c717f9
f82b319
 
3a4982a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import subprocess
import sys
import zipfile
from urllib.parse import unquote, urlparse

import gradio as gr
import requests
import torch
from TTS.api import TTS
from pydub import AudioSegment

# Fetch the UniDic dictionary for MeCab (presumably required for the "ja"
# language option -- confirm).  sys.executable makes sure the download runs
# under the same interpreter/environment as this script, and passing an
# argument list avoids going through a shell.  check=False keeps this step
# best-effort: the os.system() call it replaces ignored the exit status too.
subprocess.run([sys.executable, "-m", "unidic", "download"], check=False)


# Accept the Coqui terms of service non-interactively.  Set before the model
# is constructed below, which is presumably where the flag is read.
os.environ["COQUI_TOS_AGREED"] = "1"

# Model identifier handed to TTS(), the language codes offered in the UI, and
# the file extensions accepted for the reference (speaker) audio.
MODEL_PATH = "tts_models/multilingual/multi-dataset/xtts_v2"
LANGUAGES = ["en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn", "ja", "hu", "ko", "hi"]
AUDIO_FORMATS = [".wav", ".mp3", ".flac", ".mp4"]

# Prefer the first CUDA device when one is available, otherwise run on CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load the model once at import time; every request reuses this instance.
tts = TTS(MODEL_PATH).to(device)

def download_audio_file(url):
    """Download the audio file at *url* into the current working directory.

    The local file name is the last path segment of the URL (query string
    and fragment are ignored) and must end in one of ``AUDIO_FORMATS``.

    Args:
        url: Address of the audio file to fetch.

    Returns:
        The local file name the audio was saved under, or ``None`` when the
        request fails (malformed URL, connection error, timeout, HTTP error
        status, or an interrupted transfer).

    Raises:
        ValueError: If the URL does not name a supported audio format.
    """
    try:
        # stream=True defers the body so it can be written chunk by chunk
        # instead of being held in memory; the timeout stops a stalled
        # server from hanging the request indefinitely.  The context
        # manager releases the connection on every exit path.
        with requests.get(url, stream=True, timeout=30) as response:
            # Without this an HTML error page would be saved as "audio".
            response.raise_for_status()

            # Derive the name from the URL *path* only, so that
            # ".../clip.mp3?token=abc" is still recognised as an .mp3.
            # basename() keeps the write inside the working directory.
            file_name = os.path.basename(unquote(urlparse(url).path))
            file_extension = os.path.splitext(file_name)[-1].lower()
            if file_extension not in AUDIO_FORMATS:
                raise ValueError(f"Invalid audio file format: {file_extension}")

            try:
                with open(file_name, "wb") as f:
                    for chunk in response.iter_content(chunk_size=65536):
                        f.write(chunk)
            except requests.exceptions.RequestException:
                # The transfer died part-way: do not leave a truncated file
                # behind for a later step to pick up.
                if os.path.exists(file_name):
                    os.remove(file_name)
                raise
        return file_name
    except requests.exceptions.RequestException as e:
        print(f"Error downloading audio file: {e}")
        return None

def convert_to_wav(input_audio_file):
    """Return a path to a WAV version of *input_audio_file*.

    A file whose extension is already ``.wav`` (case-insensitive) is
    returned untouched.  Anything else is decoded with pydub, exported to
    ``temp.wav`` in the working directory, and the original file is deleted.

    Args:
        input_audio_file: Path of the audio file to normalise.

    Returns:
        The input path when it is already a WAV file, otherwise ``"temp.wav"``.
    """
    _, suffix = os.path.splitext(input_audio_file)
    if suffix.lower() == ".wav":
        return input_audio_file

    wav_path = "temp.wav"
    AudioSegment.from_file(input_audio_file).export(wav_path, format="wav")
    # The source file is no longer needed once the WAV copy exists.
    os.remove(input_audio_file)
    return wav_path

def synthesize_text(text, input_audio_file, language):
    """Speak *text* in the voice of *input_audio_file* and save the result.

    The reference audio is first passed through ``convert_to_wav``; the
    module-level ``tts`` model then writes the synthesized speech to
    ``./output.wav``.

    Args:
        text: The text to synthesize.
        input_audio_file: Path of the reference (speaker) audio file.
        language: Language code forwarded to the model.

    Returns:
        The path of the generated file, ``"./output.wav"``.
    """
    output_path = "./output.wav"
    speaker_reference = convert_to_wav(input_audio_file)
    tts.tts_to_file(
        text=text,
        speaker_wav=speaker_reference,
        language=language,
        file_path=output_path,
    )
    return output_path

def clone(text, input_file, language, url=None, use_url=False):
    """Gradio callback: synthesize *text* in the voice of a reference clip.

    The reference audio comes either from an uploaded file or, when
    *use_url* is set, from a file downloaded from *url*.

    Args:
        text: The text to synthesize.
        input_file: The uploaded reference audio, either an object exposing
            the file path as ``.name`` or the path itself.  Ignored when
            *use_url* is true.
        language: Language code forwarded to the model.
        url: Address of the reference audio; only used when *use_url* is true.
        use_url: Take the reference audio from *url* instead of *input_file*.

    Returns:
        Path of the synthesized audio file, or ``None`` when no usable
        reference audio was supplied or the download failed.
    """
    if use_url:
        # A blank textbox is expected to arrive as "" rather than None, so
        # test for emptiness (ignoring surrounding whitespace) instead of
        # identity with None.
        url = url.strip() if url else ""
        if not url:
            return None
        input_audio_file = download_audio_file(url)
        if input_audio_file is None:
            return None
    else:
        if input_file is None:
            return None
        # Depending on the Gradio version/configuration the upload is either
        # a file wrapper with a .name attribute or already a path string;
        # accept both.
        input_audio_file = getattr(input_file, "name", input_file)

    return synthesize_text(text, input_audio_file, language)

# Input widgets, listed in the positional order of clone()'s parameters:
# text, input_file, language, url, use_url.
_reference_upload = gr.File(label="Input File", file_types=AUDIO_FORMATS)
_language_picker = gr.Dropdown(choices=LANGUAGES, label="Language")
_url_box = gr.Text(label="URL")
_use_url_toggle = gr.Checkbox(label="Use URL", value=False)

# Markdown credit line shown under the title.
_description = """ by [Angetyde](https://youtube.com/@Angetyde?si=7nusP31nTumIkPTF) and [Tony Assi](https://www.tonyassi.com/ ) use this colab with caution <3. """

iface = gr.Interface(
    fn=clone,
    inputs=[
        "text",
        _reference_upload,
        _language_picker,
        _url_box,
        _use_url_toggle,
    ],
    # clone() returns a path on disk, hence type='filepath'.
    outputs=gr.Audio(type='filepath'),
    title='Voice Clone',
    description=_description,
    theme=gr.themes.Base(
        primary_hue="teal",
        secondary_hue="teal",
        neutral_hue="slate",
    ),
)

# Start the app as soon as the script runs, with Gradio's share option on.
iface.launch(share=True)