Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,899 Bytes
3f53728 a13c70d 11fdcf5 3062c72 11fdcf5 820c332 29f6b1d 820c332 11fdcf5 a13c70d 3062c72 3f53728 a13c70d 820c332 3062c72 820c332 29f6b1d 3062c72 29f6b1d 820c332 3062c72 820c332 d3b49fc 3062c72 820c332 d3b49fc 820c332 a13c70d 006f2a8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import spaces
import gradio as gr
import torch
from TTS.api import TTS
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.tts.utils.text.phonemizer import Phonemizer
import os
import json
import scipy.io.wavfile as wavfile
import numpy as np
# Set the COQUI_TOS_AGREED flag ahead of the model load below
# (presumably this skips the interactive Coqui licence prompt - confirm).
os.environ["COQUI_TOS_AGREED"] = "1"

# Load the multilingual XTTS v2 model and move it to the GPU.
device = "cuda"
_XTTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
tts = TTS(_XTTS_MODEL_NAME)
tts = tts.to(device)

# Initialize the tokenizer and the phonemizer.
tokenizer = TTSTokenizer(use_phonemes=False)
phonemizer = Phonemizer()
@spaces.GPU(enable_queue=True)
def clone(text, audio):
    """Synthesize ``text`` with a cloned voice and export phoneme information.

    Args:
        text: Text to synthesize; it is passed to the model with
            ``language="pl"``.
        audio: Reference speaker recording, forwarded to the model as
            ``speaker_wav``.

    Returns:
        A ``(wav_path, json_path)`` tuple: ``"./output.wav"`` holds the
        synthesized speech as 16-bit PCM written at 24000 Hz, and
        ``"./phonemes_info.json"`` holds a list of
        ``{"phoneme": ..., "index": ...}`` entries.
    """
    # Generate the speech waveform.
    wav = tts.tts(text=text, speaker_wav=audio, language="pl")

    # Convert to a NumPy array and save it as a WAV file. Samples are clamped
    # to [-1, 1] first: anything outside that range would overflow the int16
    # cast after scaling and show up as loud clicks in the output.
    samples = np.clip(np.asarray(wav), -1.0, 1.0)
    wavfile.write("./output.wav", 24000, (samples * 32767).astype(np.int16))

    # Convert the text to phonemes.
    # NOTE(review): the phonemizer is handed the token IDs produced by the
    # tokenizer rather than the raw text - confirm this is what it expects.
    tokens = tokenizer.text_to_ids(text)
    phonemes = phonemizer.phonemize(tokens, language="pl")

    # Pair every phoneme with its position in the sequence.
    phonemes_data = [
        {"phoneme": phoneme, "index": i}
        for i, phoneme in enumerate(phonemes)
    ]

    # Write the phoneme information to a JSON file.
    with open("./phonemes_info.json", "w", encoding="utf-8") as f:
        json.dump(phonemes_data, f, ensure_ascii=False, indent=2)

    return "./output.wav", "./phonemes_info.json"
# Gradio interface: takes the text and a reference voice recording, and
# returns the synthesized audio together with the phoneme JSON file.
iface = gr.Interface(
    fn=clone,
    inputs=[
        gr.Textbox(label='Tekst do syntezy'),
        gr.Audio(type='filepath', label='Plik audio z głosem referencyjnym'),
    ],
    outputs=[
        gr.Audio(type='filepath', label='Zsyntezowana mowa'),
        gr.File(label='Informacje o fonemach (JSON)'),
    ],
    title='Klonowanie Głosu z Informacjami o Fonemach',
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
)

# Start the web app.
iface.launch()