khulnasoft's picture
Upload 76 files
873d0cf verified
import soundfile as sf
synthesiser_ = None
def synthesiser():
from transformers import pipeline
global synthesiser_
if synthesiser_ is None:
synthesiser_ = pipeline("text-to-speech", "microsoft/speecht5_tts")
return synthesiser_
embeddings_dataset_ = None
def embeddings_dataset():
from datasets import load_dataset
global embeddings_dataset_
if embeddings_dataset_ is None:
embeddings_dataset_ = load_dataset(
"Matthijs/cmu-arctic-xvectors", split="validation"
)
return embeddings_dataset_
speaker_embedding_ = None
def speaker_embedding():
import torch
global speaker_embedding_
if speaker_embedding_ is None:
speaker_embedding_ = torch.tensor(
embeddings_dataset()[7306]["xvector"]
).unsqueeze(0)
return speaker_embedding_
def preload_tts_microsoft_local():
synthesiser()
embeddings_dataset()
speaker_embedding()
def tts_microsoft_local(text_chunk, location):
speech = synthesiser()(
text_chunk, forward_params={"speaker_embeddings": speaker_embedding()}
)
sf.write(location, speech["audio"], samplerate=speech["sampling_rate"])
return location