|
import os |
|
|
|
import torch |
|
|
|
from InferenceInterfaces.ToucanTTSInterface import ToucanTTSInterface |
|
|
|
|
|
def read_texts(model_id, sentence, filename, device="cpu", language="eng", speaker_reference=None, duration_scaling_factor=1.0): |
|
tts = ToucanTTSInterface(device=device, tts_model_path=model_id) |
|
tts.set_language(language) |
|
if speaker_reference is not None: |
|
tts.set_utterance_embedding(speaker_reference) |
|
if type(sentence) == str: |
|
sentence = [sentence] |
|
tts.read_to_file(text_list=sentence, file_location=filename, duration_scaling_factor=duration_scaling_factor) |
|
del tts |
|
|
|
|
|
def the_raven(version, model_id="Meta", exec_device="cpu", speaker_reference=None): |
|
os.makedirs("audios", exist_ok=True) |
|
|
|
read_texts(model_id=model_id, |
|
sentence=['Once upon a midnight dreary, while I pondered, weak, and weary,', |
|
'Over many a quaint, and curious volume, of forgotten lore,', |
|
'While I nodded, nearly napping, suddenly, there came a tapping,', |
|
'As of someone gently rapping, rapping at my chamber door.', |
|
'Ah, distinctly, I remember, it was in the bleak December,', |
|
'And each separate dying ember, wrought its ghost upon the floor.', |
|
'Eagerly, I wished the morrow, vainly, I had sought to borrow', |
|
'From my books surcease of sorrow, sorrow, for the lost Lenore,', |
|
'And the silken, sad, uncertain, rustling of each purple curtain', |
|
'Thrilled me, filled me, with fantastic terrors, never felt before.'], |
|
filename=f"audios/{version}_the_raven.wav", |
|
device=exec_device, |
|
language="eng", |
|
speaker_reference=speaker_reference) |
|
|
|
|
|
def sound_of_silence_single_utt(version, model_id="Meta", exec_device="cpu", speaker_reference=None): |
|
os.makedirs("audios", exist_ok=True) |
|
|
|
read_texts(model_id=model_id, |
|
sentence=["""In restless dreams I walked alone, |
|
Narrow streets of cobblestone. |
|
Beneath the halo of a streetlamp, |
|
I turned my collar to the cold and damp, |
|
When my eyes were stabbed, by the flash of a neon light, |
|
That split the night. |
|
And touched the sound, of silence."""], |
|
filename=f"audios/{version}_sound_of_silence_as_single_utterance.wav", |
|
device=exec_device, |
|
language="eng", |
|
speaker_reference=speaker_reference) |
|
|
|
|
|
def die_glocke(version, model_id="Meta", exec_device="cpu", speaker_reference=None): |
|
os.makedirs("audios", exist_ok=True) |
|
|
|
read_texts(model_id=model_id, |
|
sentence=["""Fest gemauert in der Erden, |
|
Steht die Form, aus Lehm gebrannt. |
|
Heute muss die Glocke werden! |
|
Frisch, Gesellen, seid zur Hand!"""], |
|
filename=f"audios/{version}_die_glocke.wav", |
|
device=exec_device, |
|
language="deu", |
|
speaker_reference=speaker_reference) |
|
|
|
|
|
def viet_poem(version, model_id="Meta", exec_device="cpu", speaker_reference=None): |
|
os.makedirs("audios", exist_ok=True) |
|
|
|
read_texts(model_id=model_id, |
|
sentence=["""Thân phận, |
|
ở một nơi luôn phải nhắc mình, |
|
im miệng, |
|
thân phận, |
|
là khi nói về quá khứ, |
|
ngó trước nhìn sau, |
|
là phải biết nhắm mắt bịt tai làm lơ, |
|
thờ ơ, |
|
với tất cả những điều gai chướng, |
|
thân phận chúng tôi ở đó, |
|
những quyển sách chuyền tay nhau như ăn cắp, |
|
ngôn luận ư? |
|
không có đất cho nghĩa tự do."""], |
|
filename=f"audios/{version}_viet_poem.wav", |
|
device=exec_device, |
|
language="vie", |
|
speaker_reference=speaker_reference, |
|
duration_scaling_factor=1.2) |
|
|
|
|
|
if __name__ == '__main__': |
|
exec_device = "cuda" if torch.cuda.is_available() else "cpu" |
|
print(f"running on {exec_device}") |
|
|
|
merged_speaker_references = ["audios/speaker_references/" + ref for ref in os.listdir("audios/speaker_references/")] |
|
|
|
sound_of_silence_single_utt(version="new_voc", |
|
model_id="Meta", |
|
exec_device=exec_device, |
|
speaker_reference=merged_speaker_references) |
|
|
|
die_glocke(version="new_voc", |
|
model_id="Meta", |
|
exec_device=exec_device, |
|
speaker_reference=merged_speaker_references) |
|
|
|
viet_poem(version="new_voc", |
|
model_id="Meta", |
|
exec_device=exec_device, |
|
speaker_reference=merged_speaker_references) |
|
|