|
from TTS.api import TTS
|
|
import numpy as np
|
|
import simpleaudio as sa
|
|
import torch
|
|
|
|
|
|
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
|
|
# Load the multilingual XTTS v2 model with the progress bar enabled and
# move it onto the device selected above.
tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/xtts_v2",
    progress_bar=True,
).to(device)
|
|
|
|
|
|
def stream_tts(text, speaker="Ana Florence", language="en", chunk_size=20):
    """Synthesize *text* in word chunks and play each chunk in turn.

    The text is split on whitespace into groups of at most ``chunk_size``
    words. Each group is synthesized with the module-level ``tts`` model and
    played through ``simpleaudio``; the call blocks until a group has
    finished playing before the next group is synthesized.

    Args:
        text: Text to speak. Empty or whitespace-only text produces no
            chunks, so nothing is synthesized or played.
        speaker: Speaker name forwarded to ``tts.tts``.
        language: Language code forwarded to ``tts.tts``.
        chunk_size: Maximum number of words per synthesized chunk.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    # A negative step would make range() yield nothing and silently skip all
    # of the text; reject it up front (zero already failed inside range()).
    if chunk_size < 1:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )

    words = text.split()

    for start in range(0, len(words), chunk_size):
        chunk = " ".join(words[start:start + chunk_size])
        print(f"Processing chunk: {chunk}")

        audio = tts.tts(text=chunk, speaker=speaker, language=language)

        # Clip before scaling: a float sample outside [-1.0, 1.0] does not
        # fit in int16 once multiplied by 32767, and the cast result for
        # out-of-range values is not well-defined.
        # NOTE(review): assumes the model emits samples nominally in
        # [-1, 1] -- confirm against the TTS backend.
        samples = np.clip(np.asarray(audio, dtype=np.float32), -1.0, 1.0)
        pcm = (samples * 32767).astype(np.int16)

        # play_buffer arguments: 1 channel, 2 bytes per sample (int16),
        # and the synthesizer's reported output sample rate.
        play_obj = sa.play_buffer(pcm, 1, 2, tts.synthesizer.output_sample_rate)
        play_obj.wait_done()
|
|
|
|
|
|
# Interactive prompt: read a line, speak it, repeat until the user quits.
print("Enter text to generate speech. Type 'exit' to quit.")

while True:
    try:
        # Strip surrounding whitespace so inputs like " exit " still quit.
        text = input("Enter text: ").strip()
    except EOFError:
        # stdin was closed (Ctrl-D or end of piped input): quit cleanly
        # instead of dying with a traceback.
        print()
        print("Exiting...")
        break

    if text.lower() == "exit":
        print("Exiting...")
        break

    # Nothing to speak; prompt again rather than running an empty cycle.
    if not text:
        continue

    print("Streaming speech...")
    stream_tts(text, speaker="Ana Florence", language="en")
    print("Streaming finished.")