from TTS.api import TTS
import numpy as np
import simpleaudio as sa
import torch

# Check if a GPU is available
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Initialize the TTS object
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=True)
tts.to(device)  # Use the GPU if available

# Define a function to simulate streaming by synthesizing and playing the text in chunks
def stream_tts(text, speaker="Ana Florence", language="en", chunk_size=20):
    # Split the text into chunks of at most chunk_size words
    words = text.split()
    chunks = [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]

    # Generate and play each chunk in sequence
    for chunk in chunks:
        print(f"Processing chunk: {chunk}")
        audio = tts.tts(
            text=chunk,
            speaker=speaker,
            language=language
        )

        # Convert the audio to a playable format
        audio_data = np.array(audio, dtype=np.float32)
        audio_data = np.clip(audio_data, -1.0, 1.0)       # Avoid int16 overflow on out-of-range samples
        audio_data = (audio_data * 32767).astype(np.int16)  # Convert to 16-bit PCM
        play_obj = sa.play_buffer(audio_data, 1, 2, tts.synthesizer.output_sample_rate)
        play_obj.wait_done()  # Wait for the current chunk to finish playing

# Continuous loop for text input
print("Enter text to generate speech. Type 'exit' to quit.")
while True:
    # Get user input
    text = input("Enter text: ")

    # Exit the loop if the user types 'exit'
    if text.lower() == "exit":
        print("Exiting...")
        break

    # Stream the TTS output
    print("Streaming speech...")
    stream_tts(text, speaker="Ana Florence", language="en")
    print("Streaming finished.")