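# Text-to-speech helpers for gpt-computer-agent: text is split into sentence
# chunks, each chunk is synthesized in a worker thread by the configured TTS
# provider (OpenAI or the local Microsoft provider), and the resulting MP3s are
# cached in artifacts_dir under the SHA-256 hash of the chunk.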
try:
    from ..llm import *
    from ..utils.db import *
    from .tts_providers.openai import tts_openai
    from .tts_providers.microsoft_local import tts_microsoft_local
except ImportError:
    from llm import *
    from utils.db import *
    from audio.tts_providers.openai import tts_openai
    from audio.tts_providers.microsoft_local import tts_microsoft_local

import os
import hashlib
import random
import threading
def is_local_tts_available():
    # Probe the optional local-TTS dependencies (assumption: the
    # gpt-computer-agent[local_tts] extra installs torch/transformers).
    try:
        import torch  # noqa: F401
        import transformers  # noqa: F401
        return True
    except ImportError:
        return False
def is_openai_tts_available():
    """Return True if the active model uses the OpenAI provider and an API key is set."""
    the_model = load_model_settings()
    if llm_settings[the_model]["provider"] == "openai":
        if load_api_key() != "CHANGE_ME":
            return True
    return False
supported_openai_speakers = ["fable"]  # Voices enabled for OpenAI TTS


def random_model(exclude):
    """Pick a supported voice other than `exclude`; fall back to `exclude` if it is the only one."""
    models = supported_openai_speakers.copy()
    models.remove(exclude)
    # Guard against an empty list (only one supported voice) so random.choice() never raises.
    return random.choice(models) if models else exclude
def generate_speech_chunk(text_chunk, index, voice, results):
    """Synthesize one chunk into artifacts_dir, keyed by the chunk's SHA-256 hash."""
    sha = hashlib.sha256(text_chunk.encode()).hexdigest()
    location = os.path.join(artifacts_dir, f"{sha}.mp3")
    if os.path.exists(location):
        # Reuse the cached MP3 from a previous run.
        results[index] = location
    else:
        the_model = load_model_settings()
        tts_setting = load_tts_model_settings()
        if tts_setting == "openai":
            tts_openai(voice, text_chunk, location)
        elif tts_setting == "microsoft_local":
            if not is_local_tts_available():
                print("Please install gpt-computer-agent[local_tts] to use local TTS")
            else:
                tts_microsoft_local(text_chunk, location)
        # Record the chunk only if an MP3 was actually produced.
        if os.path.exists(location):
            results[index] = location
def split_text_to_sentences(text, max_chunk_size=300):
    """Split text into sentences and pack them into chunks of at most max_chunk_size characters."""
    sentences = text.split(".")
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            # Skip empty fragments from consecutive or trailing periods.
            continue
        if len(current_chunk) + len(sentence) + 1 <= max_chunk_size:
            current_chunk += sentence + ". "
        else:
            if current_chunk:
                chunks.append(current_chunk.strip())
            current_chunk = sentence + ". "
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks
def text_to_speech(text):
    """Convert text to speech chunk by chunk and return the generated MP3 paths in order."""
    text_chunks = split_text_to_sentences(text)
    threads = []
    results = [None] * len(text_chunks)
    initial_voice = random.choice(supported_openai_speakers)
    for i, chunk in enumerate(text_chunks):
        voice = (
            initial_voice if i % 2 == 0 else random_model(initial_voice)
        )  # Alternate voices between consecutive chunks
        # Synthesize each chunk in its own thread; writing into results[i]
        # preserves the original chunk order.
        thread = threading.Thread(
            target=generate_speech_chunk, args=(chunk, i, voice, results)
        )
        threads.append(thread)
        thread.start()
    for thread in threads:
        thread.join()
    mp3_files = [result for result in results if result is not None]
    return mp3_files
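

# Usage sketch (assumption: run from the package directory so the absolute-import
# fallback above resolves, with a TTS provider configured in the settings DB).
if __name__ == "__main__":
    sample = "Hello there. This is a quick smoke test of the text to speech pipeline."
    for path in text_to_speech(sample):
        print("generated:", path)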