import gradio as gr import soundfile as sf import numpy as np from kittentts import KittenTTS # Initialize the model model = KittenTTS("KittenML/kitten-tts-nano-0.1") # Available voices AVAILABLE_VOICES = [ 'expr-voice-2-m', 'expr-voice-2-f', 'expr-voice-3-m', 'expr-voice-3-f', 'expr-voice-4-m', 'expr-voice-4-f', 'expr-voice-5-m', 'expr-voice-5-f' ] def generate_speech(text, voice): """Generate speech from text using KittenTTS""" if not text.strip(): return None, "Please enter some text to generate speech." try: # Generate audio audio = model.generate(text, voice=voice) # Convert to the format expected by Gradio if len(audio.shape) > 1: audio = audio.mean(axis=1) # Convert stereo to mono if needed # Normalize audio audio = audio / np.max(np.abs(audio)) if np.max(np.abs(audio)) > 0 else audio return audio, f"✅ Successfully generated speech with voice: {voice}" except Exception as e: return None, f"❌ Error generating speech: {str(e)}" # Create the interface with gr.Blocks(title="KittenTTS - High Quality Text-to-Speech") as demo: gr.HTML("""
High Quality Text-to-Speech Generation