"""Gradio web demo for KittenTTS text-to-speech generation."""

import gradio as gr
import soundfile as sf
import numpy as np
from kittentts import KittenTTS

# Initialize the model once at import time so every request reuses it.
model = KittenTTS("KittenML/kitten-tts-nano-0.1")

# Sample rate (Hz) paired with the generated waveform when handing it to
# Gradio. 24 kHz is the rate used in the KittenTTS usage example;
# NOTE(review): confirm if the model checkpoint is ever changed.
SAMPLE_RATE = 24000

# Available voices
AVAILABLE_VOICES = [
    'expr-voice-2-m',
    'expr-voice-2-f',
    'expr-voice-3-m',
    'expr-voice-3-f',
    'expr-voice-4-m',
    'expr-voice-4-f',
    'expr-voice-5-m',
    'expr-voice-5-f',
]

# Static text blocks rendered through gr.HTML.
# NOTE(review): these contain no tags; the original markup looks like it was
# stripped at some point. Restore the intended HTML if it is available.
HEADER_HTML = """

🎤 KittenTTS

High Quality Text-to-Speech Generation

"""

VOICES_HTML = """

Available Voices:

"""


def generate_speech(text, voice):
    """Generate speech from text using KittenTTS.

    Args:
        text: The text to synthesize. ``None``, empty, or whitespace-only
            input is rejected with a status message.
        voice: Voice identifier passed to ``model.generate``.

    Returns:
        A ``(audio, status)`` pair for the Audio and Status components.
        ``audio`` is ``(SAMPLE_RATE, samples)`` with ``samples`` a 1-D
        peak-normalized array, or ``None`` when nothing was generated.
        ``status`` is a human-readable message.
    """
    if not text or not text.strip():
        return None, "Please enter some text to generate speech."

    try:
        # Generate audio
        audio = np.asarray(model.generate(text, voice=voice))

        # Drop singleton axes first (defensive): a (1, N) result would
        # otherwise be collapsed to a single sample by the average below.
        audio = np.atleast_1d(np.squeeze(audio))
        if audio.ndim > 1:
            audio = audio.mean(axis=1)  # Convert stereo to mono if needed

        if audio.size == 0:
            return None, "❌ Error generating speech: model returned no audio"

        # Normalize to peak amplitude 1.0; leave pure silence untouched to
        # avoid dividing by zero.
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak

        # gr.Audio expects (sample_rate, data) for NumPy output, not a bare
        # array.
        return (SAMPLE_RATE, audio), f"✅ Successfully generated speech with voice: {voice}"
    except Exception as e:
        # UI boundary: report the failure in the status box instead of
        # crashing the request.
        return None, f"❌ Error generating speech: {e}"


# Create the interface
with gr.Blocks(title="KittenTTS - High Quality Text-to-Speech") as demo:
    gr.HTML(HEADER_HTML)

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter your text",
                placeholder="Type or paste your text here...",
                lines=4,
            )
            voice_dropdown = gr.Dropdown(
                choices=AVAILABLE_VOICES,
                value=AVAILABLE_VOICES[1],
                label="Select Voice",
            )
            generate_btn = gr.Button("🎵 Generate Speech", variant="primary")

        with gr.Column():
            gr.HTML(VOICES_HTML)
            audio_output = gr.Audio(label="Generated Audio")
            status_output = gr.Textbox(label="Status", interactive=False)

    # Clicking the button and submitting the textbox both run the same
    # generation call with identical inputs and outputs.
    for trigger in (generate_btn.click, text_input.submit):
        trigger(
            fn=generate_speech,
            inputs=[text_input, voice_dropdown],
            outputs=[audio_output, status_output],
        )

# Launch the demo
if __name__ == "__main__":
    # Listens on all interfaces; share=True additionally requests a public
    # Gradio share link.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
    )