"""Gradio app (also exposed as an MCP server) for Kokoro text-to-speech."""

import contextlib
import os
import tempfile

# NOTE(review): `spaces` is kept ahead of the other third-party imports, as in
# the original file; ZeroGPU setups generally expect it to be imported before
# any CUDA-touching package -- confirm before reordering.
import spaces
import gradio as gr
from kokoro import KPipeline
import soundfile as sf

# Sample rate handed to the WAV writer (the value the original code used).
SAMPLE_RATE = 24000

# Set up the Kokoro pipeline once at import time (choose your language code).
pipeline = KPipeline(lang_code='a')  # 'a' = American English, see docs for more


@spaces.GPU
def kokoro_tts(text: str, voice: str = "af_heart", speed: float = 1.0) -> str:
    """
    Generate speech audio from text using Kokoro TTS.

    The pipeline yields one audio chunk per text segment; every chunk is
    appended, in order, to a single WAV file so long inputs are not cut off
    after the first segment.

    Args:
        text: The input text to synthesize.
        voice: The Kokoro voice name (e.g., 'af_heart').
        speed: Speech speed (default 1.0).

    Returns:
        Path to the generated WAV audio file.

    Raises:
        gr.Error: If ``text`` is empty/blank or the pipeline yields no audio.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # Reserve a unique path and close the handle *before* writing: reopening
    # a still-open NamedTemporaryFile by name fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        wav_path = tmp.name

    try:
        chunks_written = 0
        # channels=1 assumes Kokoro emits mono, 1-D audio per chunk.
        with sf.SoundFile(
            wav_path, mode="w", samplerate=SAMPLE_RATE, channels=1
        ) as wav:
            for _, _, audio in pipeline(text, voice=voice, speed=speed):
                if audio is None:
                    continue
                wav.write(audio)
                chunks_written += 1

        if not chunks_written:
            raise gr.Error("Kokoro produced no audio for the given text.")
    except Exception:
        # Do not leave an empty or partial file behind when synthesis fails.
        with contextlib.suppress(OSError):
            os.remove(wav_path)
        raise

    return wav_path  # Return file path for Gradio to serve


# List some example voices.
# NOTE(review): verify these names against the published Kokoro voice list;
# a name with no matching voice file will fail when the pipeline loads it.
voice_options = ["af_heart", "af_bella", "af_aria", "af_riley", "af_ryan"]

# The voice list in the description is derived from `voice_options` so the
# dropdown and the help text cannot drift apart.
description = f"""
Enter text to synthesize speech using Kokoro TTS.
**Voices:** {", ".join(voice_options)}
**Languages:** American English (default), see Kokoro docs for more.
**Speed:** 1.0 = normal, lower = slower, higher = faster.
"""

demo = gr.Interface(
    fn=kokoro_tts,
    inputs=[
        gr.Textbox(label="Text"),
        gr.Dropdown(voice_options, label="Voice", value="af_heart"),
        gr.Slider(0.5, 2.0, value=1.0, label="Speed"),
    ],
    outputs=gr.Audio(type="filepath", label="Generated Speech"),
    title="Kokoro TTS MCP Server",
    description=description,
)

if __name__ == "__main__":
    demo.launch(mcp_server=True)