import spaces
import gradio as gr
from kokoro import KPipeline
import soundfile as sf
import tempfile
# Module-level Kokoro pipeline, created once at import time and used by
# kokoro_tts below. lang_code "a" selects American English; see the Kokoro
# docs for the other language codes.
pipeline = KPipeline(lang_code="a")
@spaces.GPU
def kokoro_tts(text: str, voice: str = "af_heart", speed: float = 1.0) -> str:
    """
    Generate speech audio from text using Kokoro TTS.

    The pipeline may yield the audio in several chunks. Every chunk is
    joined, in order, into one WAV file so the whole text is spoken.
    A gr.Error is raised when the text is blank or no audio is produced.

    Args:
        text: The input text to synthesize.
        voice: The Kokoro voice name (e.g., 'af_heart').
        speed: Speech speed (default 1.0).
    Returns:
        Path to the generated WAV audio file.
    """
    # Function-scope import so this block does not depend on a file-level
    # numpy import being present.
    import numpy as np

    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # Collect every chunk the pipeline yields; returning from inside the
    # loop would keep only the first one.
    chunks = []
    for _, _, audio in pipeline(text, voice=voice, speed=speed):
        if audio is None:
            # NOTE(review): assumes a chunk can come back without audio;
            # skipping it is harmless either way.
            continue
        if hasattr(audio, "detach"):
            # Tensor-like output: move to host memory before concatenating.
            audio = audio.detach().cpu().numpy()
        chunks.append(np.asarray(audio))

    if not chunks:
        raise gr.Error("Kokoro did not produce any audio for the given text.")

    waveform = chunks[0] if len(chunks) == 1 else np.concatenate(chunks)

    # Reserve a unique path, then close the handle before soundfile opens
    # the same path for writing (an open handle blocks that on some
    # platforms). delete=False keeps the file around for Gradio to serve.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        wav_path = tmp.name
    sf.write(wav_path, waveform, 24000)  # 24000 = sample rate in Hz
    return wav_path
# Example voices offered in the UI dropdown. The first entry is the default
# voice. NOTE(review): names are meant to match the voice list published
# with the Kokoro-82M model; confirm before adding more.
voice_options = ["af_heart", "af_bella", "af_nicole", "af_sarah", "af_sky"]

# Markdown shown under the title. The voice line is generated from
# voice_options so the text can never drift from the dropdown choices.
description = f"""
Enter text to synthesize speech using Kokoro TTS.
**Voices:** {', '.join(voice_options)}
**Languages:** American English (default), see Kokoro docs for more.
**Speed:** 1.0 = normal, lower = slower, higher = faster.
"""
# Gradio interface around kokoro_tts: a text box, a voice dropdown and a
# speed slider go in; the path of the generated audio file comes out.
demo = gr.Interface(
    fn=kokoro_tts,
    title="Kokoro TTS MCP Server",
    description=description,
    inputs=[
        gr.Textbox(label="Text"),
        gr.Dropdown(choices=voice_options, value="af_heart", label="Voice"),
        gr.Slider(minimum=0.5, maximum=2.0, value=1.0, label="Speed"),
    ],
    outputs=gr.Audio(label="Generated Speech", type="filepath"),
)
# Launch only when run as a script (not on import). mcp_server=True asks
# Gradio to serve the app as an MCP server as well.
if __name__ == "__main__":
    demo.launch(mcp_server=True)