Kokoro-TTS-mcp-test

Running on Zero

App Files Files Community

ysharma HF Staff committed on Jul 30

Commit

5cc7a53

verified ·

1 Parent(s): efc3bf4

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -7

app.py CHANGED Viewed

@@ -23,6 +23,26 @@ def forward_gpu(ps, ref_s, speed):
     return models[True](ps, ref_s, speed)
 def generate_first(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE):
     text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
     pipeline = pipelines[voice[0]]
     pack = pipeline.load_voice(voice)
@@ -189,14 +209,14 @@ with gr.Blocks() as app:
                 frankenstein_btn = gr.Button('💀 Frankenstein 📗', variant='secondary')
         with gr.Column():
             gr.TabbedInterface([generate_tab, stream_tab], ['Generate', 'Stream'])
-    random_btn.click(fn=get_random_quote, inputs=[], outputs=[text], api_name=API_NAME)
-    gatsby_btn.click(fn=get_gatsby, inputs=[], outputs=[text], api_name=API_NAME)
-    frankenstein_btn.click(fn=get_frankenstein, inputs=[], outputs=[text], api_name=API_NAME)
-    generate_btn.click(fn=generate_first, inputs=[text, voice, speed, use_gpu], outputs=[out_audio, out_ps], api_name=API_NAME)
-    tokenize_btn.click(fn=tokenize_first, inputs=[text, voice], outputs=[out_ps], api_name=API_NAME)
-    stream_event = stream_btn.click(fn=generate_all, inputs=[text, voice, speed, use_gpu], outputs=[out_stream], api_name=API_NAME)
     stop_btn.click(fn=None, cancels=stream_event)
-    predict_btn.click(fn=predict, inputs=[text, voice, speed], outputs=[out_audio], api_name=API_NAME)
 if __name__ == '__main__':
     app.queue(api_open=API_OPEN).launch(show_api=API_OPEN, ssr_mode=True, mcp_server=True)

     return models[True](ps, ref_s, speed)
 def generate_first(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE):
+    """
+    Generate a single audio sample from input text using a specified voice.
+    This function performs text-to-speech synthesis using the Kokoro TTS model.
+    It supports both GPU and CPU execution depending on availability and user preference.
+    The function tokenizes the input text, selects a reference signal, and performs inference
+    to generate and return a waveform as a tuple of sample rate and audio data.
+    Args:
+        text: Input text to be synthesized. If CHAR_LIMIT is set, it will be truncated accordingly.
+        voice: Voice model identifier to be used for synthesis. Default is 'af_heart'.
+        speed: Speed modifier for speech (1 is normal speed).
+        use_gpu: Whether to use GPU for inference. Falls back to CPU if GPU is unavailable or if an error occurs.
+    Returns:
+        A tuple:
+            - (sample_rate, audio): The generated waveform as a NumPy array with sample rate 24000 Hz.
+            - ps: The phoneme or token sequence used during synthesis.
+    """
     text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
     pipeline = pipelines[voice[0]]
     pack = pipeline.load_voice(voice)
                 frankenstein_btn = gr.Button('💀 Frankenstein 📗', variant='secondary')
         with gr.Column():
             gr.TabbedInterface([generate_tab, stream_tab], ['Generate', 'Stream'])
+    random_btn.click(fn=get_random_quote, inputs=[], outputs=[text], api_name=False)
+    gatsby_btn.click(fn=get_gatsby, inputs=[], outputs=[text], api_name=False)
+    frankenstein_btn.click(fn=get_frankenstein, inputs=[], outputs=[text], api_name=False)
+    generate_btn.click(fn=generate_first, inputs=[text, voice, speed, use_gpu], outputs=[out_audio, out_ps], api_name="generate_TTS")
+    tokenize_btn.click(fn=tokenize_first, inputs=[text, voice], outputs=[out_ps], api_name=False)
+    stream_event = stream_btn.click(fn=generate_all, inputs=[text, voice, speed, use_gpu], outputs=[out_stream], api_name=False)
     stop_btn.click(fn=None, cancels=stream_event)
+    predict_btn.click(fn=predict, inputs=[text, voice, speed], outputs=[out_audio], api_name=False)
 if __name__ == '__main__':
     app.queue(api_open=API_OPEN).launch(show_api=API_OPEN, ssr_mode=True, mcp_server=True)