Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,6 +23,26 @@ def forward_gpu(ps, ref_s, speed):
|
|
| 23 |
return models[True](ps, ref_s, speed)
|
| 24 |
|
| 25 |
def generate_first(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
|
| 27 |
pipeline = pipelines[voice[0]]
|
| 28 |
pack = pipeline.load_voice(voice)
|
|
@@ -189,14 +209,14 @@ with gr.Blocks() as app:
|
|
| 189 |
frankenstein_btn = gr.Button('💀 Frankenstein 📗', variant='secondary')
|
| 190 |
with gr.Column():
|
| 191 |
gr.TabbedInterface([generate_tab, stream_tab], ['Generate', 'Stream'])
|
| 192 |
-
random_btn.click(fn=get_random_quote, inputs=[], outputs=[text], api_name=
|
| 193 |
-
gatsby_btn.click(fn=get_gatsby, inputs=[], outputs=[text], api_name=
|
| 194 |
-
frankenstein_btn.click(fn=get_frankenstein, inputs=[], outputs=[text], api_name=
|
| 195 |
-
generate_btn.click(fn=generate_first, inputs=[text, voice, speed, use_gpu], outputs=[out_audio, out_ps], api_name=
|
| 196 |
-
tokenize_btn.click(fn=tokenize_first, inputs=[text, voice], outputs=[out_ps], api_name=
|
| 197 |
-
stream_event = stream_btn.click(fn=generate_all, inputs=[text, voice, speed, use_gpu], outputs=[out_stream], api_name=
|
| 198 |
stop_btn.click(fn=None, cancels=stream_event)
|
| 199 |
-
predict_btn.click(fn=predict, inputs=[text, voice, speed], outputs=[out_audio], api_name=
|
| 200 |
|
| 201 |
if __name__ == '__main__':
|
| 202 |
app.queue(api_open=API_OPEN).launch(show_api=API_OPEN, ssr_mode=True, mcp_server=True)
|
|
|
|
| 23 |
return models[True](ps, ref_s, speed)
|
| 24 |
|
| 25 |
def generate_first(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE):
|
| 26 |
+
|
| 27 |
+
"""
|
| 28 |
+
Generate a single audio sample from input text using a specified voice.
|
| 29 |
+
|
| 30 |
+
This function performs text-to-speech synthesis using the Kokoro TTS model.
|
| 31 |
+
It supports both GPU and CPU execution depending on availability and user preference.
|
| 32 |
+
The function tokenizes the input text, selects a reference signal, and performs inference
|
| 33 |
+
to generate and return a waveform as a tuple of sample rate and audio data.
|
| 34 |
+
|
| 35 |
+
Args:
|
| 36 |
+
text: Input text to be synthesized. If CHAR_LIMIT is set, it will be truncated accordingly.
|
| 37 |
+
voice: Voice model identifier to be used for synthesis. Default is 'af_heart'.
|
| 38 |
+
speed: Speed modifier for speech (1 is normal speed).
|
| 39 |
+
use_gpu: Whether to use GPU for inference. Falls back to CPU if GPU is unavailable or if an error occurs.
|
| 40 |
+
|
| 41 |
+
Returns:
|
| 42 |
+
A tuple:
|
| 43 |
+
- (sample_rate, audio): The generated waveform as a NumPy array with sample rate 24000 Hz.
|
| 44 |
+
- ps: The phoneme or token sequence used during synthesis.
|
| 45 |
+
"""
|
| 46 |
text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
|
| 47 |
pipeline = pipelines[voice[0]]
|
| 48 |
pack = pipeline.load_voice(voice)
|
|
|
|
| 209 |
frankenstein_btn = gr.Button('💀 Frankenstein 📗', variant='secondary')
|
| 210 |
with gr.Column():
|
| 211 |
gr.TabbedInterface([generate_tab, stream_tab], ['Generate', 'Stream'])
|
| 212 |
+
random_btn.click(fn=get_random_quote, inputs=[], outputs=[text], api_name=False)
|
| 213 |
+
gatsby_btn.click(fn=get_gatsby, inputs=[], outputs=[text], api_name=False)
|
| 214 |
+
frankenstein_btn.click(fn=get_frankenstein, inputs=[], outputs=[text], api_name=False)
|
| 215 |
+
generate_btn.click(fn=generate_first, inputs=[text, voice, speed, use_gpu], outputs=[out_audio, out_ps], api_name="generate_TTS")
|
| 216 |
+
tokenize_btn.click(fn=tokenize_first, inputs=[text, voice], outputs=[out_ps], api_name=False)
|
| 217 |
+
stream_event = stream_btn.click(fn=generate_all, inputs=[text, voice, speed, use_gpu], outputs=[out_stream], api_name=False)
|
| 218 |
stop_btn.click(fn=None, cancels=stream_event)
|
| 219 |
+
predict_btn.click(fn=predict, inputs=[text, voice, speed], outputs=[out_audio], api_name=False)
|
| 220 |
|
| 221 |
if __name__ == '__main__':
|
| 222 |
app.queue(api_open=API_OPEN).launch(show_api=API_OPEN, ssr_mode=True, mcp_server=True)
|