Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -23,6 +23,26 @@ def forward_gpu(ps, ref_s, speed):
|
|
23 |
return models[True](ps, ref_s, speed)
|
24 |
|
25 |
def generate_first(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
|
27 |
pipeline = pipelines[voice[0]]
|
28 |
pack = pipeline.load_voice(voice)
|
@@ -189,14 +209,14 @@ with gr.Blocks() as app:
|
|
189 |
frankenstein_btn = gr.Button('💀 Frankenstein 📗', variant='secondary')
|
190 |
with gr.Column():
|
191 |
gr.TabbedInterface([generate_tab, stream_tab], ['Generate', 'Stream'])
|
192 |
-
random_btn.click(fn=get_random_quote, inputs=[], outputs=[text], api_name=
|
193 |
-
gatsby_btn.click(fn=get_gatsby, inputs=[], outputs=[text], api_name=
|
194 |
-
frankenstein_btn.click(fn=get_frankenstein, inputs=[], outputs=[text], api_name=
|
195 |
-
generate_btn.click(fn=generate_first, inputs=[text, voice, speed, use_gpu], outputs=[out_audio, out_ps], api_name=
|
196 |
-
tokenize_btn.click(fn=tokenize_first, inputs=[text, voice], outputs=[out_ps], api_name=
|
197 |
-
stream_event = stream_btn.click(fn=generate_all, inputs=[text, voice, speed, use_gpu], outputs=[out_stream], api_name=
|
198 |
stop_btn.click(fn=None, cancels=stream_event)
|
199 |
-
predict_btn.click(fn=predict, inputs=[text, voice, speed], outputs=[out_audio], api_name=
|
200 |
|
201 |
if __name__ == '__main__':
|
202 |
app.queue(api_open=API_OPEN).launch(show_api=API_OPEN, ssr_mode=True, mcp_server=True)
|
|
|
23 |
return models[True](ps, ref_s, speed)
|
24 |
|
25 |
def generate_first(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE):
|
26 |
+
|
27 |
+
"""
|
28 |
+
Generate a single audio sample from input text using a specified voice.
|
29 |
+
|
30 |
+
This function performs text-to-speech synthesis using the Kokoro TTS model.
|
31 |
+
It supports both GPU and CPU execution depending on availability and user preference.
|
32 |
+
The function tokenizes the input text, selects a reference signal, and performs inference
|
33 |
+
to generate a waveform, returned as a (sample rate, audio data) tuple together with the phoneme or token sequence used.
|
34 |
+
|
35 |
+
Args:
|
36 |
+
text: Input text to be synthesized. If CHAR_LIMIT is set, it will be truncated accordingly.
|
37 |
+
voice: Voice model identifier to be used for synthesis. Default is 'af_heart'.
|
38 |
+
speed: Speed modifier for speech (1 is normal speed).
|
39 |
+
use_gpu: Whether to use GPU for inference. Falls back to CPU if GPU is unavailable or if an error occurs.
|
40 |
+
|
41 |
+
Returns:
|
42 |
+
A tuple:
|
43 |
+
- (sample_rate, audio): The sample rate (24000 Hz) and the generated waveform as a NumPy array.
|
44 |
+
- ps: The phoneme or token sequence used during synthesis.
|
45 |
+
"""
|
46 |
text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
|
47 |
pipeline = pipelines[voice[0]]
|
48 |
pack = pipeline.load_voice(voice)
|
|
|
209 |
frankenstein_btn = gr.Button('💀 Frankenstein 📗', variant='secondary')
|
210 |
with gr.Column():
|
211 |
gr.TabbedInterface([generate_tab, stream_tab], ['Generate', 'Stream'])
|
212 |
+
random_btn.click(fn=get_random_quote, inputs=[], outputs=[text], api_name=False)
|
213 |
+
gatsby_btn.click(fn=get_gatsby, inputs=[], outputs=[text], api_name=False)
|
214 |
+
frankenstein_btn.click(fn=get_frankenstein, inputs=[], outputs=[text], api_name=False)
|
215 |
+
generate_btn.click(fn=generate_first, inputs=[text, voice, speed, use_gpu], outputs=[out_audio, out_ps], api_name="generate_TTS")
|
216 |
+
tokenize_btn.click(fn=tokenize_first, inputs=[text, voice], outputs=[out_ps], api_name=False)
|
217 |
+
stream_event = stream_btn.click(fn=generate_all, inputs=[text, voice, speed, use_gpu], outputs=[out_stream], api_name=False)
|
218 |
stop_btn.click(fn=None, cancels=stream_event)
|
219 |
+
predict_btn.click(fn=predict, inputs=[text, voice, speed], outputs=[out_audio], api_name=False)
|
220 |
|
221 |
if __name__ == '__main__':
|
222 |
app.queue(api_open=API_OPEN).launch(show_api=API_OPEN, ssr_mode=True, mcp_server=True)
|