ysharma (HF Staff) committed
Commit 5cc7a53 · verified · 1 Parent(s): efc3bf4

Update app.py

Files changed (1): app.py (+27 -7)
app.py CHANGED
@@ -23,6 +23,26 @@ def forward_gpu(ps, ref_s, speed):
     return models[True](ps, ref_s, speed)
 
 def generate_first(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE):
+
+    """
+    Generate a single audio sample from input text using a specified voice.
+
+    This function performs text-to-speech synthesis using the Kokoro TTS model.
+    It supports both GPU and CPU execution depending on availability and user preference.
+    The function tokenizes the input text, selects a reference signal, and performs inference
+    to generate and return a waveform as a tuple of sample rate and audio data.
+
+    Args:
+        text: Input text to be synthesized. If CHAR_LIMIT is set, it will be truncated accordingly.
+        voice: Voice model identifier to be used for synthesis. Default is 'af_heart'.
+        speed: Speed modifier for speech (1 is normal speed).
+        use_gpu: Whether to use GPU for inference. Falls back to CPU if GPU is unavailable or if an error occurs.
+
+    Returns:
+        A tuple:
+            - (sample_rate, audio): The generated waveform as a NumPy array with sample rate 24000 Hz.
+            - ps: The phoneme or token sequence used during synthesis.
+    """
     text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
     pipeline = pipelines[voice[0]]
     pack = pipeline.load_voice(voice)
@@ -189,14 +209,14 @@ with gr.Blocks() as app:
                 frankenstein_btn = gr.Button('💀 Frankenstein 📗', variant='secondary')
         with gr.Column():
             gr.TabbedInterface([generate_tab, stream_tab], ['Generate', 'Stream'])
-    random_btn.click(fn=get_random_quote, inputs=[], outputs=[text], api_name=API_NAME)
-    gatsby_btn.click(fn=get_gatsby, inputs=[], outputs=[text], api_name=API_NAME)
-    frankenstein_btn.click(fn=get_frankenstein, inputs=[], outputs=[text], api_name=API_NAME)
-    generate_btn.click(fn=generate_first, inputs=[text, voice, speed, use_gpu], outputs=[out_audio, out_ps], api_name=API_NAME)
-    tokenize_btn.click(fn=tokenize_first, inputs=[text, voice], outputs=[out_ps], api_name=API_NAME)
-    stream_event = stream_btn.click(fn=generate_all, inputs=[text, voice, speed, use_gpu], outputs=[out_stream], api_name=API_NAME)
+    random_btn.click(fn=get_random_quote, inputs=[], outputs=[text], api_name=False)
+    gatsby_btn.click(fn=get_gatsby, inputs=[], outputs=[text], api_name=False)
+    frankenstein_btn.click(fn=get_frankenstein, inputs=[], outputs=[text], api_name=False)
+    generate_btn.click(fn=generate_first, inputs=[text, voice, speed, use_gpu], outputs=[out_audio, out_ps], api_name="generate_TTS")
+    tokenize_btn.click(fn=tokenize_first, inputs=[text, voice], outputs=[out_ps], api_name=False)
+    stream_event = stream_btn.click(fn=generate_all, inputs=[text, voice, speed, use_gpu], outputs=[out_stream], api_name=False)
     stop_btn.click(fn=None, cancels=stream_event)
-    predict_btn.click(fn=predict, inputs=[text, voice, speed], outputs=[out_audio], api_name=API_NAME)
+    predict_btn.click(fn=predict, inputs=[text, voice, speed], outputs=[out_audio], api_name=False)
 
 if __name__ == '__main__':
     app.queue(api_open=API_OPEN).launch(show_api=API_OPEN, ssr_mode=True, mcp_server=True)
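
The new docstring spells out generate_first's return contract: a (sample_rate, audio) pair plus the phoneme sequence ps. A minimal sketch of consuming that contract follows; it assumes the script sits next to this Space's app.py and that the soundfile package is installed, neither of which is part of this commit.

import soundfile as sf          # assumption: soundfile installed separately
from app import generate_first  # assumption: importing this Space's app.py directly

# Per the docstring, generate_first returns ((sample_rate, audio), ps).
(sample_rate, audio), ps = generate_first(
    'Kokoro generates speech from short passages of text.',
    voice='af_heart',  # default voice named in the docstring
    speed=1,           # 1 is normal speed
    use_gpu=False,     # force the CPU path
)

print('phoneme sequence:', ps)
sf.write('sample.wav', audio, sample_rate)  # 24000 Hz per the docstring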
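
The api_name changes hide the helper endpoints (api_name=False) and leave the main synthesis handler exposed under a single named route, generate_TTS. A hedged sketch of calling that route with the gradio_client package; the Space id below is a placeholder, not taken from this commit.

from gradio_client import Client

client = Client('user/space-name')  # placeholder Space id; substitute the real one
audio_path, phonemes = client.predict(
    'Hello from the Kokoro demo.',  # text
    'af_heart',                     # voice
    1,                              # speed
    False,                          # use_gpu
    api_name='/generate_TTS',       # the one endpoint left public by this commit
)
print(audio_path, phonemes)  # gradio_client returns the audio output as a local file path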