Spaces:
Zandintel
/
Runtime error

Zandintel committed on
Commit
669ae84
·
verified ·
1 Parent(s): 3755a93

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -70
app.py CHANGED
@@ -11,9 +11,6 @@ import torch
11
  import torchaudio
12
  import gradio as gr
13
  from os import getenv
14
- import io
15
- import numpy as np
16
- import scipy.io.wavfile as wavfile
17
 
18
  from zonos.model import Zonos
19
  from zonos.conditioning import make_cond_dict, supported_language_codes
@@ -167,8 +164,7 @@ def generate_audio(
167
  estimated_total_steps = int(estimated_generation_duration * 86)
168
 
169
  def update_progress(_frame: torch.Tensor, step: int, _total_steps: int) -> bool:
170
- if progress is not None:
171
- progress((step, estimated_total_steps))
172
  return True
173
 
174
  codes = selected_model.generate(
@@ -188,34 +184,6 @@ def generate_audio(
188
  return (sr_out, wav_out.squeeze().numpy()), seed
189
 
190
 
191
- # Define a simpler version of the API function for the API tab
192
- @spaces.GPU(duration=120)
193
- def simple_api_generate_speech(text, language="en-us"):
194
- """Simple API endpoint for TTS generation with default parameters."""
195
- # Set default emotion values
196
- e1, e2, e3, e4 = 1.0, 0.05, 0.05, 0.05
197
- e5, e6, e7, e8 = 0.05, 0.05, 0.1, 0.2
198
-
199
- # Use the existing generate_audio function with fixed parameters
200
- (sr, audio_output), _ = generate_audio(
201
- model_choice="Zyphra/Zonos-v0.1-transformer",
202
- text=text,
203
- language=language,
204
- speaker_audio=None,
205
- prefix_audio=None,
206
- e1=e1, e2=e2, e3=e3, e4=e4,
207
- e5=e5, e6=e6, e7=e7, e8=e8,
208
- vq_single=0.78, fmax=24000, pitch_std=45.0,
209
- speaking_rate=15.0, dnsmos_ovrl=4.0,
210
- speaker_noised=False, cfg_scale=2.0,
211
- min_p=0.15, seed=420, randomize_seed=False,
212
- unconditional_keys=["emotion"],
213
- progress=None
214
- )
215
-
216
- return (sr, audio_output)
217
-
218
-
219
  def build_interface():
220
  with gr.Blocks(theme='ParityError/Interstellar') as demo:
221
  gr.Markdown("# Zonos v0.1")
@@ -249,10 +217,10 @@ def build_interface():
249
  type="filepath",
250
  )
251
  generate_button = gr.Button("Generate Audio")
252
-
253
  with gr.Column():
254
  output_audio = gr.Audio(label="Generated Audio", type="numpy", autoplay=True)
255
-
256
  with gr.Accordion("Toggles", open=True):
257
  gr.Markdown(
258
  "### Emotion Sliders\n"
@@ -269,7 +237,7 @@ def build_interface():
269
  emotion6 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Anger")
270
  emotion7 = gr.Slider(0.0, 1.0, 0.1, 0.05, label="Other")
271
  emotion8 = gr.Slider(0.0, 1.0, 0.2, 0.05, label="Neutral")
272
-
273
  gr.Markdown(
274
  "### Unconditional Toggles\n"
275
  "Checking a box will make the model ignore the corresponding conditioning value and make it unconditional.\n"
@@ -290,7 +258,7 @@ def build_interface():
290
  value=["emotion"],
291
  label="Unconditional Keys",
292
  )
293
-
294
  with gr.Accordion("Advanced Settings", open=False):
295
  with gr.Row():
296
  with gr.Column():
@@ -300,48 +268,20 @@ def build_interface():
300
  vq_single_slider = gr.Slider(0.5, 0.8, 0.78, 0.01, label="VQ Score")
301
  pitch_std_slider = gr.Slider(0.0, 300.0, value=45.0, step=1, label="Pitch Std")
302
  speaking_rate_slider = gr.Slider(5.0, 30.0, value=15.0, step=0.5, label="Speaking Rate")
303
-
304
  with gr.Column():
305
  gr.Markdown("## Generation Parameters")
306
  cfg_scale_slider = gr.Slider(1.0, 5.0, 2.0, 0.1, label="CFG Scale")
307
  min_p_slider = gr.Slider(0.0, 1.0, 0.15, 0.01, label="Min P")
308
  seed_number = gr.Number(label="Seed", value=420, precision=0)
309
  randomize_seed_toggle = gr.Checkbox(label="Randomize Seed (before generation)", value=True)
310
-
311
  prefix_audio = gr.Audio(
312
  value="assets/silence_100ms.wav",
313
  label="Optional Prefix Audio (continue from this audio)",
314
  type="filepath",
315
  )
316
 
317
- # API Interface
318
- with gr.Tab("API"):
319
- gr.Markdown("""### Text-to-Speech API""")
320
- with gr.Row():
321
- api_text = gr.Textbox(label="Text", value="API test sentence")
322
- api_language = gr.Dropdown(choices=supported_language_codes, value="en-us", label="Language")
323
- api_btn = gr.Button("Generate Speech")
324
- api_output = gr.Audio(label="Generated Speech")
325
-
326
- # Connect the API components
327
- api_btn.click(
328
- fn=simple_api_generate_speech,
329
- inputs=[api_text, api_language],
330
- outputs=api_output
331
- )
332
-
333
- # Example usage
334
- gr.Examples(
335
- examples=[
336
- ["This is a test of the text to speech system.", "en-us"],
337
- ["Esto es una prueba del sistema de síntesis de voz.", "es"],
338
- ["Dies ist ein Test des Text-zu-Sprache-Systems.", "de"]
339
- ],
340
- fn=simple_api_generate_speech,
341
- inputs=[api_text, api_language],
342
- outputs=api_output
343
- )
344
-
345
  model_choice.change(
346
  fn=update_ui,
347
  inputs=[model_choice],
@@ -433,6 +373,4 @@ def build_interface():
433
  if __name__ == "__main__":
434
  demo = build_interface()
435
  share = getenv("GRADIO_SHARE", "True").lower() in ("true", "1", "t")
436
-
437
- # Launch with queue enabled
438
- demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=share)
 
11
  import torchaudio
12
  import gradio as gr
13
  from os import getenv
 
 
 
14
 
15
  from zonos.model import Zonos
16
  from zonos.conditioning import make_cond_dict, supported_language_codes
 
164
  estimated_total_steps = int(estimated_generation_duration * 86)
165
 
166
  def update_progress(_frame: torch.Tensor, step: int, _total_steps: int) -> bool:
167
+ progress((step, estimated_total_steps))
 
168
  return True
169
 
170
  codes = selected_model.generate(
 
184
  return (sr_out, wav_out.squeeze().numpy()), seed
185
 
186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  def build_interface():
188
  with gr.Blocks(theme='ParityError/Interstellar') as demo:
189
  gr.Markdown("# Zonos v0.1")
 
217
  type="filepath",
218
  )
219
  generate_button = gr.Button("Generate Audio")
220
+
221
  with gr.Column():
222
  output_audio = gr.Audio(label="Generated Audio", type="numpy", autoplay=True)
223
+
224
  with gr.Accordion("Toggles", open=True):
225
  gr.Markdown(
226
  "### Emotion Sliders\n"
 
237
  emotion6 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Anger")
238
  emotion7 = gr.Slider(0.0, 1.0, 0.1, 0.05, label="Other")
239
  emotion8 = gr.Slider(0.0, 1.0, 0.2, 0.05, label="Neutral")
240
+
241
  gr.Markdown(
242
  "### Unconditional Toggles\n"
243
  "Checking a box will make the model ignore the corresponding conditioning value and make it unconditional.\n"
 
258
  value=["emotion"],
259
  label="Unconditional Keys",
260
  )
261
+
262
  with gr.Accordion("Advanced Settings", open=False):
263
  with gr.Row():
264
  with gr.Column():
 
268
  vq_single_slider = gr.Slider(0.5, 0.8, 0.78, 0.01, label="VQ Score")
269
  pitch_std_slider = gr.Slider(0.0, 300.0, value=45.0, step=1, label="Pitch Std")
270
  speaking_rate_slider = gr.Slider(5.0, 30.0, value=15.0, step=0.5, label="Speaking Rate")
271
+
272
  with gr.Column():
273
  gr.Markdown("## Generation Parameters")
274
  cfg_scale_slider = gr.Slider(1.0, 5.0, 2.0, 0.1, label="CFG Scale")
275
  min_p_slider = gr.Slider(0.0, 1.0, 0.15, 0.01, label="Min P")
276
  seed_number = gr.Number(label="Seed", value=420, precision=0)
277
  randomize_seed_toggle = gr.Checkbox(label="Randomize Seed (before generation)", value=True)
278
+
279
  prefix_audio = gr.Audio(
280
  value="assets/silence_100ms.wav",
281
  label="Optional Prefix Audio (continue from this audio)",
282
  type="filepath",
283
  )
284
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  model_choice.change(
286
  fn=update_ui,
287
  inputs=[model_choice],
 
373
  if __name__ == "__main__":
374
  demo = build_interface()
375
  share = getenv("GRADIO_SHARE", "True").lower() in ("true", "1", "t")
376
+ demo.launch(server_name="0.0.0.0", server_port=7860, share=share, ssr_mode=False)