Update app.py
app.py CHANGED
@@ -11,9 +11,6 @@ import torch
 import torchaudio
 import gradio as gr
 from os import getenv
-import io
-import numpy as np
-import scipy.io.wavfile as wavfile
 
 from zonos.model import Zonos
 from zonos.conditioning import make_cond_dict, supported_language_codes
@@ -167,8 +164,7 @@ def generate_audio(
     estimated_total_steps = int(estimated_generation_duration * 86)
 
     def update_progress(_frame: torch.Tensor, step: int, _total_steps: int) -> bool:
-
-        progress((step, estimated_total_steps))
+        progress((step, estimated_total_steps))
         return True
 
     codes = selected_model.generate(
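
For reference, the progress((step, estimated_total_steps)) call above relies on Gradio's progress tracker accepting a (step, total) tuple. A minimal, self-contained sketch of that pattern, not specific to this Space:

import time

import gradio as gr


def count_up(n, progress=gr.Progress()):
    # A gr.Progress default argument makes Gradio inject a tracker for this event.
    total = int(n)
    for step in range(total):
        # Same tuple form as in the diff above: renders as "step/total" in the UI.
        progress((step, total))
        time.sleep(0.05)
    return f"counted to {total}"


demo = gr.Interface(count_up, gr.Number(value=20), gr.Textbox())

if __name__ == "__main__":
    demo.launch()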
@@ -188,34 +184,6 @@ def generate_audio(
     return (sr_out, wav_out.squeeze().numpy()), seed
 
 
-# Define a simpler version of the API function for the API tab
-@spaces.GPU(duration=120)
-def simple_api_generate_speech(text, language="en-us"):
-    """Simple API endpoint for TTS generation with default parameters."""
-    # Set default emotion values
-    e1, e2, e3, e4 = 1.0, 0.05, 0.05, 0.05
-    e5, e6, e7, e8 = 0.05, 0.05, 0.1, 0.2
-
-    # Use the existing generate_audio function with fixed parameters
-    (sr, audio_output), _ = generate_audio(
-        model_choice="Zyphra/Zonos-v0.1-transformer",
-        text=text,
-        language=language,
-        speaker_audio=None,
-        prefix_audio=None,
-        e1=e1, e2=e2, e3=e3, e4=e4,
-        e5=e5, e6=e6, e7=e7, e8=e8,
-        vq_single=0.78, fmax=24000, pitch_std=45.0,
-        speaking_rate=15.0, dnsmos_ovrl=4.0,
-        speaker_noised=False, cfg_scale=2.0,
-        min_p=0.15, seed=420, randomize_seed=False,
-        unconditional_keys=["emotion"],
-        progress=None
-    )
-
-    return (sr, audio_output)
-
-
 def build_interface():
     with gr.Blocks(theme='ParityError/Interstellar') as demo:
         gr.Markdown("# Zonos v0.1")
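
For context on the decorator removed above: on ZeroGPU Spaces, GPU-backed functions are wrapped with spaces.GPU so hardware is attached only for the duration of each call. A minimal sketch of that pattern, independent of this Space's models:

import spaces
import torch


@spaces.GPU(duration=120)  # request GPU hardware for up to 120 seconds per call
def gpu_info():
    # On ZeroGPU hardware, CUDA is only available inside decorated functions.
    return torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu only"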
@@ -249,10 +217,10 @@ def build_interface():
                     type="filepath",
                 )
                 generate_button = gr.Button("Generate Audio")
-
+
             with gr.Column():
                 output_audio = gr.Audio(label="Generated Audio", type="numpy", autoplay=True)
-
+
         with gr.Accordion("Toggles", open=True):
             gr.Markdown(
                 "### Emotion Sliders\n"
@@ -269,7 +237,7 @@
                 emotion6 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Anger")
                 emotion7 = gr.Slider(0.0, 1.0, 0.1, 0.05, label="Other")
                 emotion8 = gr.Slider(0.0, 1.0, 0.2, 0.05, label="Neutral")
-
+
             gr.Markdown(
                 "### Unconditional Toggles\n"
                 "Checking a box will make the model ignore the corresponding conditioning value and make it unconditional.\n"
@@ -290,7 +258,7 @@
                 value=["emotion"],
                 label="Unconditional Keys",
             )
-
+
         with gr.Accordion("Advanced Settings", open=False):
             with gr.Row():
                 with gr.Column():
@@ -300,48 +268,20 @@
                     vq_single_slider = gr.Slider(0.5, 0.8, 0.78, 0.01, label="VQ Score")
                     pitch_std_slider = gr.Slider(0.0, 300.0, value=45.0, step=1, label="Pitch Std")
                     speaking_rate_slider = gr.Slider(5.0, 30.0, value=15.0, step=0.5, label="Speaking Rate")
-
+
                 with gr.Column():
                     gr.Markdown("## Generation Parameters")
                     cfg_scale_slider = gr.Slider(1.0, 5.0, 2.0, 0.1, label="CFG Scale")
                     min_p_slider = gr.Slider(0.0, 1.0, 0.15, 0.01, label="Min P")
                     seed_number = gr.Number(label="Seed", value=420, precision=0)
                     randomize_seed_toggle = gr.Checkbox(label="Randomize Seed (before generation)", value=True)
-
+
             prefix_audio = gr.Audio(
                 value="assets/silence_100ms.wav",
                 label="Optional Prefix Audio (continue from this audio)",
                 type="filepath",
             )
 
-        # API Interface
-        with gr.Tab("API"):
-            gr.Markdown("""### Text-to-Speech API""")
-            with gr.Row():
-                api_text = gr.Textbox(label="Text", value="API test sentence")
-                api_language = gr.Dropdown(choices=supported_language_codes, value="en-us", label="Language")
-            api_btn = gr.Button("Generate Speech")
-            api_output = gr.Audio(label="Generated Speech")
-
-            # Connect the API components
-            api_btn.click(
-                fn=simple_api_generate_speech,
-                inputs=[api_text, api_language],
-                outputs=api_output
-            )
-
-            # Example usage
-            gr.Examples(
-                examples=[
-                    ["This is a test of the text to speech system.", "en-us"],
-                    ["Esto es una prueba del sistema de síntesis de voz.", "es"],
-                    ["Dies ist ein Test des Text-zu-Sprache-Systems.", "de"]
-                ],
-                fn=simple_api_generate_speech,
-                inputs=[api_text, api_language],
-                outputs=api_output
-            )
-
         model_choice.change(
             fn=update_ui,
             inputs=[model_choice],
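
Removing the API tab does not remove programmatic access: Gradio still auto-generates HTTP endpoints for the remaining events. A hedged sketch using gradio_client; the Space id below is a placeholder, and the real endpoint names and signatures should be taken from view_api():

from gradio_client import Client

# Placeholder Space id; substitute the actual "owner/space-name".
client = Client("owner/zonos-space")

# With the explicit API tab gone, this prints the auto-generated endpoints,
# their parameter lists, and their return types.
print(client.view_api())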
@@ -433,6 +373,4 @@ def build_interface():
 if __name__ == "__main__":
     demo = build_interface()
     share = getenv("GRADIO_SHARE", "True").lower() in ("true", "1", "t")
-
-    # Launch with queue enabled
-    demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=share)
+    demo.launch(server_name="0.0.0.0", server_port=7860, share=share, ssr_mode=False)
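
On the launch change: recent Gradio releases queue events by default, so dropping the explicit queue() call keeps default queueing, and ssr_mode=False is the Gradio 5 flag that disables server-side rendering. A standalone illustration with a stand-in function:

import gradio as gr


def echo(text):
    # Stand-in function; only the launch() call matters here.
    return text


demo = gr.Interface(echo, "text", "text")

if __name__ == "__main__":
    # queue() is now mainly for tuning concurrency; events are queued by default.
    # ssr_mode=False turns off Gradio 5's server-side rendering of the frontend.
    demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)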