Steveeeeeeen HF staff multimodalart HF staff committed on
Commit
df1e17e
·
verified ·
1 Parent(s): b981444

Suggested UI changes (#4)

Browse files

- Suggested UI changes (d7c8d6c6a40c063261c6bf51c94a520dd80ab8f4)


Co-authored-by: Apolinário from multimodal AI art <[email protected]>

Files changed (1) hide show
  1. app.py +57 -51
app.py CHANGED
@@ -186,14 +186,10 @@ def generate_audio(
186
 
187
  def build_interface():
188
  with gr.Blocks(theme='ParityError/Interstellar') as demo:
 
 
189
  with gr.Row():
190
  with gr.Column():
191
- model_choice = gr.Dropdown(
192
- choices=MODEL_NAMES,
193
- value="Zyphra/Zonos-v0.1-transformer",
194
- label="Zonos Model Type",
195
- info="Select the model variant to use.",
196
- )
197
  text = gr.Textbox(
198
  label="Text to Synthesize",
199
  value="Zonos uses eSpeak for text to phoneme conversion!",
@@ -203,38 +199,45 @@ def build_interface():
203
  language = gr.Dropdown(
204
  choices=supported_language_codes,
205
  value="en-us",
206
- label="Language Code",
207
- info="Select a language code.",
 
 
 
 
 
208
  )
209
- prefix_audio = gr.Audio(
210
- value="assets/silence_100ms.wav",
211
- label="Optional Prefix Audio (continue from this audio)",
212
- type="filepath",
213
- )
214
- with gr.Column():
215
  speaker_audio = gr.Audio(
216
  label="Optional Speaker Audio (for cloning)",
217
  type="filepath",
218
  )
219
- speaker_noised_checkbox = gr.Checkbox(label="Denoise Speaker?", value=False)
220
-
221
- with gr.Row():
222
- with gr.Column():
223
- gr.Markdown("## Conditioning Parameters")
224
- dnsmos_slider = gr.Slider(1.0, 5.0, value=4.0, step=0.1, label="DNSMOS Overall")
225
- fmax_slider = gr.Slider(0, 24000, value=24000, step=1, label="Fmax (Hz)")
226
- vq_single_slider = gr.Slider(0.5, 0.8, 0.78, 0.01, label="VQ Score")
227
- pitch_std_slider = gr.Slider(0.0, 300.0, value=45.0, step=1, label="Pitch Std")
228
- speaking_rate_slider = gr.Slider(5.0, 30.0, value=15.0, step=0.5, label="Speaking Rate")
229
-
230
  with gr.Column():
231
- gr.Markdown("## Generation Parameters")
232
- cfg_scale_slider = gr.Slider(1.0, 5.0, 2.0, 0.1, label="CFG Scale")
233
- min_p_slider = gr.Slider(0.0, 1.0, 0.15, 0.01, label="Min P")
234
- seed_number = gr.Number(label="Seed", value=420, precision=0)
235
- randomize_seed_toggle = gr.Checkbox(label="Randomize Seed (before generation)", value=True)
236
-
237
- with gr.Accordion("Advanced Parameters", open=False):
 
 
 
 
 
 
 
 
 
 
 
 
238
  gr.Markdown(
239
  "### Unconditional Toggles\n"
240
  "Checking a box will make the model ignore the corresponding conditioning value and make it unconditional.\n"
@@ -255,26 +258,29 @@ def build_interface():
255
  value=["emotion"],
256
  label="Unconditional Keys",
257
  )
258
-
259
- gr.Markdown(
260
- "### Emotion Sliders\n"
261
- "Warning: The way these sliders work is not intuitive and may require some trial and error to get the desired effect.\n"
262
- "Certain configurations can cause the model to become unstable. Setting emotion to unconditional may help."
263
- )
264
  with gr.Row():
265
- emotion1 = gr.Slider(0.0, 1.0, 1.0, 0.05, label="Happiness")
266
- emotion2 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Sadness")
267
- emotion3 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Disgust")
268
- emotion4 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Fear")
269
- with gr.Row():
270
- emotion5 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Surprise")
271
- emotion6 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Anger")
272
- emotion7 = gr.Slider(0.0, 1.0, 0.1, 0.05, label="Other")
273
- emotion8 = gr.Slider(0.0, 1.0, 0.2, 0.05, label="Neutral")
274
-
275
- with gr.Column():
276
- generate_button = gr.Button("Generate Audio")
277
- output_audio = gr.Audio(label="Generated Audio", type="numpy", autoplay=True)
 
 
 
 
 
 
 
278
 
279
  model_choice.change(
280
  fn=update_ui,
 
186
 
187
  def build_interface():
188
  with gr.Blocks(theme='ParityError/Interstellar') as demo:
189
+ gr.Markdown("# Zonos v0.1")
190
+ gr.Markdown("State of the art text-to-speech model [[model]](https://huggingface.co/collections/Zyphra/zonos-v01-67ac661c85e1898670823b4f), [[blog]](https://www.zyphra.com/post/beta-release-of-zonos-v0-1), [[Zyphra Audio (hosted service)]](https://maia.zyphra.com/sign-in?redirect_url=https%3A%2F%2Fmaia.zyphra.com%2Faudio) ")
191
  with gr.Row():
192
  with gr.Column():
 
 
 
 
 
 
193
  text = gr.Textbox(
194
  label="Text to Synthesize",
195
  value="Zonos uses eSpeak for text to phoneme conversion!",
 
199
  language = gr.Dropdown(
200
  choices=supported_language_codes,
201
  value="en-us",
202
+ label="Language",
203
+ )
204
+ model_choice = gr.Dropdown(
205
+ choices=MODEL_NAMES,
206
+ value="Zyphra/Zonos-v0.1-transformer",
207
+ label="Zonos Model Type",
208
+ info="Select the model variant to use.",
209
  )
 
 
 
 
 
 
210
  speaker_audio = gr.Audio(
211
  label="Optional Speaker Audio (for cloning)",
212
  type="filepath",
213
  )
214
+ generate_button = gr.Button("Generate Audio")
215
+ #with gr.Column():
216
+ speaker_noised_checkbox = gr.Checkbox(
217
+ label="Denoise Speaker?",
218
+ value=False,
219
+ visible=False
220
+ )
 
 
 
 
221
  with gr.Column():
222
+ output_audio = gr.Audio(label="Generated Audio", type="numpy", autoplay=True)
223
+
224
+ with gr.Accordion("Toggles", open=True):
225
+ gr.Markdown(
226
+ "### Emotion Sliders\n"
227
+ "Warning: The way these sliders work is not intuitive and may require some trial and error to get the desired effect.\n"
228
+ "Certain configurations can cause the model to become unstable. Setting emotion to unconditional may help."
229
+ )
230
+ with gr.Row():
231
+ emotion1 = gr.Slider(0.0, 1.0, 1.0, 0.05, label="Happiness")
232
+ emotion2 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Sadness")
233
+ emotion3 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Disgust")
234
+ emotion4 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Fear")
235
+ with gr.Row():
236
+ emotion5 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Surprise")
237
+ emotion6 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Anger")
238
+ emotion7 = gr.Slider(0.0, 1.0, 0.1, 0.05, label="Other")
239
+ emotion8 = gr.Slider(0.0, 1.0, 0.2, 0.05, label="Neutral")
240
+
241
  gr.Markdown(
242
  "### Unconditional Toggles\n"
243
  "Checking a box will make the model ignore the corresponding conditioning value and make it unconditional.\n"
 
258
  value=["emotion"],
259
  label="Unconditional Keys",
260
  )
261
+
262
+ with gr.Accordion("Advanced Settings", open=False):
 
 
 
 
263
  with gr.Row():
264
+ with gr.Column():
265
+ gr.Markdown("## Conditioning Parameters")
266
+ dnsmos_slider = gr.Slider(1.0, 5.0, value=4.0, step=0.1, label="DNSMOS Overall")
267
+ fmax_slider = gr.Slider(0, 24000, value=24000, step=1, label="Fmax (Hz)")
268
+ vq_single_slider = gr.Slider(0.5, 0.8, 0.78, 0.01, label="VQ Score")
269
+ pitch_std_slider = gr.Slider(0.0, 300.0, value=45.0, step=1, label="Pitch Std")
270
+ speaking_rate_slider = gr.Slider(5.0, 30.0, value=15.0, step=0.5, label="Speaking Rate")
271
+
272
+ with gr.Column():
273
+ gr.Markdown("## Generation Parameters")
274
+ cfg_scale_slider = gr.Slider(1.0, 5.0, 2.0, 0.1, label="CFG Scale")
275
+ min_p_slider = gr.Slider(0.0, 1.0, 0.15, 0.01, label="Min P")
276
+ seed_number = gr.Number(label="Seed", value=420, precision=0)
277
+ randomize_seed_toggle = gr.Checkbox(label="Randomize Seed (before generation)", value=True)
278
+
279
+ prefix_audio = gr.Audio(
280
+ value="assets/silence_100ms.wav",
281
+ label="Optional Prefix Audio (continue from this audio)",
282
+ type="filepath",
283
+ )
284
 
285
  model_choice.change(
286
  fn=update_ui,