zonos-longform-unleashed

Running on Zero

App Files Files Community

benjamin-paine committed 25 days ago

Commit

9dd4304

verified ·

1 Parent(s): 31b7f65

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -8

app.py CHANGED Viewed

@@ -283,8 +283,9 @@ if __name__ == "__main__":
                     label="Optional Prefix Audio (continue from this audio)",
                     type="filepath",
                 )
-                prefix_equalized_checkbox = gr.Checkbox(label="Equalize Prefix Audio", value=True)
-                prefix_enhanced_checkbox = gr.Checkbox(label="Enhance Prefix Audio with DeepFilterNet", value=True)
             with gr.Column(scale=3):
                 cfg_scale_slider = gr.Slider(1.0, 5.0, 2.0, 0.1, label="CFG Scale")
                 min_p_slider = gr.Slider(0.0, 1.0, 0.15, 0.01, label="Min P")
@@ -304,8 +305,8 @@ if __name__ == "__main__":
                     value=False,
                     info="Note; this is not a pre-processing step, it is a conditioning value that the model understands. Check this box if your input audio is noisy."
                 )
-                speaker_equalized_checkbox = gr.Checkbox(label="Equalize Speaker Audio", value=True)
-                speaker_enhanced_checkbox = gr.Checkbox(label="Enhance Speaker Audio with DeepFilterNet", value=True)
                 def on_enhanced_change(use_enhance: bool) -> Dict[str, Any]:
                     update_dict = {"enabled": not use_enhance}
@@ -322,6 +323,7 @@ if __name__ == "__main__":
                     -1200, 1200, -44.99, 0.01, label="Speaker Pitch Shift (Cents)",
                     info="A pitch shift to apply to speaker audio before extracting embeddings. A slight down-shift of ~45 cents tends to produce a more accurate voice cloning."
                 )
             speaker_audio = gr.Audio(
                 label="Optional Speaker Audio (for cloning)",
                 type="filepath",
@@ -445,10 +447,10 @@ if __name__ == "__main__":
                 emotion_uncond,
                 speaker_uncond,
                 speaker_pitch_shift,
-                speaker_equalized_checkbox,
-                speaker_enhanced_checkbox,
-                prefix_equalized_checkbox,
-                prefix_enhanced_checkbox,
                 enhanced_checkbox,
             ],
             outputs=[output_audio, seed_number],

                     label="Optional Prefix Audio (continue from this audio)",
                     type="filepath",
                 )
+                prefix_equalize_checkbox = gr.Checkbox(label="Equalize Prefix Audio", value=True)
+                prefix_enhance_checkbox = gr.Checkbox(label="Enhance Prefix Audio with DeepFilterNet", value=True)
             with gr.Column(scale=3):
                 cfg_scale_slider = gr.Slider(1.0, 5.0, 2.0, 0.1, label="CFG Scale")
                 min_p_slider = gr.Slider(0.0, 1.0, 0.15, 0.01, label="Min P")
                     value=False,
                     info="Note; this is not a pre-processing step, it is a conditioning value that the model understands. Check this box if your input audio is noisy."
                 )
+                speaker_equalize_checkbox = gr.Checkbox(label="Equalize Speaker Audio", value=True)
+                speaker_enhance_checkbox = gr.Checkbox(label="Enhance Speaker Audio with DeepFilterNet", value=True)
                 def on_enhanced_change(use_enhance: bool) -> Dict[str, Any]:
                     update_dict = {"enabled": not use_enhance}
                     -1200, 1200, -44.99, 0.01, label="Speaker Pitch Shift (Cents)",
                     info="A pitch shift to apply to speaker audio before extracting embeddings. A slight down-shift of ~45 cents tends to produce a more accurate voice cloning."
                 )
             speaker_audio = gr.Audio(
                 label="Optional Speaker Audio (for cloning)",
                 type="filepath",
                 emotion_uncond,
                 speaker_uncond,
                 speaker_pitch_shift,
+                speaker_equalize_checkbox,
+                speaker_enhance_checkbox,
+                prefix_equalize_checkbox,
+                prefix_enhance_checkbox,
                 enhanced_checkbox,
             ],
             outputs=[output_audio, seed_number],