LAP-DEV committed on
Commit
70dc53b
·
verified ·
1 Parent(s): 2f5a60e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -38
app.py CHANGED
@@ -121,44 +121,45 @@ class App:
121
  choices=self.whisper_inf.diarizer.get_available_device(),
122
  value=self.whisper_inf.diarizer.get_device(),
123
  interactive=True, visible=False)
124
-
125
- with gr.Accordion("Voice Detection Filter (⚠ experimental feature)", open=False, visible=True):
126
- cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
127
- interactive=True,
128
- info="Enable to transcribe only detected voice parts")
129
- sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
130
- value=vad_params["threshold"],
131
- info="Lower it to be more sensitive to small sounds")
132
- nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0,
133
- value=vad_params["min_speech_duration_ms"],
134
- info="Final speech chunks shorter than this time are thrown out")
135
- nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)",
136
- value=vad_params["max_speech_duration_s"],
137
- info="Maximum duration of speech chunks in seconds")
138
- nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0,
139
- value=vad_params["min_silence_duration_ms"],
140
- info="In the end of each speech chunk wait for this time"
141
- " before separating it")
142
- nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=vad_params["speech_pad_ms"],
143
- info="Final speech chunks are padded by this time each side")
144
-
145
-
146
- with gr.Accordion("Background Music Remover Filter (⚠ experimental feature)", open=False):
147
- cb_bgm_separation = gr.Checkbox(label="Enable Background Music Remover Filter", value=uvr_params["is_separate_bgm"],
148
- info="Enable to remove background music by submodel before transcribing",
149
- interactive=True)
150
- dd_uvr_device = gr.Dropdown(label="Device",
151
- value=self.whisper_inf.music_separator.device,
152
- choices=self.whisper_inf.music_separator.available_devices,
153
- interactive=True, visible=False)
154
- dd_uvr_model_size = gr.Dropdown(label="Model", value=uvr_params["model_size"],
155
- choices=self.whisper_inf.music_separator.available_models)
156
- nb_uvr_segment_size = gr.Number(label="Segment Size", value=uvr_params["segment_size"], precision=0,
157
- interactive=True, visible=False)
158
- cb_uvr_save_file = gr.Checkbox(label="Save separated files to output", value=uvr_params["save_file"],
159
- interactive=True, visible=False)
160
- cb_uvr_enable_offload = gr.Checkbox(label="Offload sub model after removing background music",value=uvr_params["enable_offload"],
161
- interactive=True, visible=False)
 
162
 
163
  with gr.Accordion("Advanced processing options", open=False, visible=False):
164
  nb_beam_size = gr.Number(label="Beam Size", value=whisper_params["beam_size"], precision=0, interactive=True,
 
121
  choices=self.whisper_inf.diarizer.get_available_device(),
122
  value=self.whisper_inf.diarizer.get_device(),
123
  interactive=True, visible=False)
124
+
125
+ with gr.Row():
126
+ with gr.Accordion("Voice Detection Filter (⚠ experimental feature)", open=False, visible=True):
127
+ cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
128
+ interactive=True,
129
+ info="Enable to transcribe only detected voice parts")
130
+ sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
131
+ value=vad_params["threshold"],
132
+ info="Lower it to be more sensitive to small sounds")
133
+ nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0,
134
+ value=vad_params["min_speech_duration_ms"],
135
+ info="Final speech chunks shorter than this time are thrown out")
136
+ nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)",
137
+ value=vad_params["max_speech_duration_s"],
138
+ info="Maximum duration of speech chunks in seconds")
139
+ nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0,
140
+ value=vad_params["min_silence_duration_ms"],
141
+ info="In the end of each speech chunk wait for this time"
142
+ " before separating it")
143
+ nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=vad_params["speech_pad_ms"],
144
+ info="Final speech chunks are padded by this time each side")
145
+
146
+
147
+ with gr.Accordion("Background Music Remover Filter (⚠ experimental feature)", open=False):
148
+ cb_bgm_separation = gr.Checkbox(label="Enable Background Music Remover Filter", value=uvr_params["is_separate_bgm"],
149
+ info="Enable to remove background music by submodel before transcribing",
150
+ interactive=True)
151
+ dd_uvr_device = gr.Dropdown(label="Device",
152
+ value=self.whisper_inf.music_separator.device,
153
+ choices=self.whisper_inf.music_separator.available_devices,
154
+ interactive=True, visible=False)
155
+ dd_uvr_model_size = gr.Dropdown(label="Model", value=uvr_params["model_size"],
156
+ choices=self.whisper_inf.music_separator.available_models)
157
+ nb_uvr_segment_size = gr.Number(label="Segment Size", value=uvr_params["segment_size"], precision=0,
158
+ interactive=True, visible=False)
159
+ cb_uvr_save_file = gr.Checkbox(label="Save separated files to output", value=uvr_params["save_file"],
160
+ interactive=True, visible=False)
161
+ cb_uvr_enable_offload = gr.Checkbox(label="Offload sub model after removing background music",value=uvr_params["enable_offload"],
162
+ interactive=True, visible=False)
163
 
164
  with gr.Accordion("Advanced processing options", open=False, visible=False):
165
  nb_beam_size = gr.Number(label="Beam Size", value=whisper_params["beam_size"], precision=0, interactive=True,