Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -172,9 +172,30 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence,
|
|
| 172 |
elif exp_name == "Deutsh":
|
| 173 |
ema_model = E2TTS_ema_model2
|
| 174 |
#ref_audio, ref_text = preprocess_ref_audio_text(ref_audio, ref_text, show_info=show_info)
|
| 175 |
-
|
| 176 |
|
| 177 |
audio, sr = ref_audio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
if audio.shape[0] > 1:
|
| 179 |
audio = torch.mean(audio, dim=0, keepdim=True)
|
| 180 |
|
|
|
|
| 172 |
elif exp_name == "Deutsh":
|
| 173 |
ema_model = E2TTS_ema_model2
|
| 174 |
#ref_audio, ref_text = preprocess_ref_audio_text(ref_audio, ref_text, show_info=show_info)
|
| 175 |
+
|
| 176 |
|
| 177 |
audio, sr = ref_audio
|
| 178 |
+
|
| 179 |
+
# Convert audio to a PyDub AudioSegment (assumes audio is a NumPy array with values in the [-1, 1] range)
|
| 180 |
+
audio_segment = AudioSegment(
|
| 181 |
+
(audio * (2**15)).astype(np.int16).tobytes(), # Scale to 16-bit PCM range
|
| 182 |
+
frame_rate=sr,
|
| 183 |
+
sample_width=2, # 16-bit audio
|
| 184 |
+
channels=1 if len(audio.shape) == 1 else audio.shape[0] # Mono or multi-channel
|
| 185 |
+
)
|
| 186 |
+
|
| 187 |
+
# Remove silence using the custom remove_silence_edges function
|
| 188 |
+
audio_segment = remove_silence_edges(audio_segment, silence_threshold=-42)
|
| 189 |
+
|
| 190 |
+
# Convert back to NumPy array for further processing
|
| 191 |
+
audio_trimmed = np.array(audio_segment.get_array_of_samples(), dtype=np.float32) / (2**15)
|
| 192 |
+
|
| 193 |
+
# Continue processing with trimmed audio
|
| 194 |
+
audio = torch.from_numpy(audio_trimmed).unsqueeze(0) # Add batch/channel dimension
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
|
| 199 |
if audio.shape[0] > 1:
|
| 200 |
audio = torch.mean(audio, dim=0, keepdim=True)
|
| 201 |
|