Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -35,10 +35,21 @@ def whisper_speech_demo(text, lang, speaker_audio, mix_lang, mix_text):
|
|
| 35 |
|
| 36 |
resample_audio = resampler(newsr=24000)
|
| 37 |
audio_data_resampled = next(resample_audio([{'sample_rate': 24000, 'samples': audio_data.cpu()}]))['samples_24k']
|
|
|
|
| 38 |
audio_np = audio_data_resampled.cpu().numpy()
|
| 39 |
audio_np = audio_np / np.max(np.abs(audio_np))
|
|
|
|
|
|
|
| 40 |
audio_np = np.asarray(audio_np, dtype=np.float32)
|
|
|
|
|
|
|
| 41 |
audio_stereo = np.stack((audio_np, audio_np), axis=-1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
|
| 43 |
# Write the stereo data with a sample rate of 24000 Hz
|
| 44 |
sf.write(tmp_file.name, audio_stereo, 24000, format='WAV', subtype='PCM_16')
|
|
|
|
| 35 |
|
| 36 |
resample_audio = resampler(newsr=24000)
|
| 37 |
audio_data_resampled = next(resample_audio([{'sample_rate': 24000, 'samples': audio_data.cpu()}]))['samples_24k']
|
| 38 |
+
# Normalize audio
|
| 39 |
audio_np = audio_data_resampled.cpu().numpy()
|
| 40 |
audio_np = audio_np / np.max(np.abs(audio_np))
|
| 41 |
+
|
| 42 |
+
# Ensure audio data is in the correct format
|
| 43 |
audio_np = np.asarray(audio_np, dtype=np.float32)
|
| 44 |
+
|
| 45 |
+
# Create stereo audio by duplicating the mono channel
|
| 46 |
audio_stereo = np.stack((audio_np, audio_np), axis=-1)
|
| 47 |
+
|
| 48 |
+
# Debugging: Inspect the shape and dtype of the audio array
|
| 49 |
+
print("Audio Array Shape:", audio_stereo.shape)
|
| 50 |
+
print("Audio Array Dtype:", audio_stereo.dtype)
|
| 51 |
+
|
| 52 |
+
# Save to a temporary WAV file as stereo
|
| 53 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
|
| 54 |
# Write the stereo data with a sample rate of 24000 Hz
|
| 55 |
sf.write(tmp_file.name, audio_stereo, 24000, format='WAV', subtype='PCM_16')
|