Spaces:

ales
/

wav2vec2-cv-be-lm

Runtime error

ales committed on Apr 13, 2022

Commit

d71b5df

1 Parent(s): 3702096

converting stereo audio to mono if needed

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Tuple
 import numpy as np
@@ -20,6 +20,12 @@ LM_HUB_FP = 'language_model/cv8be_5gram.bin'
 def main(audio_fp: str):
     audio, sampling_rate = torchaudio.load(audio_fp, normalize=True)
     # resample audio to 16kHz
     resampler = Resample(orig_freq=sampling_rate, new_freq=16_000)
     audio_resampled = resampler(audio)
@@ -37,17 +43,20 @@ def main(audio_fp: str):
     res['sampling_rate_orig'] = sampling_rate
     res['init_audio_shape'] = audio.shape
     res['inputs_shape'] = inputs.shape
     res['inputs_max'] = np.max(inputs).item()
     res['inputs_min'] = np.min(inputs).item()
-    return str(res)
 iface = gr.Interface(
     fn=main,
     inputs=gr.inputs.Audio(
-        source='microphone', type='filepath',
         label='Запішыце аўдыяфайл, каб распазнаваць маўленне'
     ),
     outputs='text'

+import json
 import numpy as np
 def main(audio_fp: str):
     audio, sampling_rate = torchaudio.load(audio_fp, normalize=True)
+    # convert stereo to mono
+    converted_to_mono = False
+    if audio.shape[0] > 1:
+        audio = torch.mean(audio, dim=0, keepdim=True)
+        converted_to_mono = True
     # resample audio to 16kHz
     resampler = Resample(orig_freq=sampling_rate, new_freq=16_000)
     audio_resampled = resampler(audio)
     res['sampling_rate_orig'] = sampling_rate
     res['init_audio_shape'] = audio.shape
+    res['converted_to_mono'] = converted_to_mono
     res['inputs_shape'] = inputs.shape
     res['inputs_max'] = np.max(inputs).item()
     res['inputs_min'] = np.min(inputs).item()
+    res_str = json.dumps(res, indent=2)
+    return res_str
 iface = gr.Interface(
     fn=main,
     inputs=gr.inputs.Audio(
+        source='microphone', type='filepath',
         label='Запішыце аўдыяфайл, каб распазнаваць маўленне'
     ),
     outputs='text'