clr committed on
Commit 0e17f64 · 1 Parent(s): f815a95

Update app.py

Files changed (1)
  1. app.py +15 -14
app.py CHANGED
@@ -1,5 +1,6 @@
 import gradio as gr
 import soundfile as sf
+from scipy import signal
 import numpy as np
 import torch, torchaudio
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
@@ -15,6 +16,7 @@ processor_is = Wav2Vec2Processor.from_pretrained(MODEL_IS)
 model_fo = Wav2Vec2ForCTC.from_pretrained(MODEL_FO).to(device)
 processor_fo = Wav2Vec2Processor.from_pretrained(MODEL_FO)
 
+
 def readwav(a_f):
     wav, sr = sf.read(a_f, dtype=np.float32)
     if len(wav.shape) == 2:
@@ -24,25 +26,23 @@ def readwav(a_f):
         wav = signal.resample(wav, wlen)
     return wav
 
-def recis(audio_file):
+def recc(audio_file,model,processor):
     wav = readwav(audio_file)
     with torch.inference_mode():
-        input_values = processor_is(wav,sampling_rate=16000).input_values[0]
+        input_values = processor(wav,sampling_rate=16000).input_values[0]
         input_values = torch.tensor(input_values, device=device).unsqueeze(0)
-        logits = model_is(input_values).logits
+        logits = model(input_values).logits
         pred_ids = torch.argmax(logits, dim=-1)
-        xcp = processor_is.batch_decode(pred_ids)
-    return xcp
+        xcp = processor.batch_decode(pred_ids)
+    return xcp[0]
+
+
+def recis(audio_file):
+    return recc(audio_file,model_is,processor_is)
 
 def recfo(audio_file):
-    wav = readwav(audio_file)
-    with torch.inference_mode():
-        input_values = processor_fo(wav,sampling_rate=16000).input_values[0]
-        input_values = torch.tensor(input_values, device=device).unsqueeze(0)
-        logits = model_fo(input_values).logits
-        pred_ids = torch.argmax(logits, dim=-1)
-        xcp = processor_fo.batch_decode(pred_ids)
-    return xcp
+    return recc(audio_file,model_fo,processor_fo)
+
 
 bl = gr.Blocks()
 with bl:
@@ -54,8 +54,9 @@ with bl:
     https://huggingface.co/carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h
     or https://huggingface.co/carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h
 
-    - For some reason, the huggingface 'Hosted inference API' on the model page does not work, but this does.
+    - For some reason, the huggingface 'Hosted inference API' on the model page does not currently work, but this does.
     - There is no language model (yet), so it can generate non-words.
+    - Send errors/bugs to [email protected]
     """
     )
 
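For reference, a minimal sketch of how the transcription path reads after this commit, assembled from the hunks above. The MODEL_IS/MODEL_FO identifiers are taken from the model links in the app's Markdown text; the device selection, the stereo downmix, and the sample-rate check inside readwav are assumptions, since those lines are not part of this diff.

# Sketch of the transcription path in app.py after this commit (assumptions marked inline).
import soundfile as sf
from scipy import signal
import numpy as np
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

# Model ids taken from the links shown in the app's Markdown text.
MODEL_IS = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"
MODEL_FO = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # assumption: not shown in this diff

model_is = Wav2Vec2ForCTC.from_pretrained(MODEL_IS).to(device)
processor_is = Wav2Vec2Processor.from_pretrained(MODEL_IS)
model_fo = Wav2Vec2ForCTC.from_pretrained(MODEL_FO).to(device)
processor_fo = Wav2Vec2Processor.from_pretrained(MODEL_FO)


def readwav(a_f):
    # Read the uploaded file as float32, downmix stereo, resample to 16 kHz.
    # Only the sf.read, shape check, signal.resample and return lines appear in
    # the diff context; the downmix and wlen computation are assumptions.
    wav, sr = sf.read(a_f, dtype=np.float32)
    if len(wav.shape) == 2:
        wav = wav.mean(axis=1)
    if sr != 16000:
        wlen = int(wav.shape[0] / sr * 16000)
        wav = signal.resample(wav, wlen)
    return wav


def recc(audio_file, model, processor):
    # Shared CTC recognition path introduced by this commit: the two languages
    # now differ only in which model/processor pair is passed in.
    wav = readwav(audio_file)
    with torch.inference_mode():
        input_values = processor(wav, sampling_rate=16000).input_values[0]
        input_values = torch.tensor(input_values, device=device).unsqueeze(0)
        logits = model(input_values).logits
        pred_ids = torch.argmax(logits, dim=-1)
        xcp = processor.batch_decode(pred_ids)
    return xcp[0]


def recis(audio_file):
    return recc(audio_file, model_is, processor_is)


def recfo(audio_file):
    return recc(audio_file, model_fo, processor_fo)

Returning xcp[0] rather than the full batch_decode list presumably lets each Gradio output receive a plain string instead of a one-element list.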
62