Spaces:
Sleeping
Sleeping
fix
Browse files- app.py +13 -1
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
import torch
|
3 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
|
4 |
|
@@ -23,9 +24,20 @@ def transcribe(file_upload):
|
|
23 |
text = pipe(file)["text"]
|
24 |
return warn_output + text
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
def transcribe2(file_upload):
|
|
|
27 |
with torch.inference_mode():
|
28 |
-
input_values = processor(
|
29 |
input_values = torch.tensor(input_values, device=device).unsqueeze(0)
|
30 |
logits = model(input_values).logits
|
31 |
pred_ids = torch.argmax(logits, dim=-1)
|
|
|
1 |
import gradio as gr
|
2 |
+
import soundfile as sf
|
3 |
import torch
|
4 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
|
5 |
|
|
|
24 |
text = pipe(file)["text"]
|
25 |
return warn_output + text
|
26 |
|
27 |
+
|
28 |
+
def readwav(a_f):
|
29 |
+
wav, sr = sf.read(a_f, dtype=np.float32)
|
30 |
+
if len(wav.shape) == 2:
|
31 |
+
wav = wav.mean(1)
|
32 |
+
if sr != 16000:
|
33 |
+
wlen = int(wav.shape[0] / sr * 16000)
|
34 |
+
wav = signal.resample(wav, wlen)
|
35 |
+
return wav
|
36 |
+
|
37 |
def transcribe2(file_upload):
|
38 |
+
wav = readwav(file_upload)
|
39 |
with torch.inference_mode():
|
40 |
+
input_values = processor(wav, sampling_rate=16000).input_values[0]
|
41 |
input_values = torch.tensor(input_values, device=device).unsqueeze(0)
|
42 |
logits = model(input_values).logits
|
43 |
pred_ids = torch.argmax(logits, dim=-1)
|
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
git+https://github.com/huggingface/transformers
|
2 |
torch
|
|
|
|
1 |
git+https://github.com/huggingface/transformers
|
2 |
torch
|
3 |
+
soundfile
|