Spaces:

mikr
/

w2v-bert2-czech

Sleeping

mikr committed on Feb 3, 2024

Commit

e473647

1 Parent(s): 9963f1b

trans2

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,16 +1,17 @@
 import gradio as gr
 import torch
-from transformers import pipeline
 MODEL_NAME = "mikr/w2v-bert-2.0-czech-colab-cv16"
 lang = "cs"
 device = 0 if torch.cuda.is_available() else "cpu"
 pipe = pipeline(
-    task="automatic-speech-recognition",
     model=MODEL_NAME,
-    chunk_length_s=30,
-    device=device,
 )
 def transcribe(file_upload):
@@ -19,14 +20,21 @@ def transcribe(file_upload):
         return "ERROR: You have to either use the microphone or upload an audio file"
     file = file_upload
     text = pipe(file)["text"]
     return warn_output + text
 iface = gr.Interface(
-    fn=transcribe,
     inputs=[
         gr.File(type="binary", label="Upload Audio File"),  # Audio file upload
     ],

 import gradio as gr
 import torch
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
 MODEL_NAME = "mikr/w2v-bert-2.0-czech-colab-cv16"
 lang = "cs"
 device = 0 if torch.cuda.is_available() else "cpu"
+model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME).to(device)
+processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
 pipe = pipeline(
     model=MODEL_NAME,
 )
 def transcribe(file_upload):
         return "ERROR: You have to either use the microphone or upload an audio file"
     file = file_upload
     text = pipe(file)["text"]
     return warn_output + text
+def transcribe2(file_upload):
+    with torch.inference_mode():
+        input_values = processor(wav, sampling_rate=16000).input_values[0]
+        input_values = torch.tensor(input_values, device=device).unsqueeze(0)
+        logits = model(input_values).logits
+        pred_ids = torch.argmax(logits, dim=-1)
+        xcp = processor.batch_decode(pred_ids)
+        return xcp[0]
 iface = gr.Interface(
+    fn=transcribe2,
     inputs=[
         gr.File(type="binary", label="Upload Audio File"),  # Audio file upload
     ],