Vaibhav Srivastav committed on
Commit
0d56eb9
·
1 Parent(s): bbbf923

testing multiple models

Browse files
Files changed (1) hide show
  1. app.py +4 -5
app.py CHANGED
@@ -3,14 +3,13 @@ import librosa
3
  import torch
4
  import gradio as gr
5
  from pyctcdecode import build_ctcdecoder
6
- from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
7
 
8
  nltk.download("punkt")
9
 
10
- #Loading the model and the tokenizer
11
  model_name = "facebook/wav2vec2-base-960h"
12
- processor = Wav2Vec2Processor.from_pretrained(model_name)
13
- model = Wav2Vec2ForCTC.from_pretrained(model_name)
14
 
15
  def load_and_fix_data(input_file):
16
  #read the file
@@ -59,7 +58,7 @@ def return_all_predictions(input_file):
59
 
60
 
61
  gr.Interface(return_all_predictions,
62
- inputs = gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Record/ Drop audio"),
63
  outputs = [gr.outputs.Textbox(label="Beam CTC Decoding"), gr.outputs.Textbox(label="Greedy Decoding")],
64
  title="ASR using Wav2Vec 2.0 & pyctcdecode",
65
  description = "Extending HF ASR models with pyctcdecode decoder",
 
3
  import torch
4
  import gradio as gr
5
  from pyctcdecode import build_ctcdecoder
6
+ from transformers import AutoModelProcessor, AutoModelForCTC
7
 
8
  nltk.download("punkt")
9
 
 
10
  model_name = "facebook/wav2vec2-base-960h"
11
+ processor = AutoModelProcessor.from_pretrained(model_name)
12
+ model = AutoModelForCTC.from_pretrained(model_name)
13
 
14
  def load_and_fix_data(input_file):
15
  #read the file
 
58
 
59
 
60
  gr.Interface(return_all_predictions,
61
+ inputs = [gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Record/ Drop audio"), gr.inputs.Dropdown(["facebook/wav2vec2-base-960h", "facebook/hubert-large-ls960-ft"])],
62
  outputs = [gr.outputs.Textbox(label="Beam CTC Decoding"), gr.outputs.Textbox(label="Greedy Decoding")],
63
  title="ASR using Wav2Vec 2.0 & pyctcdecode",
64
  description = "Extending HF ASR models with pyctcdecode decoder",