h4d35 committed on
Commit
e6add42
·
1 Parent(s): f7413c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -39
app.py CHANGED
@@ -1,41 +1,10 @@
1
- #from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
2
- import soundfile as sf
3
- import torch
4
  import gradio as gr
5
-
6
-
7
- # load model and processor
8
- from transformers import AutoProcessor, AutoModelForCTC
9
-
10
- processor = AutoProcessor.from_pretrained("h4d35/Wav2Vec2-hi")
11
-
12
- model = AutoModelForCTC.from_pretrained("h4d35/Wav2Vec2-hi")
13
-
14
- # define function to read in sound file
15
- def map_to_array(file):
16
- speech, _ = sf.read(file)
17
- return speech
18
-
19
-
20
-
21
- # tokenize
22
- def inference(audio):
23
- input_values = processor(map_to_array(audio.name), return_tensors="pt", padding="longest").input_values # Batch size 1
24
-
25
- # retrieve logits
26
- logits = model(input_values).logits
27
-
28
- # take argmax and decode
29
- predicted_ids = torch.argmax(logits, dim=-1)
30
- transcription = processor.batch_decode(predicted_ids)
31
- return transcription[0]
32
-
33
- inputs = gr.inputs.Audio(label="Input Audio", type="file")
34
- outputs = gr.outputs.Textbox(label="Output Text")
35
-
36
  title = "HindiASR"
37
- description = "HindiASR using Wav2Vec2.0"
38
-
39
-
40
- #examples=[['poem.wav']]
41
- gr.Interface(inference, inputs, outputs, title=title, description=description).launch()
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  title = "HindiASR"
3
+ description = "Gradio demo for HindiASR"
4
+ # article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2104.06678'>Large-Scale Self- and Semi-Supervised Learning for Speech Translation</a></p>"
5
+
6
+ # examples = [['common_voice_en_18301577.mp3']]
7
+ gr.Interface.load("huggingface/h4d35/Wav2Vec2-hi",
8
+ title=title,
9
+ description=description
10
+ ).launch()