yasserrmd committed on
Commit
08b1403
·
verified ·
1 Parent(s): 6aee703

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from wenet.cli.model import load_model
4
+ from huggingface_hub import hf_hub_download
5
+ import spaces
6
+
7
+
8
+
9
+
10
+
11
+
12
# Fetch the Reverb ASR assets from the Hugging Face Hub and build the model.
# The repo ships a TorchScript checkpoint plus its token-units vocabulary file.
REPO_ID = "Revai/reverb-asr"
files = ['reverb_asr_v1.jit.zip', 'tk.units.txt']
downloaded_files = [
    hf_hub_download(repo_id=REPO_ID, filename=name)
    for name in files
]
# load_model(checkpoint_path, units_path) -> wenet ASR model used below.
model = load_model(*downloaded_files)
16
+
17
+
18
+
19
def process_cat_embs(cat_embs):
    """Parse a comma-separated string of style weights into a float tensor.

    Args:
        cat_embs: String such as "0.3,0.7" — category-embedding weights that
            blend the model's transcription styles (see `recognition`).

    Returns:
        A 1-D ``torch.float32`` tensor of the parsed weights, placed on CUDA
        when a GPU is available, otherwise on the CPU.

    Raises:
        ValueError: If any comma-separated field is not a valid float.
    """
    # BUG FIX: the original used device = "gpu", which is not a valid torch
    # device string and makes .to(device) raise RuntimeError. Use "cuda" with
    # a CPU fallback so the function also works on CPU-only hosts.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    cat_embs = torch.tensor([float(c) for c in cat_embs.split(',')]).to(device)
    return cat_embs
23
+
24
+
25
def recognition(audio, style=0):
    """Transcribe an audio file with the Reverb ASR model.

    Args:
        audio: Path to the recorded audio file (from the Gradio microphone
            widget); falsy when nothing was recorded.
        style: Slider value in [0, 1] blending non-verbatim (0) and
            verbatim (1) transcription styles.

    Returns:
        The transcript text, or a user-facing error message string.
    """
    # Guard clause: nothing recorded.
    if not audio:
        return "Input Error! Please enter one audio!"

    # The two style weights must sum to 1; the model consumes them as a tensor.
    weights = process_cat_embs(f'{style},{1-style}')
    result = model.transcribe(audio, cat_embs=weights)

    if not result or 'text' not in result:
        return "ERROR! No text output! Please try again!"

    # The model emits '▁' as the word-boundary marker; render it as a space.
    return result['text'].replace('▁', ' ')
37
+
38
+
39
# ---- Gradio UI wiring ----
# NOTE(review): `source=` and `enable_queue=` are Gradio 3.x APIs (Gradio 4
# renamed them to `sources=` / `.queue()`) — confirm the pinned gradio version.
inputs = [
    gr.Audio(
        source="microphone",
        type="filepath",
        label='Input audio',
    ),
    gr.Slider(
        0,
        1,
        value=0,
        label="Transcription Style",
        info="Adjust between non-verbatim (0) and verbatim (1) transcription",
    ),
]

output = gr.Textbox(label="Output Text")

# Build the demo: one audio input + style slider in, transcript text out.
iface = gr.Interface(
    fn=recognition,
    inputs=inputs,
    outputs=output,
    title="Reverb ASR Transcription",
    description="Supports verbatim and non-verbatim transcription styles.",
    article=(
        "<p style='text-align: center'>"
        "<a href='https://rev.com' target='_blank'>Learn more about Rev</a></p>"
    ),
    theme='huggingface',
)

iface.launch(enable_queue=True)