import gradio as gr
import torch
from wenet.cli.model import load_model
from huggingface_hub import hf_hub_download

# import spaces  # needed only when running on Hugging Face ZeroGPU hardware

# Download the Reverb ASR model weights and token units from the Hub and load
# them with WeNet's model loader.
REPO_ID = "Revai/reverb-asr"
files = ['reverb_asr_v1.jit.zip', 'tk.units.txt']
downloaded_files = [hf_hub_download(repo_id=REPO_ID, filename=f) for f in files]
model = load_model(downloaded_files[0], downloaded_files[1])

def process_cat_embs(style):
    """Parse a comma-separated weight string into a CPU tensor of category embeddings."""
    device = torch.device("cpu")
    cat_embs = torch.tensor([float(c) for c in style.split(',')]).to(device)
    return cat_embs
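
# Example (sketch): a slider value of 0.3 becomes the weight string "0.3,0.7",
# so process_cat_embs("0.3,0.7") returns tensor([0.3000, 0.7000]); the two
# category weights always sum to 1.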

# @spaces.GPU  # uncomment together with `import spaces` above on ZeroGPU hardware
def transcribe_audio(audio, style=0):
    if not audio:
        return "Input error: please provide an audio file."
    # Blend the two transcription styles with complementary weights.
    cat_embs = process_cat_embs(f'{style},{1 - style}')
    result = model.transcribe(audio, cat_embs=cat_embs)
    if not result or 'text' not in result:
        return "Error: no text was produced. Please try again."
    # WeNet emits '▁' as the word-boundary marker; map it back to spaces.
    text_output = result['text'].replace('▁', ' ')
    return text_output
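
# Example (sketch; "speech.wav" is a hypothetical local file, not shipped here):
#     transcribe_audio("speech.wav", style=0.5)  # weights both styles equally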

# Build the Gradio UI: audio input, style slider, and transcription output.
audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
style_slider = gr.Slider(0, 1, value=0, step=0.1, label="Transcription Style",
                         info="Adjust the transcription style: 0 (casual) to 1 (formal).")
output_textbox = gr.Textbox(label="Transcription Output")

description = ("This tool transcribes audio using a customizable transcription style "
               "ranging from casual to formal. Upload or record an audio file to begin.")

iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[audio_input, style_slider],
    outputs=output_textbox,
    title="Audio Transcription",
    description=description,
    theme="default",
)

iface.launch()
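
# To run locally (a sketch, assuming this file is saved as app.py and the
# `gradio`, `torch`, `wenet`, and `huggingface_hub` packages are installed):
#     python app.py
# Gradio serves on http://127.0.0.1:7860 by default; pass share=True to
# launch() for a temporary public link.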