w2v2asr / app.py
clr's picture
Update app.py
a16e474
raw
history blame
2.36 kB
import gradio as gr
import soundfile as sf
import numpy as np
import torch, torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
from datasets import load_dataset, Audio
import matplotlib.pyplot as plt
# Model identifier passed to both from_pretrained() calls below.
MODEL_NAME="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"
# Seed torch's random number generator with a fixed value.
torch.random.manual_seed(0)
# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the CTC model once at import time and move it to the selected device.
model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME).to(device)
# The processor is needed: recc() calls it to build the model's input values
# and dec() calls its decode() method to turn predicted ids into text.
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME) # required by recc() and dec()
# Dataset loading is currently disabled; the lines below are kept for reference.
#ds = load_dataset("language-and-voice-lab/samromur_asr",split='train',streaming=True)
#ds = load_dataset("language-and-voice-lab/samromur_asr",split='test')
#ds = ds.cast_column("audio", Audio(sampling_rate=16_000))
def show_ex(exnum):
    """Return ``exnum`` unchanged.

    Placeholder: the dataset lookup this was meant to perform
    (``ds['audio_id'][exnum]``) is disabled because ``ds`` is not loaded.
    """
    example = exnum
    return example
def recc(a_f):
    """Run the CTC model on an audio file and return its predicted token ids.

    Args:
        a_f: Path to an audio file that ``soundfile`` can read.

    Returns:
        A CPU tensor with the argmax token id per model output step for the
        single input. The ids are returned raw; ``dec`` turns them into text.
    """
    # Function-scope import: the previous code called ``signal.resample``
    # without ``signal`` ever being imported, so every file whose sample rate
    # was not 16 kHz failed with NameError.
    from scipy import signal

    target_sr = 16000  # sampling rate passed to the processor below

    wav, sr = sf.read(a_f, dtype=np.float32)
    if wav.ndim == 2:
        # Two-dimensional audio data: average over axis 1 to get one channel.
        wav = wav.mean(1)
    if sr != target_sr:
        # Resample to the target rate, keeping the clip duration the same.
        wlen = int(wav.shape[0] / sr * target_sr)
        wav = signal.resample(wav, wlen)

    with torch.inference_mode():
        input_values = processor(wav, sampling_rate=target_sr).input_values[0]
        # Add a leading batch dimension and place the input on the model's device.
        input_values = torch.tensor(input_values, device=device).unsqueeze(0)
        logits = model(input_values).logits
        pred_ids = torch.argmax(logits, dim=-1)
        # Drop the batch dimension and bring the ids back to the CPU.
        pred_ids = pred_ids[0].cpu().detach()
    return pred_ids
def dec(pids):
    """Decode predicted token ids into text with ``processor.decode``.

    Args:
        pids: Either token ids in a form ``processor.decode`` accepts
            (list, array, tensor), or the *text* form of such ids, e.g.
            ``"tensor([ 5,  5,  0, 12])"``. The text form is what this
            function receives from the UI, because its input is wired to a
            ``gr.Textbox``.

    Returns:
        The string returned by ``processor.decode``.

    Raises:
        ValueError: If ``pids`` is text containing ``...``, which indicates
            an abbreviated repr from which ids are missing.
    """
    if isinstance(pids, str):
        import re

        if "..." in pids:
            # An abbreviated repr has dropped ids; decoding the remainder
            # would silently give a wrong transcript.
            raise ValueError(
                "token id text is abbreviated ('...'); cannot recover all ids"
            )
        # Pull every integer out of the text, ignoring wrappers such as
        # "tensor([" / "])" and any whitespace or commas.
        pids = [int(tok) for tok in re.findall(r"\d+", pids)]
    # Decoding is pure tokenizer work, so no torch inference context is needed.
    return processor.decode(pids)
# Two-step demo UI: "Recognise" runs recc() on the uploaded audio and shows
# the predicted ids in the first textbox; "Dec" feeds that textbox's content
# to dec() and shows the result in the second textbox.
with gr.Blocks() as bl:
    # Step 1: audio file -> predicted ids.
    audio_file = gr.Audio(type="filepath")
    text_button = gr.Button("Recognise")
    text_output = gr.Textbox()
    text_button.click(fn=recc, inputs=audio_file, outputs=text_output)

    # Step 2: predicted ids -> decoded text.
    text_button2 = gr.Button("Dec")
    text_output2 = gr.Textbox()
    text_button2.click(fn=dec, inputs=text_output, outputs=text_output2)

bl.launch()
#https://mercury-docs.readthedocs.io/en/latest/deploy/hugging-face-spaces/
#https://huggingface.co/spaces/pplonski/deploy-mercury
#https://discuss.huggingface.co/t/deploy-interactive-jupyter-notebook-on-spaces-with-mercury/17000
#https://huggingface.co/docs/transformers/notebooks