# w2v2asr / app.py
# Last commit: "Update app.py" by clr (a64c958)
# File size: 2.19 kB
import gradio as gr
import soundfile as sf
import numpy as np
import torch, torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
from datasets import load_dataset, Audio
import matplotlib.pyplot as plt
# Hugging Face Hub id of the checkpoint loaded below.
MODEL_NAME = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"

# Fix the torch RNG seed before anything else touches it.
torch.random.manual_seed(0)

# Use the GPU when one is visible, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the CTC model and place it on the selected device.
model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)
model = model.to(device)

# The processor is required: recc() calls it to prepare the audio.
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
#def greet(name):
# return "Hello " + name + "!!"
#iface = gr.Interface(fn=greet, inputs="text", outputs="text")
#iface.launch()
#api = gr.Interface.load("models/carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h")
#iface.launch()
#ds = load_dataset("language-and-voice-lab/samromur_asr",split='train',streaming=True)
#ds = load_dataset("language-and-voice-lab/samromur_asr",split='test')
#ds = ds.cast_column("audio", Audio(sampling_rate=16_000))
def show_ex(exnum):
    """Return ``exnum`` unchanged.

    Placeholder callback. The dataset lookup it was written for
    (``ds['audio_id'][exnum]``) is disabled, because the code that would
    load ``ds`` is commented out in this file.

    Args:
        exnum: Any value; it is handed back as-is.

    Returns:
        The same object that was passed in.
    """
    return exnum
def recc(a_f):
    """Read an audio file and pass it through the Wav2Vec2 processor.

    The audio is read as float32, averaged across channels when the array is
    2-D, and resampled to 16 kHz when the file's sample rate differs.

    Args:
        a_f: Path of the audio file to read. The Gradio ``Audio`` component
            that feeds this callback is created with ``type="filepath"``.

    Returns:
        The ``input_values`` attribute of the module-level ``processor``
        output.
        NOTE(review): this is the processor output, not a transcript; the
        module-level ``model`` is never called here. Confirm whether a
        forward pass and CTC decoding were meant to follow.
    """
    wav, sr = sf.read(a_f, dtype=np.float32)

    # Multi-channel audio: collapse to mono by averaging over axis 1.
    if wav.ndim == 2:
        wav = wav.mean(1)

    if sr != 16000:
        # The original called signal.resample without ever importing
        # `signal`, so every non-16 kHz file raised NameError. Imported
        # here so scipy is only needed when resampling actually happens.
        from scipy import signal

        wlen = int(wav.shape[0] / sr * 16000)
        wav = signal.resample(wav, wlen)

    with torch.inference_mode():
        # Add a leading dimension of size 1 before calling the processor.
        wav = torch.from_numpy(wav).unsqueeze(0)
        if torch.cuda.is_available():
            # NOTE(review): the tensor is moved to the GPU before the
            # processor call, as in the original; verify the processor
            # accepts CUDA tensors.
            wav = wav.cuda()
        input_values = processor(wav).input_values
    return input_values
# Assemble the page: two text boxes, a "Run" button and an audio input.
with gr.Blocks() as bl:
    text_input = gr.Textbox()
    text_output = gr.Textbox()
    text_button = gr.Button("Run")
    audio_file = gr.Audio(type="filepath")

    # Earlier wiring, currently disabled:
    # text_button.click(show_ex, inputs=text_input, outputs=text_output)

    # Pressing "Run" sends the audio file path to recc() and shows its
    # return value in text_output.
    text_button.click(fn=recc, inputs=audio_file, outputs=text_output)

bl.launch()
#https://mercury-docs.readthedocs.io/en/latest/deploy/hugging-face-spaces/
#https://huggingface.co/spaces/pplonski/deploy-mercury
#https://discuss.huggingface.co/t/deploy-interactive-jupyter-notebook-on-spaces-with-mercury/17000
#https://huggingface.co/docs/transformers/notebooks