File size: 2,169 Bytes
dd2b02c 56f1ec9 88ec444 56f1ec9 6b73084 72390b0 149c35c 72390b0 56f1ec9 149c35c 7a73b7f 2624e4a 149c35c 72390b0 7d31b0d 04ae345 56f1ec9 d4d3d57 04ae345 56f1ec9 d4d3d57 56f1ec9 149c35c f9a5e8b d4d3d57 93555f9 d4d3d57 93555f9 04ae345 56f1ec9 f9a5e8b d4d3d57 df9a8d7 79be1a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
import torch, torchaudio
from datasets import load_dataset, Audio
from scipy import signal
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
# Hugging Face Hub id of the fine-tuned wav2vec2 checkpoint used by this app.
MODEL_NAME = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"

# The device has to exist before the model is loaded, because the
# ``.to(device)`` call below reads it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME).to(device)
# The processor is required: recc() calls it to build ``input_values``.
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)

torch.random.manual_seed(0)
#def greet(name):
# return "Hello " + name + "!!"
#iface = gr.Interface(fn=greet, inputs="text", outputs="text")
#iface.launch()
#api = gr.Interface.load("models/carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h")
#iface.launch()
#ds = load_dataset("language-and-voice-lab/samromur_asr",split='train',streaming=True)
#ds = load_dataset("language-and-voice-lab/samromur_asr",split='test')
#ds = ds.cast_column("audio", Audio(sampling_rate=16_000))
def show_ex(exnum):
    """Return ``exnum`` unchanged.

    Placeholder callback: the dataset lookup it was written for
    (``ds['audio_id'][exnum]``) is disabled, so the value is echoed back.
    """
    return exnum
def recc(a_f):
    """Read an audio file and convert it to processor ``input_values``.

    The audio is read as float32, averaged to a single channel when it has
    two dimensions, and resampled to 16 kHz when its sample rate differs.

    Args:
        a_f: Path of the audio file to read with ``soundfile``.

    Returns:
        The ``input_values`` attribute of the module-level ``processor``
        output for the prepared waveform.
    """
    target_sr = 16000

    wav, sr = sf.read(a_f, dtype=np.float32)

    # A 2-D array means one column per channel; average them down to mono.
    if wav.ndim == 2:
        wav = wav.mean(1)

    if sr != target_sr:
        wlen = int(wav.shape[0] / sr * target_sr)
        # Cast back explicitly so the tensor built below stays float32
        # whatever dtype the resampler hands back.
        wav = signal.resample(wav, wlen).astype(np.float32, copy=False)

    with torch.inference_mode():
        wav = torch.from_numpy(wav).unsqueeze(0)
        if torch.cuda.is_available():
            wav = wav.cuda()
        # NOTE(review): the processor receives a torch tensor (on the GPU
        # when CUDA is available) -- confirm it accepts that input type.
        input_values = processor(wav).input_values
    return input_values
# Gradio UI: two text boxes, a "Run" button and an audio input. Pressing the
# button sends the audio file path to recc() and shows the result in
# text_output.
with gr.Blocks() as bl:
    text_input = gr.Textbox()
    text_output = gr.Textbox()
    text_button = gr.Button(value="Run")
    #text_button.click(show_ex, inputs=text_input, outputs=text_output)
    audio_file = gr.Audio(type="filepath")
    text_button.click(fn=recc, inputs=audio_file, outputs=text_output)

bl.launch()
#https://mercury-docs.readthedocs.io/en/latest/deploy/hugging-face-spaces/
#https://huggingface.co/spaces/pplonski/deploy-mercury
#https://discuss.huggingface.co/t/deploy-interactive-jupyter-notebook-on-spaces-with-mercury/17000
#https://huggingface.co/docs/transformers/notebooks
|