"""Gradio demo: speech recognition with a fine-tuned wav2vec 2.0 CTC model.

The UI has two steps:

1. "Recognise" runs the model on an uploaded audio file and shows the
   predicted CTC token ids (one id per output frame).
2. "Dec" turns those token ids into text with the processor's decoder.
"""
import re

import gradio as gr
import soundfile as sf
import numpy as np
import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
from datasets import load_dataset, Audio
import matplotlib.pyplot as plt

MODEL_NAME = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"

# Sampling rate (Hz) that audio is converted to before it is handed to the
# processor.
TARGET_SAMPLE_RATE = 16000

torch.random.manual_seed(0)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME).to(device)
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)

# do i need this? can't remember
#ds = load_dataset("language-and-voice-lab/samromur_asr",split='train',streaming=True)
#ds = load_dataset("language-and-voice-lab/samromur_asr",split='test')
#ds = ds.cast_column("audio", Audio(sampling_rate=16_000))


def show_ex(exnum):
    """Return ``exnum`` unchanged.

    Placeholder: the dataset lookup it was meant to perform is disabled
    together with the dataset loading above.
    """
    #return(ds['audio_id'][exnum])
    return exnum


def recc(a_f):
    """Run the CTC model on an audio file and return the predicted token ids.

    Args:
        a_f: Path (or file-like object) readable by ``soundfile.read``.

    Returns:
        A 1-D integer tensor holding the arg-max token id of every output
        frame. The ids are raw CTC output (repeats and blanks included);
        pass them to ``dec`` to obtain text.
    """
    wav, sr = sf.read(a_f, dtype="float32")

    # Multi-channel audio comes back as (frames, channels): mix down to mono.
    if wav.ndim == 2:
        wav = wav.mean(1)

    # The original code called ``signal.resample`` without ever importing
    # ``signal``, so every non-16 kHz file raised NameError. torchaudio is
    # already imported by this file, so use its resampler instead.
    if sr != TARGET_SAMPLE_RATE:
        wav = torchaudio.functional.resample(
            torch.from_numpy(wav), orig_freq=sr, new_freq=TARGET_SAMPLE_RATE
        ).numpy()

    with torch.inference_mode():
        features = processor(wav, sampling_rate=TARGET_SAMPLE_RATE).input_values[0]
        # Add the batch dimension the model expects.
        batch = torch.tensor(features, device=device).unsqueeze(0)
        logits = model(batch).logits
        pred_ids = torch.argmax(logits, dim=-1)[0]
    return pred_ids


def dec(pids):
    """Decode CTC token ids into text.

    Args:
        pids: Either a sequence/tensor of token ids (as returned by
            ``recc``) or a string containing the ids as decimal numbers
            separated by any non-digit characters, which is what the UI
            textbox delivers.

    Returns:
        The decoded transcript, or an empty string when ``pids`` is a string
        that contains no ids.
    """
    if isinstance(pids, str):
        # A textbox hands back plain text, not the tensor that was written
        # into it, so recover the integer ids before decoding.
        pids = [int(tok) for tok in re.findall(r"\d+", pids)]
        if not pids:
            return ""
    return processor.decode(pids)


def _recc_as_text(a_f):
    """UI adapter: run ``recc`` and format the ids as space-separated text.

    Writing the tensor itself into a textbox would show its ``repr``, which
    torch abbreviates with ``...`` for long tensors, so ids would be lost
    before they reach ``dec``.
    """
    # NOTE(review): assumes Gradio passes None when no audio was provided --
    # confirm against the installed Gradio version.
    if a_f is None:
        return ""
    return " ".join(map(str, recc(a_f).tolist()))


bl = gr.Blocks()
with bl:
    audio_file = gr.Audio(type="filepath")

    text_button = gr.Button("Recognise")
    text_output = gr.Textbox()
    text_button.click(_recc_as_text, inputs=audio_file, outputs=text_output)

    text_button2 = gr.Button("Dec")
    text_output2 = gr.Textbox()
    text_button2.click(dec, inputs=text_output, outputs=text_output2)

bl.launch()

#https://mercury-docs.readthedocs.io/en/latest/deploy/hugging-face-spaces/
#https://huggingface.co/spaces/pplonski/deploy-mercury
#https://discuss.huggingface.co/t/deploy-interactive-jupyter-notebook-on-spaces-with-mercury/17000
#https://huggingface.co/docs/transformers/notebooks