import torchaudio
import torch
from model import M11
import gradio as gr
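# Gradio demo: threat detection from Bengali voice calls.
# An M11 classifier trained on fixed-length (400,000-sample), 8 kHz, mono audio
# predicts one of three labels (Threat, Normal, Sarcastic) for an uploaded clip.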
def _cut_if_necessary(signal):
    # truncate signals longer than 400,000 samples (50 s at 8 kHz)
    if signal.shape[1] > 400000:
        signal = signal[:, :400000]
    return signal

def _right_pad_if_necessary(signal):
    signal_length = signal.shape[1]
    if signal_length < 400000:
        num_missing_samples = 400000 - signal_length
        last_dim_padding = (0, num_missing_samples)  # (left, right): no zeros on the left, num_missing_samples zeros on the right
        signal = torch.nn.functional.pad(signal, last_dim_padding)
    return signal
def preprocess(signal, sr, device):
    # add a channel dimension for 1-D samples
    if len(signal.shape) == 1:
        signal = signal.unsqueeze(0)
    # resample the audio signal to the 8 kHz rate used during training
    if sr != 8_000:
        resampler = torchaudio.transforms.Resample(sr, 8_000).to(device)
        signal = resampler(signal)
    # downmix stereo signals to mono by averaging the channels
    if signal.shape[0] > 1:
        signal = torch.mean(signal, dim=0, keepdim=True)
    signal = _cut_if_necessary(signal)  # truncate longer signals
    signal = _right_pad_if_necessary(signal)  # zero-pad shorter signals
    return signal
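# Illustrative example (hypothetical input): a 3-second stereo clip at 44.1 kHz
# comes out as a mono, 8 kHz, fixed-length tensor:
#   dummy = torch.randn(2, 3 * 44_100)
#   preprocess(dummy, 44_100, "cpu").shape  # torch.Size([1, 400000])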
def pipeline(audio_file):
    audio_PATH = audio_file.name
    audio, sample_rate = torchaudio.load(audio_PATH)
    processed_audio = preprocess(audio.to(DEVICE), sample_rate, DEVICE)
    with torch.no_grad():
        # the model returns log-softmax scores; exponentiate to recover probabilities
        pred = torch.exp(classifier(processed_audio.unsqueeze(0)).squeeze())
    return {labels[i]: float(pred[i]) for i in range(3)}
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
model_PATH = "./model.ckpt"
labels = ["Threat", "Normal", "Sarcastic"]
classifier = M11.load_from_checkpoint(model_PATH).to(DEVICE)
classifier.eval()
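# Note: gr.inputs / gr.outputs are the legacy (pre-3.0) Gradio namespaces;
# newer releases expose these components directly as gr.Audio and gr.Label.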
inputs = gr.inputs.Audio(label="Input Audio", type="file")
outputs = gr.outputs.Label(num_top_classes=3)
title = "Threat Detection From Bengali Voice Calls"
description = "Gradio demo for audio classification: upload your own audio, or click one of the examples to load it. Read more at the link below."
article = "<p style='text-align: center'><a href='https://github.com/khalidsaifullaah' target='_blank'>Github Repo</a></p>"
examples = [
['sample_audio.wav']
]
gr.Interface(pipeline, inputs, outputs, title=title, description=description, article=article, examples=examples).launch()
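# Running this script starts a local Gradio server and serves the demo UI
# (at http://127.0.0.1:7860 by default).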