Spaces:
Running
Running
File size: 2,314 Bytes
9791162 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import warnings
import librosa
import numpy as np
import resampy
import torch
import crepe
###############################################################################
# Constants
###############################################################################
# Floor applied to the weighted decibel values (dB)
MIN_DB = -100.0
# Reference level subtracted from the A-weighting curve (dB)
REF_DB = 20.0
###############################################################################
# A-weighted loudness
###############################################################################
def a_weighted(audio, sample_rate, hop_length=None, pad=True):
    """Compute the A-weighted loudness of each analysis frame.

    Arguments
        audio
            Torch tensor holding the signal. Axis 0 is squeezed away after
            the conversion to numpy, so it must have length 1.
        sample_rate
            Sampling rate of `audio`.
        hop_length
            Number of samples between frames, expressed at `sample_rate`.
            Defaults to `sample_rate // 100` (10 ms).
        pad
            Forwarded to `librosa.stft` as its `center` argument.

    Returns
        Float tensor on the same device as `audio`: the thresholded,
        weighted decibel values averaged over axis 0 of the STFT, with a
        new leading axis of length 1 (presumably shape (1, frames) --
        confirm against the installed librosa version).
    """
    # Remember where the input lives so the result can be moved back
    original_device = audio.device

    # Fall back to a 10 ms hop
    if hop_length is None:
        hop_length = sample_rate // 100

    # librosa and resampy operate on numpy arrays
    signal = audio.detach().cpu().numpy().squeeze(0)

    # Bring the signal to the crepe sample rate, rescaling the hop to match
    target_rate = crepe.SAMPLE_RATE
    if sample_rate != target_rate:
        signal = resampy.resample(signal, sample_rate, target_rate)
        hop_length = int(hop_length * target_rate / sample_rate)

    # The perceptual weights are computed once and stored on the function
    try:
        weights = a_weighted.weights
    except AttributeError:
        weights = a_weighted.weights = perceptual_weights()

    # Short-time Fourier transform with a crepe-sized window
    spectrum = librosa.stft(
        signal,
        n_fft=crepe.WINDOW_SIZE,
        hop_length=hop_length,
        win_length=crepe.WINDOW_SIZE,
        center=pad,
        pad_mode='constant')

    # Magnitude in decibels
    decibels = librosa.amplitude_to_db(np.abs(spectrum))

    # Add the A-weighting and clamp everything below the floor
    loudness = np.maximum(decibels + weights, MIN_DB)

    # Average over axis 0 and hand back a tensor on the original device
    per_frame = torch.from_numpy(loudness.mean(axis=0))
    return per_frame.float().to(original_device).unsqueeze(0)
def perceptual_weights():
    """Build the A-weighting curve used to weight each STFT bin.

    Returns
        Column array (one row per FFT bin frequency for crepe's sample rate
        and window size) of A-weighting values with REF_DB subtracted.
    """
    bin_frequencies = librosa.fft_frequencies(
        sr=crepe.SAMPLE_RATE,
        n_fft=crepe.WINDOW_SIZE)

    # librosa emits a RuntimeWarning for nearly inaudible frequencies. The
    # value it falls back to for those bins is acceptable here, so the
    # warning is silenced rather than handled.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', RuntimeWarning)
        curve = librosa.A_weighting(bin_frequencies)
        weights = curve[:, np.newaxis] - REF_DB

    return weights
|