sovits-test / crepe /loudness.py
atsushieee's picture
Upload folder using huggingface_hub
9791162
raw
history blame
2.31 kB
import warnings
import librosa
import numpy as np
import resampy
import torch
import crepe
###############################################################################
# Constants
###############################################################################
# Floor (in dB) that a_weighted() clamps every weighted bin to
MIN_DB = -100.0
# Offset (in dB) that perceptual_weights() subtracts from the A-weighting curve
REF_DB = 20.0
###############################################################################
# A-weighted loudness
###############################################################################
def a_weighted(audio, sample_rate, hop_length=None, pad=True):
    """Compute the A-weighted loudness of each frame of an audio signal.

    Arguments
        audio
            Torch tensor holding the signal. Its leading dimension is
            squeezed away, so presumably shape (1, samples) - TODO confirm.
        sample_rate
            Sampling rate of `audio`.
        hop_length
            Samples between consecutive frames, expressed at `sample_rate`.
            Defaults to `sample_rate // 100` (10 ms).
        pad
            Forwarded to `librosa.stft` as `center`.

    Returns
        Float32 torch tensor on the same device as `audio`, holding the
        mean weighted level of every frame with a leading singleton
        dimension added.
    """
    # Remember where the input lives so the result can be sent back there
    source_device = audio.device

    # Fall back to a 10 ms hop
    if hop_length is None:
        hop_length = sample_rate // 100

    # librosa and resampy operate on numpy arrays
    signal = audio.detach().cpu().numpy().squeeze(0)

    # Bring the signal to the model rate and rescale the hop to match
    if sample_rate != crepe.SAMPLE_RATE:
        signal = resampy.resample(signal, sample_rate, crepe.SAMPLE_RATE)
        hop_length = int(hop_length * crepe.SAMPLE_RATE / sample_rate)

    # The weights are computed once and memoized on the function object
    try:
        weights = a_weighted.weights
    except AttributeError:
        weights = a_weighted.weights = perceptual_weights()

    # Short-time Fourier transform
    spectrum = librosa.stft(
        signal,
        n_fft=crepe.WINDOW_SIZE,
        hop_length=hop_length,
        win_length=crepe.WINDOW_SIZE,
        center=pad,
        pad_mode='constant')

    # Magnitudes in decibels, shifted by the per-frequency weights
    levels = librosa.amplitude_to_db(np.abs(spectrum)) + weights

    # Nothing is allowed below the floor
    levels = np.maximum(levels, MIN_DB)

    # Collapse the frequency axis to one value per frame
    per_frame = levels.mean(axis=0)
    loudness = torch.from_numpy(per_frame).float().to(source_device)
    return loudness.unsqueeze(0)
def perceptual_weights():
    """Build the A-weighting offsets for each STFT frequency bin.

    Returns
        Column array of shape (bins, 1): the librosa A-weighting curve
        evaluated at the FFT bin frequencies, lowered by REF_DB.
    """
    bin_frequencies = librosa.fft_frequencies(
        sr=crepe.SAMPLE_RATE,
        n_fft=crepe.WINDOW_SIZE)

    # librosa emits a RuntimeWarning for nearly inaudible frequencies; the
    # value it falls back to there is acceptable here, so silence it.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', RuntimeWarning)
        curve = librosa.A_weighting(bin_frequencies)

    # Add a trailing axis so the offsets broadcast across frames
    return curve[:, None] - REF_DB