Spaces:
Running
Running
| # Copyright (c) 2023 Amphion. | |
| # | |
| # This source code is licensed under the MIT license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| import torch | |
| import librosa | |
| from utils.util import JsonHParams | |
| from utils.f0 import get_f0_features_using_parselmouth, get_pitch_sub_median | |
| from utils.mel import extract_mel_features | |
| def extract_spr( | |
| audio, | |
| fs=None, | |
| hop_length=256, | |
| win_length=1024, | |
| n_fft=1024, | |
| n_mels=128, | |
| f0_min=37, | |
| f0_max=1000, | |
| pitch_bin=256, | |
| pitch_max=1100.0, | |
| pitch_min=50.0, | |
| ): | |
| """Compute Singing Power Ratio (SPR) from a given audio. | |
| audio: path to the audio. | |
| fs: sampling rate. | |
| hop_length: hop length. | |
| win_length: window length. | |
| n_mels: number of mel filters. | |
| f0_min: lower limit for f0. | |
| f0_max: upper limit for f0. | |
| pitch_bin: number of bins for f0 quantization. | |
| pitch_max: upper limit for f0 quantization. | |
| pitch_min: lower limit for f0 quantization. | |
| """ | |
| # Load audio | |
| if fs != None: | |
| audio, _ = librosa.load(audio, sr=fs) | |
| else: | |
| audio, fs = librosa.load(audio) | |
| audio = torch.from_numpy(audio) | |
| # Initialize config | |
| cfg = JsonHParams() | |
| cfg.sample_rate = fs | |
| cfg.hop_size = hop_length | |
| cfg.win_size = win_length | |
| cfg.n_fft = n_fft | |
| cfg.n_mel = n_mels | |
| cfg.f0_min = f0_min | |
| cfg.f0_max = f0_max | |
| cfg.pitch_bin = pitch_bin | |
| cfg.pitch_max = pitch_max | |
| cfg.pitch_min = pitch_min | |
| # Extract mel spectrograms | |
| cfg.fmin = 2000 | |
| cfg.fmax = 4000 | |
| mel1 = extract_mel_features( | |
| y=audio.unsqueeze(0), | |
| cfg=cfg, | |
| ).squeeze(0) | |
| cfg.fmin = 0 | |
| cfg.fmax = 2000 | |
| mel2 = extract_mel_features( | |
| y=audio.unsqueeze(0), | |
| cfg=cfg, | |
| ).squeeze(0) | |
| f0 = get_f0_features_using_parselmouth( | |
| audio, | |
| cfg, | |
| )[0] | |
| # Mel length alignment | |
| length = min(len(f0), mel1.shape[-1]) | |
| f0 = f0[:length] | |
| mel1 = mel1[:, :length] | |
| mel2 = mel2[:, :length] | |
| # Compute SPR | |
| res = [] | |
| for i in range(mel1.shape[-1]): | |
| if f0[i] <= 1: | |
| continue | |
| chunk1 = mel1[:, i] | |
| chunk2 = mel2[:, i] | |
| max1 = max(chunk1.numpy()) | |
| max2 = max(chunk2.numpy()) | |
| tmp_res = max2 - max1 | |
| res.append(tmp_res) | |
| if len(res) == 0: | |
| return False | |
| else: | |
| return sum(res) / len(res) | |