# Spaces:
# Runtime error
# Runtime error
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# Original copyright 2019 Tomoki Hayashi
# MIT License (https://opensource.org/licenses/MIT)
import torch
import torch.nn.functional as F
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
"""STFT-based Loss modules."""
def stft(x, fft_size, hop_size, win_length, window):
    """Perform STFT and convert to magnitude spectrogram.

    Args:
        x (Tensor): Input signal tensor. A 2-D batch (B, T) is used as is;
            any other rank is indexed with ``x[:, 0, :]`` first, so for a
            (B, C, T) batch only channel 0 is analysed.
        fft_size (int): FFT size.
        hop_size (int): Hop size.
        win_length (int): Window length.
        window (Tensor): Window tensor handed to ``torch.stft`` unchanged
            (not a window *name*).

    Returns:
        Tensor: Magnitude spectrogram (B, #frames, fft_size // 2 + 1).

    """
    # torch.stft wants (B, T). A 2-D batch is already in that layout; for
    # anything else keep the historical behaviour of selecting index 0 of
    # dimension 1 (which raises IndexError for tensors with too few dims).
    if x.dim() != 2:
        x = x[:, 0, :]

    x_stft = torch.stft(
        x, fft_size, hop_size, win_length, window, return_complex=True
    )
    # Split the complex spectrum into its real / imaginary planes.
    real, imag = torch.view_as_real(x_stft).unbind(-1)
    power = real ** 2 + imag ** 2

    # NOTE(kan-bayashi): clamp is needed to avoid nan or inf
    # torch.stft yields (B, #freq_bins, #frames); swap the last two axes.
    return torch.sqrt(torch.clamp(power, min=1e-7)).transpose(2, 1)
class SpectralConvergengeLoss(torch.nn.Module):
    """Spectral convergence loss module.

    Measures the Frobenius norm of the magnitude error relative to the
    Frobenius norm of the reference magnitude.
    """

    def __init__(self):
        """Initialize spectral convergence loss module (no parameters)."""
        super().__init__()

    def forward(self, x_mag, y_mag):
        """Calculate forward propagation.

        Args:
            x_mag (Tensor): Magnitude spectrogram of predicted signal
                (B, #frames, #freq_bins).
            y_mag (Tensor): Magnitude spectrogram of groundtruth signal
                (B, #frames, #freq_bins).

        Returns:
            Tensor: Spectral convergence loss value (scalar).

        """
        error_norm = torch.norm(y_mag - x_mag, p="fro")
        reference_norm = torch.norm(y_mag, p="fro")
        return error_norm / reference_norm
class LogSTFTMagnitudeLoss(torch.nn.Module):
    """Log STFT magnitude loss module.

    Mean absolute difference between the natural logarithms of two
    magnitude spectrograms.
    """

    def __init__(self):
        """Initialize log STFT magnitude loss module (no parameters)."""
        super().__init__()

    def forward(self, x_mag, y_mag):
        """Calculate forward propagation.

        Args:
            x_mag (Tensor): Magnitude spectrogram of predicted signal
                (B, #frames, #freq_bins).
            y_mag (Tensor): Magnitude spectrogram of groundtruth signal
                (B, #frames, #freq_bins).

        Returns:
            Tensor: Log STFT magnitude loss value (scalar).

        """
        log_reference = torch.log(y_mag)
        log_predicted = torch.log(x_mag)
        return F.l1_loss(log_reference, log_predicted)
class STFTLoss(torch.nn.Module):
    """Single-resolution STFT loss module.

    Combines the spectral convergence loss and the log STFT magnitude loss
    computed from one STFT configuration.
    """

    def __init__(self, fft_size=1024, shift_size=120, win_length=600, window="hann_window"):
        """Initialize STFT loss module.

        Args:
            fft_size (int): FFT size.
            shift_size (int): Hop size between frames.
            win_length (int): Window length.
            window (str): Name of a ``torch`` attribute that is called with
                ``win_length`` to build the window tensor.

        """
        super().__init__()
        self.fft_size = fft_size
        self.shift_size = shift_size
        self.win_length = win_length

        # Resolve the window factory by name and keep the resulting tensor
        # as a buffer so it follows the module across ``.to(device)`` calls.
        make_window = getattr(torch, window)
        self.register_buffer("window", make_window(win_length))

        self.spectral_convergenge_loss = SpectralConvergengeLoss()
        self.log_stft_magnitude_loss = LogSTFTMagnitudeLoss()

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal, passed unchanged to ``stft``
                (see that function for the accepted layout).
            y (Tensor): Groundtruth signal, passed unchanged to ``stft``.

        Returns:
            Tensor: Spectral convergence loss value.
            Tensor: Log STFT magnitude loss value.

        """
        stft_args = (self.fft_size, self.shift_size, self.win_length, self.window)
        predicted_mag = stft(x, *stft_args)
        reference_mag = stft(y, *stft_args)

        spectral_convergence = self.spectral_convergenge_loss(predicted_mag, reference_mag)
        log_magnitude = self.log_stft_magnitude_loss(predicted_mag, reference_mag)
        return spectral_convergence, log_magnitude
class MultiResolutionSTFTLoss(torch.nn.Module):
    """Multi resolution STFT loss module.

    Averages the two ``STFTLoss`` terms over several STFT configurations
    and scales each average by its own balancing factor.
    """

    def __init__(self,
                 fft_sizes=(1024, 2048, 512),
                 hop_sizes=(120, 240, 50),
                 win_lengths=(600, 1200, 240),
                 window="hann_window", factor_sc=0.1, factor_mag=0.1):
        """Initialize Multi resolution STFT loss module.

        Args:
            fft_sizes (sequence of int): FFT sizes, one per resolution.
            hop_sizes (sequence of int): Hop sizes, one per resolution.
            win_lengths (sequence of int): Window lengths, one per resolution.
            window (str): Window function type, forwarded to each ``STFTLoss``.
            factor_sc (float): Weight applied to the averaged spectral
                convergence loss.
            factor_mag (float): Weight applied to the averaged log STFT
                magnitude loss.

        """
        super().__init__()
        assert len(fft_sizes) == len(hop_sizes) == len(win_lengths)

        # One STFTLoss per (fft, hop, window-length) triple, in input order.
        resolutions = zip(fft_sizes, hop_sizes, win_lengths)
        self.stft_losses = torch.nn.ModuleList(
            [STFTLoss(n_fft, hop, win_len, window) for n_fft, hop, win_len in resolutions]
        )
        self.factor_sc = factor_sc
        self.factor_mag = factor_mag

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal, forwarded to every ``STFTLoss``.
            y (Tensor): Groundtruth signal, forwarded to every ``STFTLoss``.

        Returns:
            Tensor: Multi resolution spectral convergence loss value,
                scaled by ``factor_sc``.
            Tensor: Multi resolution log STFT magnitude loss value,
                scaled by ``factor_mag``.

        """
        per_resolution = [criterion(x, y) for criterion in self.stft_losses]
        count = len(self.stft_losses)

        # Accumulate from a float zero, left to right, then average.
        sc_mean = sum((sc for sc, _ in per_resolution), 0.0) / count
        mag_mean = sum((mag for _, mag in per_resolution), 0.0) / count
        return self.factor_sc * sc_mean, self.factor_mag * mag_mean
class L1_Multi_STFT(torch.nn.Module):
    """Combined waveform L1 and multi resolution STFT loss module.

    Adds a plain L1 loss between the two inputs to both terms returned by
    ``MultiResolutionSTFTLoss`` (built with its default arguments).
    """

    def __init__(self):
        """Initialize the L1 + multi resolution STFT loss module."""
        super().__init__()
        self.multi_STFT_loss = MultiResolutionSTFTLoss()
        self.l1_loss = torch.nn.L1Loss()

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal; given to both the multi resolution
                STFT loss and the L1 loss.
            y (Tensor): Groundtruth signal; given to both losses as well.

        Returns:
            Tensor: Sum of the spectral convergence loss, the log STFT
                magnitude loss and the L1 loss.

        """
        spectral_convergence, log_magnitude = self.multi_STFT_loss(x, y)
        waveform_l1 = self.l1_loss(x, y)
        total = spectral_convergence + log_magnitude + waveform_l1
        return total
# Registry of ready-made loss modules, keyed by the name used in a loss
# config. Each value is a single instance created once at import time.
LOSSES = dict(
    mse=torch.nn.MSELoss(),
    L1=torch.nn.L1Loss(),
    Multi_STFT=MultiResolutionSTFTLoss(),
    L1_Multi_STFT=L1_Multi_STFT(),
)
def get_loss(loss_config, device):
    """Look up a loss module by name and move it to ``device``.

    Args:
        loss_config: Mapping whose ``'name'`` entry is a key of ``LOSSES``.
        device: Target passed to the module's ``.to()`` method.

    Returns:
        The result of calling ``.to(device)`` on the registered module.
        No copy is made first: ``.to`` is invoked on the instance stored
        in ``LOSSES``.

    Raises:
        KeyError: If ``loss_config`` has no ``'name'`` entry or the name is
            not present in ``LOSSES``.

    """
    loss_name = loss_config['name']
    criterion = LOSSES[loss_name]
    return criterion.to(device)