RemFx

Sleeping

File size: 22,920 Bytes

import torch
import torchaudio
import numpy as np
import scipy.signal
import scipy.stats
import pyloudnorm as pyln
from torchvision.transforms import Compose, RandomApply


from typing import List
from pedalboard import (
    Pedalboard,
    Chorus,
    Reverb,
    Compressor,
    Phaser,
    Delay,
    Distortion,
    Limiter,
)

# Empty export list: `from <this module> import *` brings in no names, so
# users must import the transforms they need explicitly.
__all__ = []


def loguniform(low=0, high=1):
    """Draw a single sample from SciPy's log-uniform distribution.

    Args:
        low: Lower bound, forwarded as the first shape parameter.
        high: Upper bound, forwarded as the second shape parameter.

    Returns:
        Whatever ``scipy.stats.loguniform.rvs(low, high)`` returns for one draw.

    NOTE(review): SciPy documents the shape parameters as ``0 < a < b``, so the
    default ``low=0`` looks unusable as-is -- pass explicit positive bounds.
    """
    sample = scipy.stats.loguniform.rvs(low, high)
    return sample


def rand(low=0, high=1):
    """Return one uniform random value rescaled to lie between low and high.

    A single sample is taken from ``torch.rand`` (so it follows torch's RNG
    state), pulled out as a NumPy scalar, multiplied by ``high - low`` and
    then shifted by ``low``.
    """
    unit_sample = torch.rand(1).numpy()[0]
    span = high - low
    return (unit_sample * span) + low


def randint(low=0, high=1):
    """Return one random integer from the closed range [low, high].

    ``torch.randint`` excludes its upper bound, so ``high + 1`` is passed to
    make ``high`` itself a possible outcome. The value is returned as a NumPy
    scalar taken from the one-element result.
    """
    upper_exclusive = high + 1
    drawn = torch.randint(low, upper_exclusive, (1,))
    return drawn.numpy()[0]


def biqaud(
    gain_db: float,
    cutoff_freq: float,
    q_factor: float,
    sample_rate: float,
    filter_type: str,
):
    """Use design parameters to generate coefficients for a specific filter type.

    Args:
        gain_db (float): Filter gain in dB (shelf gain or peak gain).
        cutoff_freq (float): Cutoff (or centre) frequency in Hz.
        q_factor (float): Q factor.
        sample_rate (float): Sample rate in Hz.
        filter_type (str): Filter type.
            One of "low_shelf", "high_shelf", or "peaking"

    Returns:
        b (np.ndarray): Numerator filter coefficients stored as [b0, b1, b2]
        a (np.ndarray): Denominator filter coefficients stored as [a0, a1, a2]
        Both arrays are normalized by a0, so the returned a[0] is 1.

    Raises:
        ValueError: If ``filter_type`` is not one of the supported names.
    """

    # Intermediate quantities shared by all three filter designs.
    A = 10 ** (gain_db / 40.0)
    w0 = 2.0 * np.pi * (cutoff_freq / sample_rate)
    alpha = np.sin(w0) / (2.0 * q_factor)

    cos_w0 = np.cos(w0)
    sqrt_A = np.sqrt(A)

    if filter_type == "high_shelf":
        b0 = A * ((A + 1) + (A - 1) * cos_w0 + 2 * sqrt_A * alpha)
        b1 = -2 * A * ((A - 1) + (A + 1) * cos_w0)
        b2 = A * ((A + 1) + (A - 1) * cos_w0 - 2 * sqrt_A * alpha)
        a0 = (A + 1) - (A - 1) * cos_w0 + 2 * sqrt_A * alpha
        a1 = 2 * ((A - 1) - (A + 1) * cos_w0)
        a2 = (A + 1) - (A - 1) * cos_w0 - 2 * sqrt_A * alpha
    elif filter_type == "low_shelf":
        b0 = A * ((A + 1) - (A - 1) * cos_w0 + 2 * sqrt_A * alpha)
        b1 = 2 * A * ((A - 1) - (A + 1) * cos_w0)
        b2 = A * ((A + 1) - (A - 1) * cos_w0 - 2 * sqrt_A * alpha)
        a0 = (A + 1) + (A - 1) * cos_w0 + 2 * sqrt_A * alpha
        a1 = -2 * ((A - 1) + (A + 1) * cos_w0)
        a2 = (A + 1) + (A - 1) * cos_w0 - 2 * sqrt_A * alpha
    elif filter_type == "peaking":
        b0 = 1 + alpha * A
        b1 = -2 * cos_w0
        b2 = 1 - alpha * A
        a0 = 1 + alpha / A
        a1 = -2 * cos_w0
        a2 = 1 - alpha / A
    else:
        # Fail loudly instead of falling through to unbound coefficients.
        raise ValueError(
            f"Unknown filter_type {filter_type!r}; expected one of "
            '"low_shelf", "high_shelf", or "peaking".'
        )

    # Normalize so that the leading denominator coefficient is 1.
    b = np.array([b0, b1, b2]) / a0
    a = np.array([a0, a1, a2]) / a0

    return b, a


def parametric_eq(
    x: np.ndarray,
    sample_rate: float,
    low_shelf_gain_db: float = 0.0,
    low_shelf_cutoff_freq: float = 80.0,
    low_shelf_q_factor: float = 0.707,
    band_gains_db: List[float] = [0.0],
    band_cutoff_freqs: List[float] = [300.0],
    band_q_factors: List[float] = [0.707],
    high_shelf_gain_db: float = 0.0,
    high_shelf_cutoff_freq: float = 1000.0,
    high_shelf_q_factor: float = 0.707,
    dtype=np.float32,
):
    """Multiband parametric EQ.

    Low-shelf -> Band 1 -> ... -> Band N -> High-shelf

    Args:
        x (np.ndarray): Audio samples. Each filter is run with
            ``scipy.signal.lfilter`` using its default axis (the last one).
        sample_rate (float): Sample rate in Hz.
        low_shelf_gain_db (float): Low-shelf gain in dB.
        low_shelf_cutoff_freq (float): Low-shelf cutoff frequency in Hz.
        low_shelf_q_factor (float): Low-shelf Q factor.
        band_gains_db (List[float]): Gain in dB of each peaking band.
        band_cutoff_freqs (List[float]): Frequency in Hz of each peaking band.
        band_q_factors (List[float]): Q factor of each peaking band.
        high_shelf_gain_db (float): High-shelf gain in dB.
        high_shelf_cutoff_freq (float): High-shelf cutoff frequency in Hz.
        high_shelf_q_factor (float): High-shelf Q factor.
        dtype: NumPy dtype the filtered signal is cast to before returning.

    Returns:
        np.ndarray: The filtered signal, cast to ``dtype``.

    Note:
        The list defaults are only read, never mutated, inside this function.
    """
    assert (
        len(band_gains_db) == len(band_cutoff_freqs) == len(band_q_factors)
    ), "band_gains_db, band_cutoff_freqs and band_q_factors must have equal length"

    # Assemble the cascade in processing order: low shelf, peaks, high shelf.
    stages = [
        (low_shelf_gain_db, low_shelf_cutoff_freq, low_shelf_q_factor, "low_shelf")
    ]
    stages.extend(
        (gain_db, cutoff_freq, q_factor, "peaking")
        for gain_db, cutoff_freq, q_factor in zip(
            band_gains_db, band_cutoff_freqs, band_q_factors
        )
    )
    stages.append(
        (high_shelf_gain_db, high_shelf_cutoff_freq, high_shelf_q_factor, "high_shelf")
    )

    # Run the signal through each biquad section in turn.
    for gain_db, cutoff_freq, q_factor, filter_type in stages:
        b, a = biqaud(gain_db, cutoff_freq, q_factor, sample_rate, filter_type)
        x = scipy.signal.lfilter(b, a, x)

    return x.astype(dtype)


class RandomParametricEQ(torch.nn.Module):
    """Apply a parametric EQ whose settings are re-sampled on every call.

    The EQ consists of a low shelf, ``num_bands`` peaking bands and a high
    shelf. Gains and Q factors are drawn with ``rand``; cutoff frequencies are
    drawn with ``loguniform``.
    """

    def __init__(
        self,
        sample_rate: float,
        num_bands: int = 3,
        min_gain_db: float = -6.0,
        max_gain_db: float = +6.0,
        min_cutoff_freq: float = 1000.0,
        max_cutoff_freq: float = 10000.0,
        min_q_factor: float = 0.1,
        max_q_factor: float = 4.0,
    ):
        """Store the sample rate, band count and the sampling ranges."""
        super().__init__()
        self.sample_rate = sample_rate
        self.num_bands = num_bands
        # gain range shared by the shelves and the peaking bands
        self.min_gain_db, self.max_gain_db = min_gain_db, max_gain_db
        # frequency range used for the peaking bands only
        self.min_cutoff_freq, self.max_cutoff_freq = min_cutoff_freq, max_cutoff_freq
        # Q range shared by the shelves and the peaking bands
        self.min_q_factor, self.max_q_factor = min_q_factor, max_q_factor

    def forward(self, x: torch.Tensor):
        """Filter ``x`` with a freshly sampled EQ curve.

        Args:
            x: (torch.Tensor): Array of audio samples with shape (chs, seq_len).
                The filter is applied along the final dimension, and the same
                filter is applied to all channels.

        Returns:
            torch.Tensor built from the NumPy output of ``parametric_eq``.
        """
        gain_range = (self.min_gain_db, self.max_gain_db)
        q_range = (self.min_q_factor, self.max_q_factor)

        # Low shelf: cutoff is drawn from a fixed 20-200 Hz range.
        low_gain = rand(*gain_range)
        low_freq = loguniform(20.0, 200.0)
        low_q = rand(*q_range)

        # High shelf: cutoff is drawn from a fixed 8-16 kHz range.
        high_gain = rand(*gain_range)
        high_freq = loguniform(8000.0, 16000.0)
        high_q = rand(*q_range)

        # Peaking bands: draw gain, frequency and Q together per band.
        bands = []
        for _ in range(self.num_bands):
            gain = rand(*gain_range)
            freq = loguniform(self.min_cutoff_freq, self.max_cutoff_freq)
            q = rand(*q_range)
            bands.append((gain, freq, q))

        eq_settings = {
            "low_shelf_gain_db": low_gain,
            "low_shelf_cutoff_freq": low_freq,
            "low_shelf_q_factor": low_q,
            "band_gains_db": [band[0] for band in bands],
            "band_cutoff_freqs": [band[1] for band in bands],
            "band_q_factors": [band[2] for band in bands],
            "high_shelf_gain_db": high_gain,
            "high_shelf_cutoff_freq": high_freq,
            "high_shelf_q_factor": high_q,
        }
        filtered = parametric_eq(x.numpy(), self.sample_rate, **eq_settings)

        return torch.from_numpy(filtered)


def stereo_widener(x: torch.Tensor, width: torch.Tensor):
    """Rebalance the mid and side components of a two-channel signal.

    The first two entries along dimension 0 of ``x`` are treated as left and
    right. They are converted to mid/side, the mid is scaled by
    ``2 * (1 - width)`` and the side by ``2 * width``, and the result is
    converted back and stacked along dimension 0. ``x`` itself is not modified.

    Args:
        x: Input with left at index 0 and right at index 1 of dimension 0.
        width: Mix control; 0.5 scales mid and side equally by 1.

    Returns:
        torch.Tensor holding the new (left, right) pair stacked on dim 0.
    """
    root_two = np.sqrt(2)
    ch_left, ch_right = x[0, ...], x[1, ...]

    # encode left/right into mid/side (these are new tensors, not views of x)
    mid_sig = (ch_left + ch_right) / root_two
    side_sig = (ch_left - ch_right) / root_two

    # scale the two components independently
    mid_sig *= 2 * (1 - width)
    side_sig *= 2 * width

    # decode back to left/right and re-stack the channels
    return torch.stack(
        ((mid_sig + side_sig) / root_two, (mid_sig - side_sig) / root_two),
        dim=0,
    )


class RandomStereoWidener(torch.nn.Module):
    """Run ``stereo_widener`` with a width re-sampled on every call."""

    def __init__(
        self,
        sample_rate: float,
        min_width: float = 0.0,
        max_width: float = 1.0,
    ) -> None:
        """Store the sample rate and the range the width is drawn from."""
        super().__init__()
        self.sample_rate = sample_rate
        self.min_width, self.max_width = min_width, max_width

    def forward(self, x: torch.Tensor):
        """Return ``x`` passed through ``stereo_widener`` at a random width."""
        sampled_width = rand(self.min_width, self.max_width)
        widened = stereo_widener(x, sampled_width)
        return widened


class RandomVolumeAutomation(torch.nn.Module):
    """Apply a random piecewise-linear (in dB) gain envelope along the last axis.

    The signal is split into a random number of segments with random relative
    lengths. Within each segment the gain ramps linearly in dB from the
    previous segment's end gain (0 dB for the first segment) to a newly
    sampled end gain.
    """

    def __init__(
        self,
        sample_rate: float,
        min_segments: int = 1,
        max_segments: int = 3,
        min_gain_db: float = -6.0,
        max_gain_db: float = 6.0,
    ) -> None:
        """Store the sample rate, segment-count range and gain range (dB)."""
        super().__init__()
        self.sample_rate = sample_rate
        self.min_segments = min_segments
        self.max_segments = max_segments
        self.min_gain_db = min_gain_db
        self.max_gain_db = max_gain_db

    def forward(self, x: torch.Tensor):
        """Return a gain-automated copy of ``x``; the input is left untouched.

        Args:
            x: Audio tensor; the envelope has ``x.shape[-1]`` entries and is
                broadcast against ``x`` when multiplied.

        Returns:
            torch.Tensor: ``x`` multiplied by the linear-scale gain envelope.
        """
        num_samples = x.shape[-1]
        gain_db = torch.zeros(num_samples).type_as(x)

        num_segments = randint(self.min_segments, self.max_segments)
        # Random relative segment lengths (NumPy RNG), truncated to whole samples.
        segment_lengths = (
            num_samples
            * np.random.dirichlet([rand(0, 10) for _ in range(num_segments)], 1)
        ).astype("int")[0]

        samples_filled = 0
        start_gain_db = 0
        for idx in range(num_segments):
            if idx == num_segments - 1:
                # Truncation above can leave a few samples unassigned; let the
                # final segment absorb them so the tail does not snap to 0 dB.
                segment_samples = num_samples - samples_filled
            else:
                segment_samples = int(segment_lengths[idx])

            # sample random end gain
            end_gain_db = rand(self.min_gain_db, self.max_gain_db)
            fade = torch.linspace(start_gain_db, end_gain_db, steps=segment_samples)
            gain_db[samples_filled : samples_filled + segment_samples] = fade
            samples_filled += segment_samples

            # the next segment starts where this one ended
            start_gain_db = end_gain_db

        # Out-of-place multiply so the caller's tensor is not modified.
        return x * 10 ** (gain_db / 20.0)


class RandomPedalboardCompressor(torch.nn.Module):
    """Run audio through a pedalboard ``Compressor`` with random settings.

    Threshold, ratio, attack and release are each re-drawn with ``rand`` from
    their configured [min, max] range on every call.
    """

    def __init__(
        self,
        sample_rate: float,
        min_threshold_db: float = -42.0,
        max_threshold_db: float = -6.0,
        min_ratio: float = 1.5,
        max_ratio: float = 4.0,
        min_attack_ms: float = 1.0,
        max_attack_ms: float = 50.0,
        min_release_ms: float = 10.0,
        max_release_ms: float = 250.0,
    ) -> None:
        """Store the sample rate and the sampling range of each parameter."""
        super().__init__()
        self.sample_rate = sample_rate
        self.min_threshold_db, self.max_threshold_db = min_threshold_db, max_threshold_db
        self.min_ratio, self.max_ratio = min_ratio, max_ratio
        self.min_attack_ms, self.max_attack_ms = min_attack_ms, max_attack_ms
        self.min_release_ms, self.max_release_ms = min_release_ms, max_release_ms

    def forward(self, x: torch.Tensor):
        """Compress ``x`` with freshly sampled settings and return a tensor."""
        # Values are drawn in this order: threshold, ratio, attack, release.
        settings = {
            "threshold_db": rand(self.min_threshold_db, self.max_threshold_db),
            "ratio": rand(self.min_ratio, self.max_ratio),
            "attack_ms": rand(self.min_attack_ms, self.max_attack_ms),
            "release_ms": rand(self.min_release_ms, self.max_release_ms),
        }

        board = Pedalboard()
        board.append(Compressor(**settings))

        # the board works on NumPy audio, so convert in and back out
        processed = board(x.numpy(), self.sample_rate)
        return torch.from_numpy(processed)


class RandomPedalboardDelay(torch.nn.Module):
    """Run audio through a pedalboard ``Delay`` with random settings.

    The delay time is drawn with ``loguniform``; feedback and mix are drawn
    with ``rand``.
    """

    def __init__(
        self,
        sample_rate: float,
        min_delay_seconds: float = 0.1,
        max_delay_sconds: float = 1.0,
        min_feedback: float = 0.05,
        max_feedback: float = 0.6,
        min_mix: float = 0.0,
        max_mix: float = 0.7,
    ) -> None:
        """Store the sample rate and the sampling range of each parameter.

        Note: the keyword ``max_delay_sconds`` is misspelled in the public
        signature; it is stored as ``self.max_delay_seconds``.
        """
        super().__init__()
        self.sample_rate = sample_rate
        self.min_delay_seconds, self.max_delay_seconds = (
            min_delay_seconds,
            max_delay_sconds,
        )
        self.min_feedback, self.max_feedback = min_feedback, max_feedback
        self.min_mix, self.max_mix = min_mix, max_mix

    def forward(self, x: torch.Tensor):
        """Delay ``x`` with freshly sampled settings and return a tensor."""
        # Values are drawn in this order: delay time, feedback, mix.
        settings = {
            "delay_seconds": loguniform(
                self.min_delay_seconds, self.max_delay_seconds
            ),
            "feedback": rand(self.min_feedback, self.max_feedback),
            "mix": rand(self.min_mix, self.max_mix),
        }

        board = Pedalboard()
        board.append(Delay(**settings))

        processed = board(x.numpy(), self.sample_rate)
        return torch.from_numpy(processed)


class RandomPedalboardChorus(torch.nn.Module):
    """Run audio through a pedalboard ``Chorus`` with random settings.

    Rate, depth, centre delay, feedback and mix are each re-drawn with
    ``rand`` from their configured [min, max] range on every call.
    """

    def __init__(
        self,
        sample_rate: float,
        min_rate_hz: float = 0.25,
        max_rate_hz: float = 4.0,
        min_depth: float = 0.0,
        max_depth: float = 0.6,
        min_centre_delay_ms: float = 5.0,
        max_centre_delay_ms: float = 10.0,
        min_feedback: float = 0.1,
        max_feedback: float = 0.6,
        min_mix: float = 0.1,
        max_mix: float = 0.7,
    ) -> None:
        """Store the sample rate and the sampling range of each parameter."""
        super().__init__()
        self.sample_rate = sample_rate
        self.min_rate_hz, self.max_rate_hz = min_rate_hz, max_rate_hz
        self.min_depth, self.max_depth = min_depth, max_depth
        self.min_centre_delay_ms, self.max_centre_delay_ms = (
            min_centre_delay_ms,
            max_centre_delay_ms,
        )
        self.min_feedback, self.max_feedback = min_feedback, max_feedback
        self.min_mix, self.max_mix = min_mix, max_mix

    def forward(self, x: torch.Tensor):
        """Apply the chorus to ``x`` with fresh settings and return a tensor."""
        # Values are drawn in this order: rate, depth, centre delay, feedback, mix.
        settings = {
            "rate_hz": rand(self.min_rate_hz, self.max_rate_hz),
            "depth": rand(self.min_depth, self.max_depth),
            "centre_delay_ms": rand(
                self.min_centre_delay_ms, self.max_centre_delay_ms
            ),
            "feedback": rand(self.min_feedback, self.max_feedback),
            "mix": rand(self.min_mix, self.max_mix),
        }

        board = Pedalboard()
        board.append(Chorus(**settings))

        # the board works on NumPy audio, so convert in and back out
        processed = board(x.numpy(), self.sample_rate)
        return torch.from_numpy(processed)


class RandomPedalboardPhaser(torch.nn.Module):
    """Run audio through a pedalboard ``Phaser`` with random settings.

    Rate, depth, centre frequency, feedback and mix are each re-drawn with
    ``rand`` from their configured [min, max] range on every call.
    """

    def __init__(
        self,
        sample_rate: float,
        min_rate_hz: float = 0.25,
        max_rate_hz: float = 5.0,
        min_depth: float = 0.1,
        max_depth: float = 0.6,
        min_centre_frequency_hz: float = 200.0,
        max_centre_frequency_hz: float = 600.0,
        min_feedback: float = 0.1,
        max_feedback: float = 0.6,
        min_mix: float = 0.1,
        max_mix: float = 0.7,
    ) -> None:
        """Store the sample rate and the sampling range of each parameter."""
        super().__init__()
        self.sample_rate = sample_rate
        self.min_rate_hz = min_rate_hz
        self.max_rate_hz = max_rate_hz
        self.min_depth = min_depth
        self.max_depth = max_depth
        self.min_centre_frequency_hz = min_centre_frequency_hz
        self.max_centre_frequency_hz = max_centre_frequency_hz
        self.min_feedback = min_feedback
        self.max_feedback = max_feedback
        self.min_mix = min_mix
        self.max_mix = max_mix

    def forward(self, x: torch.Tensor):
        """Apply the phaser to ``x`` with fresh settings and return a tensor."""
        board = Pedalboard()
        rate_hz = rand(self.min_rate_hz, self.max_rate_hz)
        depth = rand(self.min_depth, self.max_depth)
        # Sample across the full [min, max] range; previously the minimum was
        # passed for both bounds, which pinned the value to the minimum.
        centre_frequency_hz = rand(
            self.min_centre_frequency_hz, self.max_centre_frequency_hz
        )
        feedback = rand(self.min_feedback, self.max_feedback)
        mix = rand(self.min_mix, self.max_mix)
        board.append(
            Phaser(
                rate_hz=rate_hz,
                depth=depth,
                centre_frequency_hz=centre_frequency_hz,
                feedback=feedback,
                mix=mix,
            )
        )
        # process audio using the pedalboard
        return torch.from_numpy(board(x.numpy(), self.sample_rate))


class RandomPedalboardLimiter(torch.nn.Module):
    """Run audio through a pedalboard ``Limiter`` with random settings.

    Threshold and release are re-drawn with ``rand`` on every call.
    """

    def __init__(
        self,
        sample_rate: float,
        min_threshold_db: float = -32.0,
        max_threshold_db: float = -6.0,
        min_release_ms: float = 10.0,
        max_release_ms: float = 300.0,
    ) -> None:
        """Store the sample rate and the sampling range of each parameter."""
        super().__init__()
        self.sample_rate = sample_rate
        self.min_threshold_db, self.max_threshold_db = min_threshold_db, max_threshold_db
        self.min_release_ms, self.max_release_ms = min_release_ms, max_release_ms

    def forward(self, x: torch.Tensor):
        """Limit ``x`` with freshly sampled settings and return a tensor."""
        # Values are drawn in this order: threshold, release.
        settings = {
            "threshold_db": rand(self.min_threshold_db, self.max_threshold_db),
            "release_ms": rand(self.min_release_ms, self.max_release_ms),
        }

        board = Pedalboard()
        board.append(Limiter(**settings))

        processed = board(x.numpy(), self.sample_rate)
        return torch.from_numpy(processed)


class RandomPedalboardDistortion(torch.nn.Module):
    """Run audio through a pedalboard ``Distortion`` with a random drive."""

    def __init__(
        self,
        sample_rate: float,
        min_drive_db: float = -20.0,
        max_drive_db: float = 12.0,
    ):
        """Store the sample rate and the range the drive (dB) is drawn from."""
        super().__init__()
        self.sample_rate = sample_rate
        self.min_drive_db, self.max_drive_db = min_drive_db, max_drive_db

    def forward(self, x: torch.Tensor):
        """Distort ``x`` with a freshly sampled drive and return a tensor."""
        sampled_drive = rand(self.min_drive_db, self.max_drive_db)

        board = Pedalboard()
        board.append(Distortion(drive_db=sampled_drive))

        processed = board(x.numpy(), self.sample_rate)
        return torch.from_numpy(processed)


class RandomSoxReverb(torch.nn.Module):
    """Apply the SoX ``reverb`` effect (via torchaudio) with random settings.

    SoX is asked for the wet signal only; the wet/dry blend is then done in
    this module using a randomly sampled ``wet_dry`` ratio.
    """

    def __init__(
        self,
        sample_rate: float,
        min_reverberance: float = 10.0,
        max_reverberance: float = 100.0,
        min_high_freq_damping: float = 0.0,
        max_high_freq_damping: float = 100.0,
        min_wet_dry: float = 0.0,
        max_wet_dry: float = 1.0,
        min_room_scale: float = 5.0,
        max_room_scale: float = 100.0,
        min_stereo_depth: float = 20.0,
        max_stereo_depth: float = 100.0,
        min_pre_delay: float = 0.0,
        max_pre_delay: float = 100.0,
    ) -> None:
        """Store the sample rate and the sampling range of each parameter."""
        super().__init__()
        self.sample_rate = sample_rate
        self.min_reverberance = min_reverberance
        self.max_reverberance = max_reverberance
        self.min_high_freq_damping = min_high_freq_damping
        self.max_high_freq_damping = max_high_freq_damping
        self.min_wet_dry = min_wet_dry
        self.max_wet_dry = max_wet_dry
        self.min_room_scale = min_room_scale
        self.max_room_scale = max_room_scale
        self.min_stereo_depth = min_stereo_depth
        self.max_stereo_depth = max_stereo_depth
        self.min_pre_delay = min_pre_delay
        self.max_pre_delay = max_pre_delay

    def forward(self, x: torch.Tensor):
        """Return ``x`` blended with a randomly configured SoX reverb.

        Args:
            x: Audio tensor passed to torchaudio with ``channels_first=True``.

        Returns:
            torch.Tensor: ``x * (1 - wet_dry) + wet * wet_dry``.
        """
        reverberance = rand(self.min_reverberance, self.max_reverberance)
        high_freq_damping = rand(self.min_high_freq_damping, self.max_high_freq_damping)
        room_scale = rand(self.min_room_scale, self.max_room_scale)
        stereo_depth = rand(self.min_stereo_depth, self.max_stereo_depth)
        wet_dry = rand(self.min_wet_dry, self.max_wet_dry)
        pre_delay = rand(self.min_pre_delay, self.max_pre_delay)

        # SoX syntax is `reverb [-w|--wet-only] [reverberance [HF-damping
        # [room-scale [stereo-depth [pre-delay [wet-gain]]]]]]`, so the
        # wet-only flag has to come before the numeric arguments.
        effects = [
            [
                "reverb",
                "--wet-only",
                f"{reverberance}",
                f"{high_freq_damping}",
                f"{room_scale}",
                f"{stereo_depth}",
                f"{pre_delay}",
            ]
        ]
        y, _ = torchaudio.sox_effects.apply_effects_tensor(
            x, self.sample_rate, effects, channels_first=True
        )

        # manual wet/dry mix
        return (x * (1 - wet_dry)) + (y * wet_dry)


class RandomPedalboardReverb(torch.nn.Module):
    """Run audio through a pedalboard ``Reverb`` with random settings.

    Room size, damping, wet/dry ratio and width are each re-drawn with
    ``rand`` on every call. The sampled wet/dry ratio sets ``wet_level``
    directly and ``dry_level`` as its complement.
    """

    def __init__(
        self,
        sample_rate: float,
        min_room_size: float = 0.0,
        max_room_size: float = 1.0,
        min_damping: float = 0.0,
        max_damping: float = 1.0,
        min_wet_dry: float = 0.0,
        max_wet_dry: float = 0.7,
        min_width: float = 0.0,
        max_width: float = 1.0,
    ) -> None:
        """Store the sample rate and the sampling range of each parameter."""
        super().__init__()
        self.sample_rate = sample_rate
        self.min_room_size, self.max_room_size = min_room_size, max_room_size
        self.min_damping, self.max_damping = min_damping, max_damping
        self.min_wet_dry, self.max_wet_dry = min_wet_dry, max_wet_dry
        self.min_width, self.max_width = min_width, max_width

    def forward(self, x: torch.Tensor):
        """Reverberate ``x`` with freshly sampled settings; return a tensor."""
        # Values are drawn in this order: room size, damping, wet/dry, width.
        sampled_room = rand(self.min_room_size, self.max_room_size)
        sampled_damping = rand(self.min_damping, self.max_damping)
        wet_fraction = rand(self.min_wet_dry, self.max_wet_dry)
        sampled_width = rand(self.min_width, self.max_width)

        reverb = Reverb(
            room_size=sampled_room,
            damping=sampled_damping,
            wet_level=wet_fraction,
            dry_level=(1 - wet_fraction),
            width=sampled_width,
        )
        board = Pedalboard()
        board.append(reverb)

        processed = board(x.numpy(), self.sample_rate)
        return torch.from_numpy(processed)


class LoudnessNormalize(torch.nn.Module):
    """Scale audio so its integrated loudness matches a target in LUFS."""

    def __init__(self, sample_rate: float, target_lufs_db: float = -32.0) -> None:
        """Create the pyloudnorm meter and remember the target loudness."""
        super().__init__()
        self.target_lufs_db = target_lufs_db
        self.meter = pyln.Meter(sample_rate)

    def forward(self, x: torch.Tensor):
        """Return ``x`` multiplied by the gain that reaches the target loudness.

        Args:
            x: Two-dimensional audio tensor; its two axes are swapped before
                being handed to the meter (presumably (chs, samples) in,
                (samples, chs) for pyloudnorm -- confirm against callers).

        Returns:
            torch.Tensor: ``x`` scaled by a single linear gain factor.
        """
        samples_first = x.permute(1, 0).numpy()
        measured_lufs_db = self.meter.integrated_loudness(samples_first)

        # Required correction in dB, limited to [-120, +40] before conversion.
        correction_db = torch.tensor([self.target_lufs_db - measured_lufs_db]).float()
        correction_db = correction_db.clamp(-120, 40.0)

        linear_gain = 10.0 ** (correction_db / 20.0)
        return linear_gain * x


class RandomAudioEffectsChannel(torch.nn.Module):
    """Chain of randomly applied audio effects ending in loudness normalization.

    Each effect is wrapped in ``RandomApply`` with its own probability and the
    wrappers are run in a fixed order through ``Compose``: parametric EQ,
    distortion, delay, chorus, phaser, compressor, reverb, stereo widener,
    limiter, volume automation. ``LoudnessNormalize`` is always applied last.
    """

    def __init__(
        self,
        sample_rate: float,
        parametric_eq_prob: float = 0.7,
        distortion_prob: float = 0.01,
        delay_prob: float = 0.1,
        chorus_prob: float = 0.01,
        phaser_prob: float = 0.01,
        compressor_prob: float = 0.4,
        reverb_prob: float = 0.2,
        stereo_widener_prob: float = 0.3,
        limiter_prob: float = 0.3,
        vol_automation_prob: float = 0.7,
        target_lufs_db: float = -32.0,
    ) -> None:
        """Build the effect chain; every effect uses its default ranges."""
        super().__init__()

        # (effect class, probability of applying it), in processing order
        stage_table = [
            (RandomParametricEQ, parametric_eq_prob),
            (RandomPedalboardDistortion, distortion_prob),
            (RandomPedalboardDelay, delay_prob),
            (RandomPedalboardChorus, chorus_prob),
            (RandomPedalboardPhaser, phaser_prob),
            (RandomPedalboardCompressor, compressor_prob),
            (RandomPedalboardReverb, reverb_prob),
            (RandomStereoWidener, stereo_widener_prob),
            (RandomPedalboardLimiter, limiter_prob),
            (RandomVolumeAutomation, vol_automation_prob),
        ]
        chain = [
            RandomApply([effect_cls(sample_rate)], p=probability)
            for effect_cls, probability in stage_table
        ]
        # the normalizer is unconditional, so it is not wrapped in RandomApply
        chain.append(LoudnessNormalize(sample_rate, target_lufs_db=target_lufs_db))

        self.transforms = Compose(chain)

    def forward(self, x: torch.Tensor):
        """Pass ``x`` through the composed effect chain and return the result."""
        processed = self.transforms(x)
        return processed


# Module-level list of pedalboard-based effect classes (the classes
# themselves, not instances). The phaser and limiter entries are commented
# out, so they are not part of the list.
# NOTE(review): no consumer of this list is visible in this file -- confirm
# how callers use it before changing its contents or order.
Pedalboard_Effects = [
    RandomPedalboardReverb,
    RandomPedalboardChorus,
    RandomPedalboardDelay,
    RandomPedalboardDistortion,
    RandomPedalboardCompressor,
    # RandomPedalboardPhaser,
    # RandomPedalboardLimiter,
]