Spaces:

mattricesound
/

RemFx

Runtime error

App Files Files Community

mattricesound committed on Mar 15, 2023

Commit

f3350b1

1 Parent(s): 15b101a

Add TCN

Browse files

Files changed (4) hide show

cfg/model/tcn.yaml +27 -0
remfx/models.py +31 -0
remfx/tcn.py +145 -0
remfx/utils.py +12 -0

cfg/model/tcn.yaml ADDED Viewed

	@@ -0,0 +1,27 @@

+# @package _global_
+model:
+  _target_: remfx.models.RemFX
+  lr: 1e-4
+  lr_beta1: 0.95
+  lr_beta2: 0.999
+  lr_eps: 1e-6
+  lr_weight_decay: 1e-3
+  sample_rate: ${sample_rate}
+  network:
+    _target_: remfx.models.TCNModel
+    ninputs: 1
+    noutputs: 1
+    nblocks: 4
+    channel_growth: 0
+    channel_width: 32
+    kernel_size: 13
+    stack_size: 10
+    dilation_growth: 10
+    condition: False
+    latent_dim: 2
+    norm_type: "identity"
+    causal: False
+    estimate_loudness: False
+    sample_rate: ${sample_rate}
+    num_bins: 1025

remfx/models.py CHANGED Viewed

@@ -12,6 +12,7 @@ from umx.openunmix.model import OpenUnmix, Separator
 from remfx.utils import FADLoss, spectrogram
 from remfx.dptnet import DPTNet_base
 from remfx.dcunet import RefineSpectrogramUnet
 class RemFX(pl.LightningModule):
@@ -240,6 +241,36 @@ class DCUNetModel(nn.Module):
         return output
 class FXClassifier(pl.LightningModule):
     def __init__(
         self,

 from remfx.utils import FADLoss, spectrogram
 from remfx.dptnet import DPTNet_base
 from remfx.dcunet import RefineSpectrogramUnet
+from remfx.tcn import TCN
 class RemFX(pl.LightningModule):
         return output
class TCNModel(nn.Module):
    """Wrap a ``TCN`` together with the losses used to train it.

    Args:
        sample_rate: Forwarded to ``MultiResolutionSTFTLoss``.
        num_bins: Forwarded to ``MultiResolutionSTFTLoss`` as ``n_bins``.
        **kwargs: Passed unchanged to the ``TCN`` constructor.
    """

    def __init__(self, sample_rate, num_bins, **kwargs):
        super().__init__()
        self.model = TCN(**kwargs)
        self.mrstftloss = MultiResolutionSTFTLoss(
            n_bins=num_bins, sample_rate=sample_rate
        )
        self.l1loss = nn.L1Loss()

    @staticmethod
    def _match_length(output: Tensor, length: int) -> Tensor:
        """Crop or right-zero-pad ``output`` so its last axis is ``length`` long."""
        n_samples = output.shape[-1]
        if n_samples > length:
            # Slice the last (time) axis. `output[:, :length]` would slice
            # dim 1 instead and leave the time axis untouched.
            return output[..., :length]
        if n_samples < length:
            return F.pad(output, (0, length - n_samples))
        return output

    def forward(self, batch):
        """Run the network on ``batch`` and compute the training loss.

        Args:
            batch: A pair ``(x, target)``.

        Returns:
            ``(loss, output)`` where ``loss`` is the multi-resolution STFT loss
            plus 100 times the L1 loss, and ``output`` is the network output
            cropped/padded to the length of ``x``.
        """
        x, target = batch
        # Network output is expected to be B x 1 x T. It is aligned to the
        # length of `x`; presumably `target` has that same length.
        output = self._match_length(self.model(x), x.shape[-1])
        loss = self.mrstftloss(output, target) + self.l1loss(output, target) * 100
        return loss, output

    def sample(self, x: Tensor) -> Tensor:
        """Return the network output for ``x``, cropped/padded to its length."""
        return self._match_length(self.model(x), x.shape[-1])
 class FXClassifier(pl.LightningModule):
     def __init__(
         self,

remfx/tcn.py ADDED Viewed

	@@ -0,0 +1,145 @@

+# This code is based on the following repository written by Christian J. Steinmetz
+# https://github.com/csteinmetz1/micro-tcn
+from typing import Callable
+import torch
+import torch.nn as nn
+from torch import Tensor
+from remfx.utils import causal_crop, center_crop
class TCNBlock(nn.Module):
    """One TCN layer: dilated 1-D convolution, PReLU, and a 1x1 residual path.

    The main convolution uses no padding, so its output is shorter than its
    input. The residual branch is cropped with ``crop_fn`` to the convolution
    output length before the two are summed.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        kernel_size: Kernel size of the main convolution.
        dilation: Dilation of the main convolution.
        stride: Stride applied to both the main and the residual convolution.
        crop_fn: Callable ``(tensor, length) -> tensor`` used to shorten the
            residual branch along its last axis.
    """

    def __init__(
        self,
        in_ch: int,
        out_ch: int,
        kernel_size: int = 3,
        dilation: int = 1,
        stride: int = 1,
        crop_fn: Callable = causal_crop,
    ) -> None:
        super().__init__()
        self.in_ch = in_ch
        self.out_ch = out_ch
        self.kernel_size = kernel_size
        self.stride = stride
        self.crop_fn = crop_fn

        # Unpadded on purpose: the length mismatch with the residual branch
        # is resolved by `crop_fn` in `forward`.
        self.conv1 = nn.Conv1d(
            in_ch,
            out_ch,
            kernel_size,
            stride=stride,
            padding=0,
            dilation=dilation,
            bias=True,
        )
        # residual connection: 1x1 conv so channel counts match for the sum
        self.res = nn.Conv1d(
            in_ch,
            out_ch,
            kernel_size=1,
            groups=1,
            stride=stride,
            bias=False,
        )
        # Per-channel PReLU (one learnable slope per output channel)
        self.relu = nn.PReLU(out_ch)

    def forward(self, x: Tensor) -> Tensor:
        """Apply the block to ``x`` and return the (shorter) result."""
        residual = self.res(x)
        activated = self.relu(self.conv1(x))
        # Crop the residual (causal or centered, depending on `crop_fn`)
        # to the convolution output length before adding.
        return activated + self.crop_fn(residual, activated.shape[-1])
class TCN(nn.Module):
    """Stack of ``TCNBlock`` layers followed by a 1x1 output convolution.

    The output of the 1x1 convolution is multiplied with the (cropped) input
    signal and passed through ``tanh``.

    Args:
        ninputs: Number of input channels.
        noutputs: Number of output channels.
        nblocks: Number of ``TCNBlock`` layers.
        channel_growth: If greater than 1, each block multiplies the channel
            count by this factor; otherwise every block outputs
            ``channel_width`` channels.
        channel_width: Output channels per block when ``channel_growth <= 1``.
        kernel_size: Kernel size of every block.
        stack_size: Number of blocks after which the dilation pattern repeats.
        dilation_growth: Base of the per-block dilation
            (``dilation_growth ** (n % stack_size)``).
        condition: Stored on the instance; not otherwise used in this class.
        latent_dim: Input size of the optional loudness layer.
        norm_type: Stored on the instance; not otherwise used in this class.
        causal: Select ``causal_crop`` (True) or ``center_crop`` (False) for
            the residual paths of the blocks.
        estimate_loudness: If True, create a ``Linear(latent_dim, 1)`` layer
            as ``self.loudness``.
    """

    def __init__(
        self,
        ninputs: int = 1,
        noutputs: int = 1,
        nblocks: int = 4,
        channel_growth: int = 0,
        channel_width: int = 32,
        kernel_size: int = 13,
        stack_size: int = 10,
        dilation_growth: int = 10,
        condition: bool = False,
        latent_dim: int = 2,
        norm_type: str = "identity",
        causal: bool = False,
        estimate_loudness: bool = False,
    ) -> None:
        super().__init__()
        self.ninputs = ninputs
        self.noutputs = noutputs
        self.nblocks = nblocks
        self.channel_growth = channel_growth
        self.channel_width = channel_width
        self.kernel_size = kernel_size
        self.stack_size = stack_size
        self.dilation_growth = dilation_growth
        self.condition = condition
        self.latent_dim = latent_dim
        self.norm_type = norm_type
        self.causal = causal
        self.estimate_loudness = estimate_loudness

        print(f"Causal: {self.causal}")
        self.crop_fn = causal_crop if self.causal else center_crop

        if estimate_loudness:
            self.loudness = torch.nn.Linear(latent_dim, 1)

        # audio model
        self.process_blocks = torch.nn.ModuleList()
        in_ch = ninputs
        out_ch = -1  # placeholder so `out_ch` is bound even when nblocks == 0
        for n in range(nblocks):
            out_ch = in_ch * channel_growth if channel_growth > 1 else channel_width
            self.process_blocks.append(
                TCNBlock(
                    in_ch,
                    out_ch,
                    kernel_size,
                    self._dilation(n),
                    stride=1,
                    crop_fn=self.crop_fn,
                )
            )
            # The next block consumes what this one produces.
            in_ch = out_ch
        self.output = nn.Conv1d(out_ch, noutputs, kernel_size=1)

        # model configuration
        self.receptive_field = self.compute_receptive_field()
        self.block_size = 2048
        self.buffer = torch.zeros(2, self.receptive_field + self.block_size - 1)

    def _dilation(self, n: int) -> int:
        """Return the dilation of block ``n`` (the pattern repeats every ``stack_size``)."""
        return self.dilation_growth ** (n % self.stack_size)

    def forward(self, x: Tensor) -> Tensor:
        """Process ``x`` and return ``tanh(output_conv(blocks(x)) * cropped_x)``."""
        x_in = x
        for block in self.process_blocks:
            x = block(x)
        # NOTE(review): the input is always cropped with `causal_crop`, even
        # when the blocks use `center_crop` (causal=False) - confirm that
        # this alignment is intended.
        x_in = causal_crop(x_in, x.shape[-1])
        gain_ln = self.output(x)
        y_hat = torch.tanh(gain_ln * x_in)
        return y_hat

    def compute_receptive_field(self):
        """Compute the receptive field in samples."""
        # The first block (dilation_growth ** 0 == 1) contributes kernel_size;
        # each further block adds (kernel_size - 1) * its dilation.
        rf = self.kernel_size
        for n in range(1, self.nblocks):
            rf += (self.kernel_size - 1) * self._dilation(n)
        return rf

remfx/utils.py CHANGED Viewed

@@ -204,3 +204,15 @@ def concat_complex(a: torch.tensor, b: torch.tensor, dim: int = 1) -> torch.tens
     a_real, a_img = a.chunk(2, dim)
     b_real, b_img = b.chunk(2, dim)
     return torch.cat([a_real, b_real, a_img, b_img], dim=dim)

     a_real, a_img = a.chunk(2, dim)
     b_real, b_img = b.chunk(2, dim)
     return torch.cat([a_real, b_real, a_img, b_img], dim=dim)
def center_crop(x, length: int):
    """Return the middle ``length`` entries of ``x`` along its last axis.

    When the surplus ``x.shape[-1] - length`` is odd, the extra entry is
    dropped from the end.

    Args:
        x: Array-like object exposing ``shape`` and ``[..., a:b]`` indexing.
        length: Number of entries to keep along the last axis.

    Returns:
        The slice ``x[..., start:start + length]``.

    Raises:
        ValueError: If ``length`` is negative or larger than ``x.shape[-1]``.
    """
    size = x.shape[-1]
    if not 0 <= length <= size:
        # A larger `length` would give a negative start index that wraps
        # around and silently returns a slice of the wrong size.
        raise ValueError(
            f"cannot center-crop {length} samples from a last axis of size {size}"
        )
    start = (size - length) // 2
    return x[..., start : start + length]
def causal_crop(x, length: int):
    """Return a ``length``-long window near the end of ``x``'s last axis.

    If ``x`` already has exactly ``length`` entries it is returned unchanged.
    Otherwise the window is ``x[..., size - 1 - length : size - 1]``.

    Args:
        x: Array-like object exposing ``shape`` and ``[..., a:b]`` indexing.
        length: Number of entries to keep along the last axis.

    Returns:
        ``x`` itself when no cropping is needed, else the slice described
        above.

    Raises:
        ValueError: If ``length`` is negative or larger than ``x.shape[-1]``.
    """
    size = x.shape[-1]
    if not 0 <= length <= size:
        # A larger `length` would give a negative start index that wraps
        # around and silently returns a slice of the wrong size.
        raise ValueError(
            f"cannot causal-crop {length} samples from a last axis of size {size}"
        )
    if length == size:
        # Without this guard start would be -1 and the slice would not have
        # `length` entries (it is empty for any size > 1).
        return x
    # NOTE(review): the window stops one entry before the end, so the final
    # sample is excluded. Kept as-is to preserve existing behaviour - confirm
    # whether `x[..., -length:]` was intended.
    stop = size - 1
    start = stop - length
    return x[..., start:stop]