Commit 3f8f152
Parent(s): bd0a813

add hydra configs
Files changed:

- conf/config.yaml +15 -0
- conf/dataset/valentini.yaml +3 -0
- conf/loss/mse.yaml +1 -0
- conf/model/demucs.yaml +18 -0
- conf/optimizer/adam.yaml +0 -0
- conf/optimizer/sgd.yaml +3 -0
- datasets.py +8 -1
- denoisers/__init__.py +14 -0
- denoisers/demucs.py +20 -13
- losses.py +9 -0
- main.py +8 -10
- EDA.ipynb → notebooks/EDA.ipynb +0 -0
- optimizers.py +10 -0
- evaluation.py → testing/evaluation.py +1 -1
- metrics.py → testing/metrics.py +0 -0
- train.py +53 -50

conf/config.yaml
ADDED
@@ -0,0 +1,15 @@
+defaults:
+  - dataset: valentini
+  - model: demucs
+  - loss: mse
+  - optimizer: sgd
+
+
+dataloader:
+  max_seconds: 2
+  sample_rate: 16000
+  batch_size: 12
+
+
+augmentations:
+  - random_crop
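
For orientation, a minimal sketch of how this root config composes the group files above and how values can be overridden at compose time. It assumes Hydra >= 1.2 (matching the version_base argument used in main.py below); the override values are illustrative only:

from hydra import compose, initialize
from omegaconf import OmegaConf

# Build the same config tree that @hydra.main assembles from conf/config.yaml.
with initialize(version_base=None, config_path="conf"):
    cfg = compose(config_name="config",
                  overrides=["optimizer=sgd", "dataloader.batch_size=4"])

print(OmegaConf.to_yaml(cfg))       # dataset, model, loss, optimizer, dataloader, augmentations
print(cfg.dataloader.max_seconds)   # 2
print(cfg.model.demucs.H)           # 64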

conf/dataset/valentini.yaml
ADDED
@@ -0,0 +1,3 @@
+
+name: valentini
+path: /media/public/dataset/denoising/DS_10283_2791/

conf/loss/mse.yaml
ADDED
@@ -0,0 +1 @@
+name: mse

conf/model/demucs.yaml
ADDED
@@ -0,0 +1,18 @@
+demucs:
+  H: 64
+  encoder:
+    conv1:
+      kernel_size: 8
+      stride: 2
+    conv2:
+      kernel_size: 1
+      stride: 1
+
+  decoder:
+    conv1:
+      kernel_size: 1
+      stride: 1
+    conv2:
+      kernel_size: 8
+      stride: 2
+

conf/optimizer/adam.yaml
ADDED
File without changes

conf/optimizer/sgd.yaml
ADDED
@@ -0,0 +1,3 @@
+sgd:
+  lr: 0.001
+  momentum: 0.9

datasets.py
CHANGED
@@ -5,7 +5,7 @@ from utils import load_wav
 
 
 class Valentini(Dataset):
-    def __init__(self, dataset_path='/media/public/
+    def __init__(self, dataset_path='/media/public/dataset/denoising/DS_10283_2791/', transform=None,
                  valid=False):
        clean_path = Path(dataset_path) / 'clean_trainset_56spk_wav'
        noisy_path = Path(dataset_path) / 'noisy_trainset_56spk_wav'
@@ -37,3 +37,10 @@ class Valentini(Dataset):
        torch.manual_seed(random_seed)
        clean_wav = self.transform(clean_wav)
        return noisy_wav, clean_wav
+
+
+DATASETS_POOL = {
+    'valentini': Valentini
+}
+
+
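
The commit registers the dataset in DATASETS_POOL but does not add a lookup helper for it in this diff. A minimal sketch of how the registry could be resolved from conf/dataset/valentini.yaml; the get_dataset helper is hypothetical, not part of this commit:

from omegaconf import OmegaConf

from datasets import DATASETS_POOL


def get_dataset(dataset_config, transform=None, valid=False):
    # Hypothetical helper mirroring get_model/get_optimizer: pick the class by
    # the 'name' key and pass the configured path through.
    dataset_cls = DATASETS_POOL[dataset_config['name']]
    return dataset_cls(dataset_path=dataset_config['path'],
                       transform=transform, valid=valid)


dataset_cfg = OmegaConf.create({'name': 'valentini',
                                'path': '/media/public/dataset/denoising/DS_10283_2791/'})
train_set = get_dataset(dataset_cfg, valid=False)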

denoisers/__init__.py
ADDED
@@ -0,0 +1,14 @@
+from denoisers.demucs import Demucs
+from denoisers.SpectralGating import SpectralGating
+
+
+MODEL_POOL = {
+    'demucs': Demucs,
+    'baseline': SpectralGating
+}
+
+
+def get_model(model_config):
+    name, params = list(model_config.items())[0]
+    return MODEL_POOL[name](params)
+
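
A short usage sketch of get_model: the single top-level key of the model group config selects the class from MODEL_POOL, and the nested mapping is handed to the constructor as its cfg (values below mirror conf/model/demucs.yaml):

from omegaconf import OmegaConf

from denoisers import get_model

model_cfg = OmegaConf.create({
    'demucs': {
        'H': 64,
        'encoder': {'conv1': {'kernel_size': 8, 'stride': 2},
                    'conv2': {'kernel_size': 1, 'stride': 1}},
        'decoder': {'conv1': {'kernel_size': 1, 'stride': 1},
                    'conv2': {'kernel_size': 8, 'stride': 2}},
    }
})
model = get_model(model_cfg)   # resolves to Demucs(cfg=model_cfg['demucs'])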

denoisers/demucs.py
CHANGED
@@ -1,15 +1,19 @@
 import torch
 from torch.nn.functional import pad
 
+
+
 class Encoder(torch.nn.Module):
-    def __init__(self, in_channels, out_channels):
+    def __init__(self, in_channels, out_channels, cfg):
        super(Encoder, self).__init__()
 
        self.conv1 = torch.nn.Conv1d(in_channels=in_channels, out_channels=out_channels,
-                                    kernel_size=
+                                    kernel_size=cfg['conv1']['kernel_size'],
+                                    stride=cfg['conv1']['stride'])
        self.relu1 = torch.nn.ReLU()
        self.conv2 = torch.nn.Conv1d(in_channels=out_channels, out_channels=2 * out_channels,
-                                    kernel_size=
+                                    kernel_size=cfg['conv2']['kernel_size'],
+                                    stride=cfg['conv2']['stride'])
        self.glu = torch.nn.GLU(dim=-2)
 
    def forward(self, x):
@@ -21,14 +25,16 @@ class Encoder(torch.nn.Module):
 
 
 class Decoder(torch.nn.Module):
-    def __init__(self, in_channels, out_channels):
+    def __init__(self, in_channels, out_channels, cfg):
        super(Decoder, self).__init__()
 
        self.conv1 = torch.nn.Conv1d(in_channels=in_channels, out_channels=2 * in_channels,
-                                    kernel_size=
+                                    kernel_size=cfg['conv1']['kernel_size'],
+                                    stride=cfg['conv1']['stride'])
        self.glu = torch.nn.GLU(dim=-2)
        self.conv2 = torch.nn.ConvTranspose1d(in_channels=in_channels, out_channels=out_channels,
-                                             kernel_size=
+                                             kernel_size=cfg['conv2']['kernel_size'],
+                                             stride=cfg['conv2']['stride'])
        self.relu = torch.nn.ReLU()
 
    def forward(self, x):
@@ -38,20 +44,21 @@ class Decoder(torch.nn.Module):
 
 
 class Demucs(torch.nn.Module):
-    def __init__(self,
+    def __init__(self, cfg):
        super(Demucs, self).__init__()
+        H = cfg['H']
 
-        self.encoder1 = Encoder(in_channels=1, out_channels=H)
-        self.encoder2 = Encoder(in_channels=H, out_channels=2*H)
-        self.encoder3 = Encoder(in_channels=2*H, out_channels=4*H)
+        self.encoder1 = Encoder(in_channels=1, out_channels=H, cfg=cfg['encoder'])
+        self.encoder2 = Encoder(in_channels=H, out_channels=2*H, cfg=cfg['encoder'])
+        self.encoder3 = Encoder(in_channels=2*H, out_channels=4*H, cfg=cfg['encoder'])
 
        self.lstm = torch.nn.LSTM(
            input_size=4*H,
            hidden_size=4*H, num_layers=2, batch_first=True)
 
-        self.decoder1 = Decoder(in_channels=4*H, out_channels=2*H)
-        self.decoder2 = Decoder(in_channels=2*H, out_channels=H)
-        self.decoder3 = Decoder(in_channels=H, out_channels=1)
+        self.decoder1 = Decoder(in_channels=4*H, out_channels=2*H, cfg=cfg['decoder'])
+        self.decoder2 = Decoder(in_channels=2*H, out_channels=H, cfg=cfg['decoder'])
+        self.decoder3 = Decoder(in_channels=H, out_channels=1, cfg=cfg['decoder'])
 
    def forward(self, x):
        out1 = self.encoder1(x)
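
A quick sanity check of the new config-driven constructors. This sketch only instantiates the model and counts parameters, since the forward pass depends on code outside these hunks; the config values mirror conf/model/demucs.yaml:

from omegaconf import OmegaConf

from denoisers.demucs import Demucs

cfg = OmegaConf.create({
    'H': 64,
    'encoder': {'conv1': {'kernel_size': 8, 'stride': 2},
                'conv2': {'kernel_size': 1, 'stride': 1}},
    'decoder': {'conv1': {'kernel_size': 1, 'stride': 1},
                'conv2': {'kernel_size': 8, 'stride': 2}},
})

model = Demucs(cfg)
print(sum(p.numel() for p in model.parameters()))  # rough size check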

losses.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+
+LOSSES = {
+    'mse': torch.nn.MSELoss()
+}
+
+
+def get_loss(loss_config):
+    return LOSSES[loss_config['name']]

main.py
CHANGED
@@ -1,15 +1,13 @@
-import
-
-from
+import hydra
+from omegaconf import DictConfig, OmegaConf
+from train import train
 
 
-
-
-    print(
+@hydra.main(version_base=None, config_path="conf", config_name="config")
+def main(cfg: DictConfig):
+    print(OmegaConf.to_yaml(cfg))
+    train(cfg)
 
 
-# Press the green button in the gutter to run the script.
 if __name__ == '__main__':
-
-
-    # See PyCharm help at https://www.jetbrains.com/help/pycharm/
+    main()
EDA.ipynb → notebooks/EDA.ipynb
RENAMED
File without changes

optimizers.py
ADDED
@@ -0,0 +1,10 @@
+import torch
+
+OPTIMIZERS_POOL = {
+    'sgd': torch.optim.SGD,
+}
+
+def get_optimizer(model_params, optimizer_config):
+    name, params = list(optimizer_config.items())[0]
+    optimizer = OPTIMIZERS_POOL[name](model_params, **params)
+    return optimizer
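
Usage sketch for get_optimizer: the top-level key of conf/optimizer/sgd.yaml picks the optimizer class and the nested values are unpacked into its constructor (the stand-in parameter below is illustrative only):

import torch
from omegaconf import OmegaConf

from optimizers import get_optimizer

optimizer_cfg = OmegaConf.create({'sgd': {'lr': 0.001, 'momentum': 0.9}})
params = [torch.nn.Parameter(torch.zeros(1))]      # stand-in for model.parameters()
optimizer = get_optimizer(params, optimizer_cfg)   # -> torch.optim.SGD(params, lr=0.001, momentum=0.9)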

evaluation.py → testing/evaluation.py
RENAMED
@@ -45,7 +45,7 @@ def evaluate_on_dataset(model_name, dataset_path, dataset_type):
 if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog='Program to evaluate denoising')
    parser.add_argument('--dataset_path', type=str,
-                        default='/media/public/
+                        default='/media/public/dataset/denoising/DS_10283_2791/',
                        help='Path to dataset folder')
    parser.add_argument('--dataset_type', type=str, required=True,
                        choices=['valentini'])

metrics.py → testing/metrics.py
RENAMED
File without changes

train.py
CHANGED
@@ -3,66 +3,46 @@ from torch.utils.tensorboard import SummaryWriter
 import torch
 from torch.nn import Sequential
 from torch.utils.data import DataLoader
-from datasets import Valentini
 from datetime import datetime
 from torchvision.transforms import RandomCrop
 from utils import load_wav
 from denoisers.demucs import Demucs
 from pathlib import Path
+from omegaconf import DictConfig
 
-
-
-
-
-
-
-DATASET_PATH = Path('/media/public/datasets/denoising/DS_10283_2791/')
-VALID_WAVS = {'hard': 'p257_171.wav',
-              'medium': 'p232_071.wav',
-              'easy': 'p232_284.wav'}
-MAX_SECONDS = 3.2
-SAMPLE_RATE = 16000
-
-transform = Sequential(RandomCrop((1, int(MAX_SECONDS * SAMPLE_RATE)), pad_if_needed=True))
-
-training_loader = DataLoader(Valentini(valid=False, transform=transform), batch_size=12, shuffle=True)
-validation_loader = DataLoader(Valentini(valid=True, transform=transform), batch_size=12, shuffle=True)
-
-optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
-loss_fn = torch.nn.MSELoss()
-
-
-def train_one_epoch(epoch_index, tb_writer):
-    running_loss = 0.
-    last_loss = 0.
-
-    for i, data in enumerate(training_loader):
-        inputs, labels = data
-        inputs, labels = inputs.to(device), labels.to(device)
-
-        optimizer.zero_grad()
-
-        outputs = model(inputs)
+from optimizers import OPTIMIZERS_POOL
+from losses import LOSSES
+from datasets import DATASETS_POOL
+from denoisers import get_model
+from optimizers import get_optimizer
+from losses import get_loss
 
-        loss = loss_fn(outputs, labels)
-        loss.backward()
 
-
-
-        running_loss += loss.item()
-        if i % 1000 == 999:
-            last_loss = running_loss / 100  # loss per batch
-            print(' batch {} loss: {}'.format(i + 1, last_loss))
-            tb_x = epoch_index * len(training_loader) + i + 1
-            tb_writer.add_scalar('Loss/train', last_loss, tb_x)
-            running_loss = 0.
+os.environ['CUDA_VISIBLE_DEVICES'] = "1"
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
+#
+#
+# DATASET_PATH = Path('/media/public/dataset/denoising/DS_10283_2791/')
+# VALID_WAVS = {'hard': 'p257_171.wav',
+#               'medium': 'p232_071.wav',
+#               'easy': 'p232_284.wav'}
+# MAX_SECONDS = 2
+# SAMPLE_RATE = 16000
+#
+# transform = Sequential(RandomCrop((1, int(MAX_SECONDS * SAMPLE_RATE)), pad_if_needed=True))
+#
+# training_loader = DataLoader(Valentini(valid=False, transform=transform), batch_size=12, shuffle=True)
+# validation_loader = DataLoader(Valentini(valid=True, transform=transform), batch_size=12, shuffle=True)
 
-    return last_loss
 
 
-def train():
+def train(cfg: DictConfig):
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+    model = get_model(cfg['model'])
+    optimizer = get_optimizer(model.parameters(), cfg['optimizer'])
+    loss_fn = get_loss(cfg['loss'])
+
    writer = SummaryWriter('runs/denoising_trainer_{}'.format(timestamp))
    epoch_number = 0
 
@@ -78,11 +58,34 @@
    for epoch in range(EPOCHS):
        print('EPOCH {}:'.format(epoch_number + 1))
 
-        # Make sure gradient tracking is on, and do a pass over the data
        model.train(True)
-        avg_loss = train_one_epoch(epoch_number, writer)
 
-
+        running_loss = 0.
+        last_loss = 0.
+
+        for i, data in enumerate(training_loader):
+            inputs, labels = data
+            inputs, labels = inputs.to(device), labels.to(device)
+
+            optimizer.zero_grad()
+
+            outputs = model(inputs)
+
+            loss = loss_fn(outputs, labels)
+            loss.backward()
+
+            optimizer.step()
+
+            running_loss += loss.item()
+            if i % 1000 == 999:
+                last_loss = running_loss / 1000  # loss per batch
+                print(' batch {} loss: {}'.format(i + 1, last_loss))
+                tb_x = epoch_number * len(training_loader) + i + 1
+                writer.add_scalar('Loss/train', last_loss, tb_x)
+                running_loss = 0.
+
+        avg_loss = last_loss
+
        model.train(False)
 
        running_vloss = 0.0