BorisovMaksim committed on
Commit
3f8f152
·
1 Parent(s): bd0a813

add hydra configs

Browse files
conf/config.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - dataset: valentini
3
+ - model: demucs
4
+ - loss: mse
5
+ - optimizer: sgd
6
+
7
+
8
+ dataloader:
9
+ max_seconds: 2
10
+ sample_rate: 16000
11
+ batch_size: 12
12
+
13
+
14
+ augmentations:
15
+ - random_crop
conf/dataset/valentini.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+
2
+ name: valentini
3
+ path: /media/public/dataset/denoising/DS_10283_2791/
conf/loss/mse.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ name: mse
conf/model/demucs.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ demucs:
2
+ H: 64
3
+ encoder:
4
+ conv1:
5
+ kernel_size: 8
6
+ stride: 2
7
+ conv2:
8
+ kernel_size: 1
9
+ stride: 1
10
+
11
+ decoder:
12
+ conv1:
13
+ kernel_size: 1
14
+ stride: 1
15
+ conv2:
16
+ kernel_size: 8
17
+ stride: 2
18
+
conf/optimizer/adam.yaml ADDED
File without changes
conf/optimizer/sgd.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ sgd:
2
+ lr: 0.001
3
+ momentum: 0.9
datasets.py CHANGED
@@ -5,7 +5,7 @@ from utils import load_wav
5
 
6
 
7
  class Valentini(Dataset):
8
- def __init__(self, dataset_path='/media/public/datasets/denoising/DS_10283_2791/', transform=None,
9
  valid=False):
10
  clean_path = Path(dataset_path) / 'clean_trainset_56spk_wav'
11
  noisy_path = Path(dataset_path) / 'noisy_trainset_56spk_wav'
@@ -37,3 +37,10 @@ class Valentini(Dataset):
37
  torch.manual_seed(random_seed)
38
  clean_wav = self.transform(clean_wav)
39
  return noisy_wav, clean_wav
 
 
 
 
 
 
 
 
5
 
6
 
7
  class Valentini(Dataset):
8
+ def __init__(self, dataset_path='/media/public/dataset/denoising/DS_10283_2791/', transform=None,
9
  valid=False):
10
  clean_path = Path(dataset_path) / 'clean_trainset_56spk_wav'
11
  noisy_path = Path(dataset_path) / 'noisy_trainset_56spk_wav'
 
37
  torch.manual_seed(random_seed)
38
  clean_wav = self.transform(clean_wav)
39
  return noisy_wav, clean_wav
40
+
41
+
42
# Registry of dataset classes, keyed by the config 'name' field
# (see conf/dataset/valentini.yaml).
DATASETS_POOL = {
    'valentini': Valentini
}
45
+
46
+
denoisers/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from denoisers.demucs import Demucs
from denoisers.SpectralGating import SpectralGating


# Registry of model constructors, keyed by the config section name.
MODEL_POOL = {
    'demucs': Demucs,
    'baseline': SpectralGating
}


def get_model(model_config):
    """Instantiate the model described by *model_config*.

    The config is a single-entry mapping of the form {name: params};
    the params are forwarded to the registered constructor.
    """
    model_name, model_params = next(iter(model_config.items()))
    return MODEL_POOL[model_name](model_params)
denoisers/demucs.py CHANGED
@@ -1,15 +1,19 @@
1
  import torch
2
  from torch.nn.functional import pad
3
 
 
 
4
  class Encoder(torch.nn.Module):
5
- def __init__(self, in_channels, out_channels):
6
  super(Encoder, self).__init__()
7
 
8
  self.conv1 = torch.nn.Conv1d(in_channels=in_channels, out_channels=out_channels,
9
- kernel_size=8, stride=2)
 
10
  self.relu1 = torch.nn.ReLU()
11
  self.conv2 = torch.nn.Conv1d(in_channels=out_channels, out_channels=2 * out_channels,
12
- kernel_size=1, stride=1)
 
13
  self.glu = torch.nn.GLU(dim=-2)
14
 
15
  def forward(self, x):
@@ -21,14 +25,16 @@ class Encoder(torch.nn.Module):
21
 
22
 
23
  class Decoder(torch.nn.Module):
24
- def __init__(self, in_channels, out_channels):
25
  super(Decoder, self).__init__()
26
 
27
  self.conv1 = torch.nn.Conv1d(in_channels=in_channels, out_channels=2 * in_channels,
28
- kernel_size=1, stride=1)
 
29
  self.glu = torch.nn.GLU(dim=-2)
30
  self.conv2 = torch.nn.ConvTranspose1d(in_channels=in_channels, out_channels=out_channels,
31
- kernel_size=8, stride=2)
 
32
  self.relu = torch.nn.ReLU()
33
 
34
  def forward(self, x):
@@ -38,20 +44,21 @@ class Decoder(torch.nn.Module):
38
 
39
 
40
  class Demucs(torch.nn.Module):
41
- def __init__(self, H):
42
  super(Demucs, self).__init__()
 
43
 
44
- self.encoder1 = Encoder(in_channels=1, out_channels=H)
45
- self.encoder2 = Encoder(in_channels=H, out_channels=2*H)
46
- self.encoder3 = Encoder(in_channels=2*H, out_channels=4*H)
47
 
48
  self.lstm = torch.nn.LSTM(
49
  input_size=4*H,
50
  hidden_size=4*H, num_layers=2, batch_first=True)
51
 
52
- self.decoder1 = Decoder(in_channels=4*H, out_channels=2*H)
53
- self.decoder2 = Decoder(in_channels=2*H, out_channels=H)
54
- self.decoder3 = Decoder(in_channels=H, out_channels=1)
55
 
56
  def forward(self, x):
57
  out1 = self.encoder1(x)
 
1
  import torch
2
  from torch.nn.functional import pad
3
 
4
+
5
+
6
  class Encoder(torch.nn.Module):
7
    def __init__(self, in_channels, out_channels, cfg):
        """Encoder block: strided Conv1d -> ReLU -> 1x1 Conv1d -> GLU.

        cfg supplies 'conv1' and 'conv2' sub-dicts, each with
        'kernel_size' and 'stride' (see conf/model/demucs.yaml).
        """
        super(Encoder, self).__init__()

        self.conv1 = torch.nn.Conv1d(in_channels=in_channels, out_channels=out_channels,
                                     kernel_size=cfg['conv1']['kernel_size'],
                                     stride=cfg['conv1']['stride'])
        self.relu1 = torch.nn.ReLU()
        # conv2 doubles the channel count; the GLU below halves it again
        # along the channel axis (dim=-2).
        self.conv2 = torch.nn.Conv1d(in_channels=out_channels, out_channels=2 * out_channels,
                                     kernel_size=cfg['conv2']['kernel_size'],
                                     stride=cfg['conv2']['stride'])
        self.glu = torch.nn.GLU(dim=-2)
18
 
19
  def forward(self, x):
 
25
 
26
 
27
  class Decoder(torch.nn.Module):
28
+ def __init__(self, in_channels, out_channels, cfg):
29
  super(Decoder, self).__init__()
30
 
31
  self.conv1 = torch.nn.Conv1d(in_channels=in_channels, out_channels=2 * in_channels,
32
+ kernel_size=cfg['conv1']['kernel_size'],
33
+ stride=cfg['conv1']['stride'])
34
  self.glu = torch.nn.GLU(dim=-2)
35
  self.conv2 = torch.nn.ConvTranspose1d(in_channels=in_channels, out_channels=out_channels,
36
+ kernel_size=cfg['conv2']['kernel_size'],
37
+ stride=cfg['conv2']['kernel_size'])
38
  self.relu = torch.nn.ReLU()
39
 
40
  def forward(self, x):
 
44
 
45
 
46
  class Demucs(torch.nn.Module):
47
    def __init__(self, cfg):
        """Demucs denoiser: 3 encoders, a 2-layer LSTM bottleneck, 3 decoders.

        cfg holds 'H' (base hidden width) plus 'encoder'/'decoder' conv
        settings (see conf/model/demucs.yaml).
        """
        super(Demucs, self).__init__()
        H = cfg['H']

        # Channel widths double at each encoder stage: 1 -> H -> 2H -> 4H.
        self.encoder1 = Encoder(in_channels=1, out_channels=H, cfg=cfg['encoder'])
        self.encoder2 = Encoder(in_channels=H, out_channels=2*H, cfg=cfg['encoder'])
        self.encoder3 = Encoder(in_channels=2*H, out_channels=4*H, cfg=cfg['encoder'])

        self.lstm = torch.nn.LSTM(
            input_size=4*H,
            hidden_size=4*H, num_layers=2, batch_first=True)

        # Decoders mirror the encoder widths back down: 4H -> 2H -> H -> 1.
        self.decoder1 = Decoder(in_channels=4*H, out_channels=2*H, cfg=cfg['decoder'])
        self.decoder2 = Decoder(in_channels=2*H, out_channels=H, cfg=cfg['decoder'])
        self.decoder3 = Decoder(in_channels=H, out_channels=1, cfg=cfg['decoder'])
62
 
63
  def forward(self, x):
64
  out1 = self.encoder1(x)
losses.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
import torch

# Registry of available loss modules, keyed by the config 'name' field.
LOSSES = {
    'mse': torch.nn.MSELoss()
}


def get_loss(loss_config):
    """Return the loss module registered under loss_config['name'].

    Raises KeyError for an unknown loss name.
    """
    loss_name = loss_config['name']
    return LOSSES[loss_name]
main.py CHANGED
@@ -1,15 +1,13 @@
1
- import torch
2
- print(torch.__version__)
3
- from torchaudio.utils import download_asset
4
 
5
 
6
- def print_hi(name):
7
- # Use a breakpoint in the code line below to debug your script.
8
- print(f'Hi, {name}') # Press Ctrl+F8 to toggle the breakpoint.
 
9
 
10
 
11
- # Press the green button in the gutter to run the script.
12
  if __name__ == '__main__':
13
- print_hi('PyCharm')
14
-
15
- # See PyCharm help at https://www.jetbrains.com/help/pycharm/
 
1
import hydra
from omegaconf import DictConfig, OmegaConf
from train import train


# Hydra entry point: composes conf/config.yaml with the dataset/model/
# loss/optimizer groups selected in its `defaults` list.
@hydra.main(version_base=None, config_path="conf", config_name="config")
def main(cfg: DictConfig):
    """Print the composed config and launch training with it."""
    print(OmegaConf.to_yaml(cfg))
    train(cfg)


if __name__ == '__main__':
    main()
 
 
EDA.ipynb → notebooks/EDA.ipynb RENAMED
File without changes
optimizers.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
import torch

# Supported optimizer constructors, keyed by the config section name.
OPTIMIZERS_POOL = {
    'sgd': torch.optim.SGD,
}


def get_optimizer(model_params, optimizer_config):
    """Instantiate the optimizer described by *optimizer_config*.

    The config is a single-entry mapping of the form
    {name: {hyperparameter: value, ...}}; the inner dict is expanded
    as keyword arguments to the registered constructor.
    """
    name, params = next(iter(optimizer_config.items()))
    return OPTIMIZERS_POOL[name](model_params, **params)
evaluation.py → testing/evaluation.py RENAMED
@@ -45,7 +45,7 @@ def evaluate_on_dataset(model_name, dataset_path, dataset_type):
45
  if __name__ == '__main__':
46
  parser = argparse.ArgumentParser(prog='Program to evaluate denoising')
47
  parser.add_argument('--dataset_path', type=str,
48
- default='/media/public/datasets/denoising/DS_10283_2791/',
49
  help='Path to dataset folder')
50
  parser.add_argument('--dataset_type', type=str, required=True,
51
  choices=['valentini'])
 
45
  if __name__ == '__main__':
46
  parser = argparse.ArgumentParser(prog='Program to evaluate denoising')
47
  parser.add_argument('--dataset_path', type=str,
48
+ default='/media/public/dataset/denoising/DS_10283_2791/',
49
  help='Path to dataset folder')
50
  parser.add_argument('--dataset_type', type=str, required=True,
51
  choices=['valentini'])
metrics.py → testing/metrics.py RENAMED
File without changes
train.py CHANGED
@@ -3,66 +3,46 @@ from torch.utils.tensorboard import SummaryWriter
3
  import torch
4
  from torch.nn import Sequential
5
  from torch.utils.data import DataLoader
6
- from datasets import Valentini
7
  from datetime import datetime
8
  from torchvision.transforms import RandomCrop
9
  from utils import load_wav
10
  from denoisers.demucs import Demucs
11
  from pathlib import Path
 
12
 
13
- os.environ['CUDA_VISIBLE_DEVICES'] = "1"
14
-
15
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
16
-
17
- model = Demucs(H=64).to(device)
18
-
19
- DATASET_PATH = Path('/media/public/datasets/denoising/DS_10283_2791/')
20
- VALID_WAVS = {'hard': 'p257_171.wav',
21
- 'medium': 'p232_071.wav',
22
- 'easy': 'p232_284.wav'}
23
- MAX_SECONDS = 3.2
24
- SAMPLE_RATE = 16000
25
-
26
- transform = Sequential(RandomCrop((1, int(MAX_SECONDS * SAMPLE_RATE)), pad_if_needed=True))
27
-
28
- training_loader = DataLoader(Valentini(valid=False, transform=transform), batch_size=12, shuffle=True)
29
- validation_loader = DataLoader(Valentini(valid=True, transform=transform), batch_size=12, shuffle=True)
30
-
31
- optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
32
- loss_fn = torch.nn.MSELoss()
33
-
34
-
35
- def train_one_epoch(epoch_index, tb_writer):
36
- running_loss = 0.
37
- last_loss = 0.
38
-
39
- for i, data in enumerate(training_loader):
40
- inputs, labels = data
41
- inputs, labels = inputs.to(device), labels.to(device)
42
-
43
- optimizer.zero_grad()
44
-
45
- outputs = model(inputs)
46
 
47
- loss = loss_fn(outputs, labels)
48
- loss.backward()
49
 
50
- optimizer.step()
51
-
52
- running_loss += loss.item()
53
- if i % 1000 == 999:
54
- last_loss = running_loss / 100 # loss per batch
55
- print(' batch {} loss: {}'.format(i + 1, last_loss))
56
- tb_x = epoch_index * len(training_loader) + i + 1
57
- tb_writer.add_scalar('Loss/train', last_loss, tb_x)
58
- running_loss = 0.
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- return last_loss
62
 
63
 
64
- def train():
65
  timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
 
 
 
 
66
  writer = SummaryWriter('runs/denoising_trainer_{}'.format(timestamp))
67
  epoch_number = 0
68
 
@@ -78,11 +58,34 @@ def train():
78
  for epoch in range(EPOCHS):
79
  print('EPOCH {}:'.format(epoch_number + 1))
80
 
81
- # Make sure gradient tracking is on, and do a pass over the data
82
  model.train(True)
83
- avg_loss = train_one_epoch(epoch_number, writer)
84
 
85
- # We don't need gradients on to do reporting
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  model.train(False)
87
 
88
  running_vloss = 0.0
 
3
  import torch
4
  from torch.nn import Sequential
5
  from torch.utils.data import DataLoader
 
6
  from datetime import datetime
7
  from torchvision.transforms import RandomCrop
8
  from utils import load_wav
9
  from denoisers.demucs import Demucs
10
  from pathlib import Path
11
+ from omegaconf import DictConfig
12
 
13
+ from optimizers import OPTIMIZERS_POOL
14
+ from losses import LOSSES
15
+ from datasets import DATASETS_POOL
16
+ from denoisers import get_model
17
+ from optimizers import get_optimizer
18
+ from losses import get_loss
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
 
 
20
 
21
+ os.environ['CUDA_VISIBLE_DEVICES'] = "1"
22
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
 
 
 
 
 
 
23
 
24
+ #
25
+ #
26
+ # DATASET_PATH = Path('/media/public/dataset/denoising/DS_10283_2791/')
27
+ # VALID_WAVS = {'hard': 'p257_171.wav',
28
+ # 'medium': 'p232_071.wav',
29
+ # 'easy': 'p232_284.wav'}
30
+ # MAX_SECONDS = 2
31
+ # SAMPLE_RATE = 16000
32
+ #
33
+ # transform = Sequential(RandomCrop((1, int(MAX_SECONDS * SAMPLE_RATE)), pad_if_needed=True))
34
+ #
35
 + # training_loader = DataLoader(Valentini(valid=False, transform=transform), batch_size=12, shuffle=True)
36
 + # validation_loader = DataLoader(Valentini(valid=True, transform=transform), batch_size=12, shuffle=True)
 + # NOTE(review): train(cfg) below still iterates `training_loader`, but its
 + # construction is commented out here — this will raise NameError at runtime.
 + # Rebuild the loaders from cfg['dataloader'] and DATASETS_POOL before training.
37
 
 
38
 
39
 
40
+ def train(cfg: DictConfig):
41
  timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
42
+ model = get_model(cfg['model'])
43
+ optimizer = get_optimizer(model.parameters(), cfg['optimizer'])
44
+ loss_fn = get_loss(cfg['loss'])
45
+
46
  writer = SummaryWriter('runs/denoising_trainer_{}'.format(timestamp))
47
  epoch_number = 0
48
 
 
58
  for epoch in range(EPOCHS):
59
  print('EPOCH {}:'.format(epoch_number + 1))
60
 
 
61
  model.train(True)
 
62
 
63
+ running_loss = 0.
64
+ last_loss = 0.
65
+
66
+ for i, data in enumerate(training_loader):
67
+ inputs, labels = data
68
+ inputs, labels = inputs.to(device), labels.to(device)
69
+
70
+ optimizer.zero_grad()
71
+
72
+ outputs = model(inputs)
73
+
74
+ loss = loss_fn(outputs, labels)
75
+ loss.backward()
76
+
77
+ optimizer.step()
78
+
79
+ running_loss += loss.item()
80
+ if i % 1000 == 999:
81
+ last_loss = running_loss / 1000 # loss per batch
82
+ print(' batch {} loss: {}'.format(i + 1, last_loss))
83
+ tb_x = epoch_number * len(training_loader) + i + 1
84
+ writer.add_scalar('Loss/train', last_loss, tb_x)
85
+ running_loss = 0.
86
+
87
+ avg_loss = last_loss
88
+
89
  model.train(False)
90
 
91
  running_vloss = 0.0