Commit 3f8f152
Parent(s): bd0a813

add hydra configs
Files changed:

- conf/config.yaml +15 -0
- conf/dataset/valentini.yaml +3 -0
- conf/loss/mse.yaml +1 -0
- conf/model/demucs.yaml +18 -0
- conf/optimizer/adam.yaml +0 -0
- conf/optimizer/sgd.yaml +3 -0
- datasets.py +8 -1
- denoisers/__init__.py +14 -0
- denoisers/demucs.py +20 -13
- losses.py +9 -0
- main.py +8 -10
- EDA.ipynb → notebooks/EDA.ipynb +0 -0
- optimizers.py +10 -0
- evaluation.py → testing/evaluation.py +1 -1
- metrics.py → testing/metrics.py +0 -0
- train.py +53 -50

conf/config.yaml
ADDED
@@ -0,0 +1,15 @@
+defaults:
+  - dataset: valentini
+  - model: demucs
+  - loss: mse
+  - optimizer: sgd
+
+
+dataloader:
+  max_seconds: 2
+  sample_rate: 16000
+  batch_size: 12
+
+
+augmentations:
+  - random_crop
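
For orientation, a minimal sketch of how this root config composes the group files above and how values can be overridden at compose time. It assumes Hydra >= 1.2 (matching the version_base argument used in main.py below); the override values are illustrative only:

from hydra import compose, initialize
from omegaconf import OmegaConf

# Build the same config tree that @hydra.main assembles from conf/config.yaml.
with initialize(version_base=None, config_path="conf"):
    cfg = compose(config_name="config",
                  overrides=["optimizer=sgd", "dataloader.batch_size=4"])

print(OmegaConf.to_yaml(cfg))       # dataset, model, loss, optimizer, dataloader, augmentations
print(cfg.dataloader.max_seconds)   # 2
print(cfg.model.demucs.H)           # 64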

conf/dataset/valentini.yaml
ADDED
@@ -0,0 +1,3 @@
+
+name: valentini
+path: /media/public/dataset/denoising/DS_10283_2791/

conf/loss/mse.yaml
ADDED
@@ -0,0 +1 @@
+name: mse

conf/model/demucs.yaml
ADDED
@@ -0,0 +1,18 @@
+demucs:
+  H: 64
+  encoder:
+    conv1:
+      kernel_size: 8
+      stride: 2
+    conv2:
+      kernel_size: 1
+      stride: 1
+
+  decoder:
+    conv1:
+      kernel_size: 1
+      stride: 1
+    conv2:
+      kernel_size: 8
+      stride: 2
+

conf/optimizer/adam.yaml
ADDED
File without changes

conf/optimizer/sgd.yaml
ADDED
@@ -0,0 +1,3 @@
+sgd:
+  lr: 0.001
+  momentum: 0.9

datasets.py
CHANGED
@@ -5,7 +5,7 @@ from utils import load_wav
 
 
 class Valentini(Dataset):
-    def __init__(self, dataset_path='/media/public/
+    def __init__(self, dataset_path='/media/public/dataset/denoising/DS_10283_2791/', transform=None,
                  valid=False):
        clean_path = Path(dataset_path) / 'clean_trainset_56spk_wav'
        noisy_path = Path(dataset_path) / 'noisy_trainset_56spk_wav'
@@ -37,3 +37,10 @@ class Valentini(Dataset):
        torch.manual_seed(random_seed)
        clean_wav = self.transform(clean_wav)
        return noisy_wav, clean_wav
+
+
+DATASETS_POOL = {
+    'valentini': Valentini
+}
+
+
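
The commit registers the dataset in DATASETS_POOL but does not add a lookup helper for it in this diff. A minimal sketch of how the registry could be resolved from conf/dataset/valentini.yaml; the get_dataset helper is hypothetical, not part of this commit:

from omegaconf import OmegaConf

from datasets import DATASETS_POOL


def get_dataset(dataset_config, transform=None, valid=False):
    # Hypothetical helper mirroring get_model/get_optimizer: pick the class by
    # the 'name' key and pass the configured path through.
    dataset_cls = DATASETS_POOL[dataset_config['name']]
    return dataset_cls(dataset_path=dataset_config['path'],
                       transform=transform, valid=valid)


dataset_cfg = OmegaConf.create({'name': 'valentini',
                                'path': '/media/public/dataset/denoising/DS_10283_2791/'})
train_set = get_dataset(dataset_cfg, valid=False)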

denoisers/__init__.py
ADDED
@@ -0,0 +1,14 @@
+from denoisers.demucs import Demucs
+from denoisers.SpectralGating import SpectralGating
+
+
+MODEL_POOL = {
+    'demucs': Demucs,
+    'baseline': SpectralGating
+}
+
+
+def get_model(model_config):
+    name, params = list(model_config.items())[0]
+    return MODEL_POOL[name](params)
+
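
A short usage sketch of get_model: the single top-level key of the model group config selects the class from MODEL_POOL, and the nested mapping is handed to the constructor as its cfg (values below mirror conf/model/demucs.yaml):

from omegaconf import OmegaConf

from denoisers import get_model

model_cfg = OmegaConf.create({
    'demucs': {
        'H': 64,
        'encoder': {'conv1': {'kernel_size': 8, 'stride': 2},
                    'conv2': {'kernel_size': 1, 'stride': 1}},
        'decoder': {'conv1': {'kernel_size': 1, 'stride': 1},
                    'conv2': {'kernel_size': 8, 'stride': 2}},
    }
})
model = get_model(model_cfg)   # resolves to Demucs(cfg=model_cfg['demucs'])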

denoisers/demucs.py
CHANGED
@@ -1,15 +1,19 @@
 import torch
 from torch.nn.functional import pad
 
+
+
 class Encoder(torch.nn.Module):
-    def __init__(self, in_channels, out_channels):
+    def __init__(self, in_channels, out_channels, cfg):
        super(Encoder, self).__init__()
 
        self.conv1 = torch.nn.Conv1d(in_channels=in_channels, out_channels=out_channels,
-                                    kernel_size=
+                                    kernel_size=cfg['conv1']['kernel_size'],
+                                    stride=cfg['conv1']['stride'])
        self.relu1 = torch.nn.ReLU()
        self.conv2 = torch.nn.Conv1d(in_channels=out_channels, out_channels=2 * out_channels,
-                                    kernel_size=
+                                    kernel_size=cfg['conv2']['kernel_size'],
+                                    stride=cfg['conv2']['stride'])
        self.glu = torch.nn.GLU(dim=-2)
 
    def forward(self, x):
@@ -21,14 +25,16 @@ class Encoder(torch.nn.Module):
 
 
 class Decoder(torch.nn.Module):
-    def __init__(self, in_channels, out_channels):
+    def __init__(self, in_channels, out_channels, cfg):
        super(Decoder, self).__init__()
 
        self.conv1 = torch.nn.Conv1d(in_channels=in_channels, out_channels=2 * in_channels,
-                                    kernel_size=
+                                    kernel_size=cfg['conv1']['kernel_size'],
+                                    stride=cfg['conv1']['stride'])
        self.glu = torch.nn.GLU(dim=-2)
        self.conv2 = torch.nn.ConvTranspose1d(in_channels=in_channels, out_channels=out_channels,
-                                             kernel_size=
+                                             kernel_size=cfg['conv2']['kernel_size'],
+                                             stride=cfg['conv2']['stride'])
        self.relu = torch.nn.ReLU()
 
    def forward(self, x):
@@ -38,20 +44,21 @@ class Decoder(torch.nn.Module):
 
 
 class Demucs(torch.nn.Module):
-    def __init__(self,
+    def __init__(self, cfg):
        super(Demucs, self).__init__()
+        H = cfg['H']
 
-        self.encoder1 = Encoder(in_channels=1, out_channels=H)
-        self.encoder2 = Encoder(in_channels=H, out_channels=2*H)
-        self.encoder3 = Encoder(in_channels=2*H, out_channels=4*H)
+        self.encoder1 = Encoder(in_channels=1, out_channels=H, cfg=cfg['encoder'])
+        self.encoder2 = Encoder(in_channels=H, out_channels=2*H, cfg=cfg['encoder'])
+        self.encoder3 = Encoder(in_channels=2*H, out_channels=4*H, cfg=cfg['encoder'])
 
        self.lstm = torch.nn.LSTM(
            input_size=4*H,
            hidden_size=4*H, num_layers=2, batch_first=True)
 
-        self.decoder1 = Decoder(in_channels=4*H, out_channels=2*H)
-        self.decoder2 = Decoder(in_channels=2*H, out_channels=H)
-        self.decoder3 = Decoder(in_channels=H, out_channels=1)
+        self.decoder1 = Decoder(in_channels=4*H, out_channels=2*H, cfg=cfg['decoder'])
+        self.decoder2 = Decoder(in_channels=2*H, out_channels=H, cfg=cfg['decoder'])
+        self.decoder3 = Decoder(in_channels=H, out_channels=1, cfg=cfg['decoder'])
 
    def forward(self, x):
        out1 = self.encoder1(x)
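
A quick sanity check of the new config-driven constructors. This sketch only instantiates the model and counts parameters, since the forward pass depends on code outside these hunks; the config values mirror conf/model/demucs.yaml:

from omegaconf import OmegaConf

from denoisers.demucs import Demucs

cfg = OmegaConf.create({
    'H': 64,
    'encoder': {'conv1': {'kernel_size': 8, 'stride': 2},
                'conv2': {'kernel_size': 1, 'stride': 1}},
    'decoder': {'conv1': {'kernel_size': 1, 'stride': 1},
                'conv2': {'kernel_size': 8, 'stride': 2}},
})

model = Demucs(cfg)
print(sum(p.numel() for p in model.parameters()))  # rough size check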

losses.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+
+LOSSES = {
+    'mse': torch.nn.MSELoss()
+}
+
+
+def get_loss(loss_config):
+    return LOSSES[loss_config['name']]

main.py
CHANGED
@@ -1,15 +1,13 @@
-import
-
-from
+import hydra
+from omegaconf import DictConfig, OmegaConf
+from train import train
 
 
-
-
-    print(
+@hydra.main(version_base=None, config_path="conf", config_name="config")
+def main(cfg: DictConfig):
+    print(OmegaConf.to_yaml(cfg))
+    train(cfg)
 
 
-# Press the green button in the gutter to run the script.
 if __name__ == '__main__':
-
-
-    # See PyCharm help at https://www.jetbrains.com/help/pycharm/
+    main()
EDA.ipynb → notebooks/EDA.ipynb
RENAMED
File without changes

optimizers.py
ADDED
@@ -0,0 +1,10 @@
+import torch
+
+OPTIMIZERS_POOL = {
+    'sgd': torch.optim.SGD,
+}
+
+def get_optimizer(model_params, optimizer_config):
+    name, params = list(optimizer_config.items())[0]
+    optimizer = OPTIMIZERS_POOL[name](model_params, **params)
+    return optimizer
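
Usage sketch for get_optimizer: the top-level key of conf/optimizer/sgd.yaml picks the optimizer class and the nested values are unpacked into its constructor (the stand-in parameter below is illustrative only):

import torch
from omegaconf import OmegaConf

from optimizers import get_optimizer

optimizer_cfg = OmegaConf.create({'sgd': {'lr': 0.001, 'momentum': 0.9}})
params = [torch.nn.Parameter(torch.zeros(1))]      # stand-in for model.parameters()
optimizer = get_optimizer(params, optimizer_cfg)   # -> torch.optim.SGD(params, lr=0.001, momentum=0.9)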

evaluation.py → testing/evaluation.py
RENAMED
@@ -45,7 +45,7 @@ def evaluate_on_dataset(model_name, dataset_path, dataset_type):
 if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog='Program to evaluate denoising')
    parser.add_argument('--dataset_path', type=str,
-                        default='/media/public/
+                        default='/media/public/dataset/denoising/DS_10283_2791/',
                        help='Path to dataset folder')
    parser.add_argument('--dataset_type', type=str, required=True,
                        choices=['valentini'])

metrics.py → testing/metrics.py
RENAMED
File without changes

train.py
CHANGED
@@ -3,66 +3,46 @@ from torch.utils.tensorboard import SummaryWriter
 import torch
 from torch.nn import Sequential
 from torch.utils.data import DataLoader
-from datasets import Valentini
 from datetime import datetime
 from torchvision.transforms import RandomCrop
 from utils import load_wav
 from denoisers.demucs import Demucs
 from pathlib import Path
+from omegaconf import DictConfig
 
-
-
-
-
-
-
-DATASET_PATH = Path('/media/public/datasets/denoising/DS_10283_2791/')
-VALID_WAVS = {'hard': 'p257_171.wav',
-              'medium': 'p232_071.wav',
-              'easy': 'p232_284.wav'}
-MAX_SECONDS = 3.2
-SAMPLE_RATE = 16000
-
-transform = Sequential(RandomCrop((1, int(MAX_SECONDS * SAMPLE_RATE)), pad_if_needed=True))
-
-training_loader = DataLoader(Valentini(valid=False, transform=transform), batch_size=12, shuffle=True)
-validation_loader = DataLoader(Valentini(valid=True, transform=transform), batch_size=12, shuffle=True)
-
-optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
-loss_fn = torch.nn.MSELoss()
-
-
-def train_one_epoch(epoch_index, tb_writer):
-    running_loss = 0.
-    last_loss = 0.
-
-    for i, data in enumerate(training_loader):
-        inputs, labels = data
-        inputs, labels = inputs.to(device), labels.to(device)
-
-        optimizer.zero_grad()
-
-        outputs = model(inputs)
+from optimizers import OPTIMIZERS_POOL
+from losses import LOSSES
+from datasets import DATASETS_POOL
+from denoisers import get_model
+from optimizers import get_optimizer
+from losses import get_loss
 
-        loss = loss_fn(outputs, labels)
-        loss.backward()
 
-
-
-        running_loss += loss.item()
-        if i % 1000 == 999:
-            last_loss = running_loss / 100  # loss per batch
-            print(' batch {} loss: {}'.format(i + 1, last_loss))
-            tb_x = epoch_index * len(training_loader) + i + 1
-            tb_writer.add_scalar('Loss/train', last_loss, tb_x)
-            running_loss = 0.
+os.environ['CUDA_VISIBLE_DEVICES'] = "1"
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
+#
+#
+# DATASET_PATH = Path('/media/public/dataset/denoising/DS_10283_2791/')
+# VALID_WAVS = {'hard': 'p257_171.wav',
+#               'medium': 'p232_071.wav',
+#               'easy': 'p232_284.wav'}
+# MAX_SECONDS = 2
+# SAMPLE_RATE = 16000
+#
+# transform = Sequential(RandomCrop((1, int(MAX_SECONDS * SAMPLE_RATE)), pad_if_needed=True))
+#
+# training_loader = DataLoader(Valentini(valid=False, transform=transform), batch_size=12, shuffle=True)
+# validation_loader = DataLoader(Valentini(valid=True, transform=transform), batch_size=12, shuffle=True)
 
-    return last_loss
 
 
-def train():
+def train(cfg: DictConfig):
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+    model = get_model(cfg['model'])
+    optimizer = get_optimizer(model.parameters(), cfg['optimizer'])
+    loss_fn = get_loss(cfg['loss'])
+
    writer = SummaryWriter('runs/denoising_trainer_{}'.format(timestamp))
    epoch_number = 0
 
@@ -78,11 +58,34 @@
    for epoch in range(EPOCHS):
        print('EPOCH {}:'.format(epoch_number + 1))
 
-        # Make sure gradient tracking is on, and do a pass over the data
        model.train(True)
-        avg_loss = train_one_epoch(epoch_number, writer)
 
-
+        running_loss = 0.
+        last_loss = 0.
+
+        for i, data in enumerate(training_loader):
+            inputs, labels = data
+            inputs, labels = inputs.to(device), labels.to(device)
+
+            optimizer.zero_grad()
+
+            outputs = model(inputs)
+
+            loss = loss_fn(outputs, labels)
+            loss.backward()
+
+            optimizer.step()
+
+            running_loss += loss.item()
+            if i % 1000 == 999:
+                last_loss = running_loss / 1000  # loss per batch
+                print(' batch {} loss: {}'.format(i + 1, last_loss))
+                tb_x = epoch_number * len(training_loader) + i + 1
+                writer.add_scalar('Loss/train', last_loss, tb_x)
+                running_loss = 0.
+
+        avg_loss = last_loss
+
        model.train(False)
 
        running_vloss = 0.0