Spaces:

BorisovMaksim
/

denoising

Runtime error

App Files Community

BorisovMaksim committed on May 8, 2023

Commit

95d8ea8

1 Parent(s): 1160793

Add try/except for calculating PESQ scores

Browse files

Files changed (7) hide show

checkpoing_saver.py +10 -3
conf/config.yaml +4 -1
datasets/minimal.py +1 -2
losses.py +9 -6
main.py +2 -1
testing/metrics.py +14 -3
train.py +19 -13

checkpoing_saver.py CHANGED Viewed

@@ -19,14 +19,21 @@ class CheckpointSaver:
         self.best_metric_val = np.Inf if decreasing else -np.Inf
         self.run_name = run_name
-    def __call__(self, model, epoch, metric_val):
-        model_path = os.path.join(self.dirpath, self.run_name, model.__class__.__name__ + f'_epoch{epoch}.pt')
         save = metric_val < self.best_metric_val if self.decreasing else metric_val > self.best_metric_val
         if save:
             logging.info(
                 f"Current metric value better than {metric_val} better than best {self.best_metric_val}, saving model at {model_path}, & logging model weights to W&B.")
             self.best_metric_val = metric_val
-            torch.save(model.state_dict(), model_path)
             self.log_artifact(f'model-ckpt-epoch-{epoch}.pt', model_path, metric_val)
             self.top_model_paths.append({'path': model_path, 'score': metric_val})
             self.top_model_paths = sorted(self.top_model_paths, key=lambda o: o['score'], reverse=not self.decreasing)

         self.best_metric_val = np.Inf if decreasing else -np.Inf
         self.run_name = run_name
+    def __call__(self, model, epoch, metric_val, optimizer, loss):
+        model_path = os.path.join(self.dirpath, model.__class__.__name__ + f'_{self.run_name}_epoch{epoch}.pt')
         save = metric_val < self.best_metric_val if self.decreasing else metric_val > self.best_metric_val
         if save:
             logging.info(
                 f"Current metric value better than {metric_val} better than best {self.best_metric_val}, saving model at {model_path}, & logging model weights to W&B.")
             self.best_metric_val = metric_val
+            torch.save(
+                {  # Save our checkpoint loc
+                    'epoch': epoch,
+                    'model_state_dict': model.state_dict(),
+                    'optimizer_state_dict': optimizer.state_dict(),
+                    'loss': loss,
+                }, model_path)
             self.log_artifact(f'model-ckpt-epoch-{epoch}.pt', model_path, metric_val)
             self.top_model_paths.append({'path': model_path, 'score': metric_val})
             self.top_model_paths = sorted(self.top_model_paths, key=lambda o: o['score'], reverse=not self.decreasing)

conf/config.yaml CHANGED Viewed

@@ -20,10 +20,13 @@ validation:
 wandb:
   project: denoising
   log_interval: 100
   api_key: local-e23d01ece807cb31e69b2cf4137e4998e4b9856f
   host: http://localhost:8080/
   notes: "Experiment note"
   tags:
-    - baseline

 wandb:
+  run_name: default
   project: denoising
   log_interval: 100
   api_key: local-e23d01ece807cb31e69b2cf4137e4998e4b9856f
   host: http://localhost:8080/
   notes: "Experiment note"
   tags:
+    - baseline
+gpu: 0

datasets/minimal.py CHANGED Viewed

@@ -18,7 +18,6 @@ class Minimal(Dataset):
         return len(self.wavs)
     def __getitem__(self, idx):
-        wav, rate = torchaudio.load(self.wavs[idx])
         wav = self.resampler(wav)
-        wav = torch.reshape(wav, (1, 1, -1))
         return wav, self.target_rate

         return len(self.wavs)
     def __getitem__(self, idx):
+        wav, rate = torchaudio.load(Path(self.dataset_path) / self.wavs[idx])
         wav = self.resampler(wav)
         return wav, self.target_rate

losses.py CHANGED Viewed

@@ -12,6 +12,8 @@
 import torch
 import torch.nn.functional as F
 """STFT-based Loss modules."""
@@ -26,7 +28,8 @@ def stft(x, fft_size, hop_size, win_length, window):
     Returns:
         Tensor: Magnitude spectrogram (B, #frames, fft_size // 2 + 1).
     """
-    x_stft = torch.stft(x, fft_size, hop_size, win_length, window)
     real = x_stft[..., 0]
     imag = x_stft[..., 1]
@@ -154,7 +157,7 @@ class L1_Multi_STFT(torch.nn.Module):
         """Initialize STFT loss module."""
         super(L1_Multi_STFT, self).__init__()
         self.multi_STFT_loss = MultiResolutionSTFTLoss()
-        self.l1_loss =  torch.nn.L1Loss()
     def forward(self, x, y):
         """Calculate forward propagation.
@@ -173,10 +176,10 @@ class L1_Multi_STFT(torch.nn.Module):
 LOSSES = {
     'mse': torch.nn.MSELoss(),
     'L1': torch.nn.L1Loss(),
-    'Multi_STFT': MultiResolutionSTFTLoss,
-    'L1_Multi_STFT': L1_Multi_STFT
 }
-def get_loss(loss_config):
-    return LOSSES[loss_config['name']]

 import torch
 import torch.nn.functional as F
+# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 """STFT-based Loss modules."""
     Returns:
         Tensor: Magnitude spectrogram (B, #frames, fft_size // 2 + 1).
     """
+    x_stft = torch.stft(x[:, 0, :], fft_size, hop_size, win_length, window, return_complex=True)
+    x_stft = torch.view_as_real(x_stft)
     real = x_stft[..., 0]
     imag = x_stft[..., 1]
         """Initialize STFT loss module."""
         super(L1_Multi_STFT, self).__init__()
         self.multi_STFT_loss = MultiResolutionSTFTLoss()
+        self.l1_loss = torch.nn.L1Loss()
     def forward(self, x, y):
         """Calculate forward propagation.
 LOSSES = {
     'mse': torch.nn.MSELoss(),
     'L1': torch.nn.L1Loss(),
+    'Multi_STFT': MultiResolutionSTFTLoss(),
+    'L1_Multi_STFT': L1_Multi_STFT()
 }
+def get_loss(loss_config, device):
+    return LOSSES[loss_config['name']].to(device)

main.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import hydra
-from omegaconf import DictConfig, OmegaConf
 from train import train
 @hydra.main(version_base=None, config_path="conf", config_name="config")
 def main(cfg: DictConfig):
     train(cfg)

 import hydra
+from omegaconf import DictConfig
 from train import train
 @hydra.main(version_base=None, config_path="conf", config_name="config")
 def main(cfg: DictConfig):
     train(cfg)

testing/metrics.py CHANGED Viewed

@@ -1,10 +1,10 @@
 from torchmetrics.audio.pesq import PerceptualEvaluationSpeechQuality
 from torchmetrics.audio.stoi import ShortTimeObjectiveIntelligibility
 import torch
 import torchaudio
 from torchmetrics import SignalNoiseRatio
 class Metrics:
     def __init__(self, rate=16000):
         self.nb_pesq = PerceptualEvaluationSpeechQuality(rate, 'wb')
@@ -12,7 +12,18 @@ class Metrics:
         self.snr = SignalNoiseRatio()
     def calculate(self, denoised, clean):
-        return {'PESQ': self.nb_pesq(denoised, clean).item(),
-                'STOI': self.stoi(denoised, clean).item()}

+import pesq
 from torchmetrics.audio.pesq import PerceptualEvaluationSpeechQuality
 from torchmetrics.audio.stoi import ShortTimeObjectiveIntelligibility
 import torch
 import torchaudio
 from torchmetrics import SignalNoiseRatio
 class Metrics:
     def __init__(self, rate=16000):
         self.nb_pesq = PerceptualEvaluationSpeechQuality(rate, 'wb')
         self.snr = SignalNoiseRatio()
     def calculate(self, denoised, clean):
+        pesq_scores, stoi_scores = 0, 0
+        for denoised_wav, clean_wav in zip(denoised, clean):
+            try:
+                pesq_scores += self.nb_pesq(denoised_wav, clean_wav).item()
+                stoi_scores += self.stoi(denoised_wav, clean_wav).item()
+            except pesq.NoUtterancesError as e:
+                print(e)
+            except ValueError as e:
+                print(e)
+        return {'PESQ': pesq_scores,
+                'STOI': stoi_scores}

train.py CHANGED Viewed

@@ -13,24 +13,24 @@ from datasets import get_datasets
 from testing.metrics import Metrics
 from datasets.minimal import Minimal
-os.environ['CUDA_VISIBLE_DEVICES'] = "1"
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 def train(cfg: DictConfig):
     wandb.login(key=cfg['wandb']['api_key'], host=cfg['wandb']['host'])
     wandb.init(project=cfg['wandb']['project'],
                notes=cfg['wandb']['notes'],
                tags=cfg['wandb']['tags'],
                config=omegaconf.OmegaConf.to_container(
         cfg, resolve=True, throw_on_missing=True))
     checkpoint_saver = CheckpointSaver(dirpath=cfg['training']['model_save_path'], run_name=wandb.run.name)
     metrics = Metrics(rate=cfg['dataloader']['sample_rate'])
     model = get_model(cfg['model']).to(device)
     optimizer = get_optimizer(model.parameters(), cfg['optimizer'])
-    loss_fn = get_loss(cfg['loss'])
     train_dataset, valid_dataset = get_datasets(cfg)
     minimal_dataset = Minimal(cfg)
@@ -70,12 +70,13 @@ def train(cfg: DictConfig):
                 running_stoi += running_metrics['STOI']
                 if phase == 'train' and i % cfg['wandb']['log_interval'] == 0:
-                    wandb.log({"train_loss": running_loss / (i + 1),
-                               "train_pesq": running_pesq / (i + 1),
-                               "train_stoi": running_stoi / (i + 1)})
-            epoch_loss = running_loss / len(dataloaders[phase])
-            eposh_pesq = running_pesq / len(dataloaders[phase])
-            eposh_stoi = running_stoi / len(dataloaders[phase])
             wandb.log({f"{phase}_loss": epoch_loss,
                        f"{phase}_pesq": eposh_pesq,
@@ -83,10 +84,15 @@ def train(cfg: DictConfig):
             if phase == 'val':
                 for i, (wav, rate) in enumerate(dataloaders['minimal']):
-                    prediction = model(wav)
                     wandb.log({
                         f"{i}_example": wandb.Audio(
-                            prediction.cpu()[0][0],
                             sample_rate=rate)})
-                checkpoint_saver(model, epoch, metric_val=eposh_pesq)

 from testing.metrics import Metrics
 from datasets.minimal import Minimal
 def train(cfg: DictConfig):
+    device = torch.device(f'cuda:{cfg.gpu}' if torch.cuda.is_available() else 'cpu')
     wandb.login(key=cfg['wandb']['api_key'], host=cfg['wandb']['host'])
     wandb.init(project=cfg['wandb']['project'],
                notes=cfg['wandb']['notes'],
                tags=cfg['wandb']['tags'],
                config=omegaconf.OmegaConf.to_container(
         cfg, resolve=True, throw_on_missing=True))
+    wandb.run.name = cfg['wandb']['run_name']
     checkpoint_saver = CheckpointSaver(dirpath=cfg['training']['model_save_path'], run_name=wandb.run.name)
     metrics = Metrics(rate=cfg['dataloader']['sample_rate'])
     model = get_model(cfg['model']).to(device)
     optimizer = get_optimizer(model.parameters(), cfg['optimizer'])
+    loss_fn = get_loss(cfg['loss'], device)
     train_dataset, valid_dataset = get_datasets(cfg)
     minimal_dataset = Minimal(cfg)
                 running_stoi += running_metrics['STOI']
                 if phase == 'train' and i % cfg['wandb']['log_interval'] == 0:
+                    wandb.log({"train_loss": running_loss / (i + 1) / inputs.size(0),
+                               "train_pesq": running_pesq / (i + 1) / inputs.size(0),
+                               "train_stoi": running_stoi / (i + 1) / inputs.size(0)})
+            epoch_loss = running_loss / len(dataloaders[phase].dataset)
+            eposh_pesq = running_pesq / len(dataloaders[phase].dataset)
+            eposh_stoi = running_stoi / len(dataloaders[phase].dataset)
             wandb.log({f"{phase}_loss": epoch_loss,
                        f"{phase}_pesq": eposh_pesq,
             if phase == 'val':
                 for i, (wav, rate) in enumerate(dataloaders['minimal']):
+                    prediction = model(wav.to(device))
                     wandb.log({
                         f"{i}_example": wandb.Audio(
+                            prediction.detach().cpu().numpy()[0][0],
                             sample_rate=rate)})
+                checkpoint_saver(model, epoch, metric_val=eposh_pesq,
+                                 optimizer=optimizer, loss=epoch_loss)
+if __name__ == "__main__":
+    pass