Spaces:

BorisovMaksim
/

denoising

Runtime error

App Files Files Community

bWX1204813 committed on Apr 26, 2023

Commit

091b1e0

0 Parent(s):

initial

Browse files

Files changed (17) hide show

.gitignore +3 -0
EDA.ipynb +0 -0
README.md +3 -0
__pycache__/denoise.cpython-38.pyc +0 -0
__pycache__/metrics.cpython-38.pyc +0 -0
__pycache__/utils.cpython-38.pyc +0 -0
app.py +43 -0
denoisers/.ipynb_checkpoints/SpectralGating-checkpoint.py +26 -0
denoisers/.ipynb_checkpoints/demucs-checkpoint.py +67 -0
denoisers/SpectralGating.py +26 -0
denoisers/__pycache__/SpectralGating.cpython-38.pyc +0 -0
denoisers/demucs.py +67 -0
evaluation.py +62 -0
main.py +15 -0
metrics.py +21 -0
tutorial.ipynb +0 -0
utils.py +53 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+.idea/*
+.ipynb_checkpoints/*
+nohup.out

EDA.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

README.md ADDED Viewed

	@@ -0,0 +1,3 @@

+| Attempt | #1    | #2    |
+| :---:   | :---: | :---: |
+| Seconds | 301   | 283   |

__pycache__/denoise.cpython-38.pyc ADDED Viewed

Binary file (3.81 kB). View file

__pycache__/metrics.cpython-38.pyc ADDED Viewed

Binary file (867 Bytes). View file

__pycache__/utils.cpython-38.pyc ADDED Viewed

Binary file (1.86 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import sys
+import os
+from re import M
+import uuid
+import shutil
+import ffmpeg
+import logging
+import gradio as gr
+from denoisers.SpectralGating import SpectralGating
+model = SpectralGating()
+def denoising_transform(audio):
+    src_path = "cache_wav/source/{}.wav".format(str(uuid.uuid4()))
+    tgt_path = "cache_wav/target/{}.wav".format(str(uuid.uuid4()))
+    # os.rename(audio.name, src_path)
+    (ffmpeg.input(audio)
+            .output(src_path, acodec='pcm_s16le', ac=1, ar=22050)
+            .run()
+    )
+    model.predict(src_path, tgt_path)
+    return tgt_path
+inputs = gr.inputs.Audio(label="Source Audio", source="microphone", type='filepath')
+outputs = gr.outputs.Audio(label="Target Audio", type='filepath')
+title = "Chinese-to-English Direct Speech-to-Speech Translation (BETA)"
+#"""
+gr.Interface(
+    denoising_transform, inputs, outputs, title=title,
+    allow_flagging='never',
+).launch(
+    server_name='localhost',
+    server_port=7871,
+    #ssl_keyfile='example.key',
+    #ssl_certfile="example.crt",
+)

denoisers/.ipynb_checkpoints/SpectralGating-checkpoint.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import noisereduce as nr
+import torch
+import torchaudio
+class SpectralGating(torch.nn.Module):
+    """example: wav_noisy = '/media/public/datasets/denoising/DS_10283_2791/noisy_trainset_56spk_wav/p312_002.wav' """
+    def __init__(self, rate=16000):
+        super(SpectralGating, self).__init__()
+        self.rate = rate
+    def forward(self, wav):
+        reduced_noise = torch.Tensor(nr.reduce_noise(y=wav, sr=self.rate))
+        return reduced_noise
+    def predict(self, wav_path, out_path):
+        data, rate = torchaudio.load(wav_path)
+        reduced_noise = torch.Tensor(nr.reduce_noise(y=data, sr=rate))
+        torchaudio.save(out_path, reduced_noise, rate)
+        return reduced_noise

denoisers/.ipynb_checkpoints/demucs-checkpoint.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import torch
+class Encoder(torch.nn.Module):
+    def __init__(self, in_channels, out_channels,
+                 kernel_size_1=8, stride_1=4,
+                 kernel_size_2=1, stride_2=1):
+        super(Encoder, self).__init__()
+        self.conv1 = torch.nn.Conv1d(in_channels=in_channels, out_channels=out_channels,
+                                     kernel_size=kernel_size_1, stride=stride_1)
+        self.relu1 = torch.nn.ReLU()
+        self.conv2 = torch.nn.Conv1d(in_channels=out_channels, out_channels=2 * out_channels,
+                                     kernel_size=kernel_size_2, stride=stride_2)
+        self.glu = torch.nn.GLU()
+    def forward(self, x):
+        x = self.relu1(self.conv1(x))
+        x = self.glu(self.conv2(x))
+        return x
+class Decoder(torch.nn.Module):
+    def __init__(self, in_channels, out_channels,
+                 kernel_size_1=3, stride_1=1,
+                 kernel_size_2=8, stride_2=4):
+        super(Decoder, self).__init__()
+        self.conv1 = torch.nn.Conv1d(in_channels=in_channels, out_channels=2 * in_channels,
+                                     kernel_size=kernel_size_1, stride=stride_1)
+        self.glu = torch.nn.GLU()
+        self.conv2 = torch.nn.ConvTranspose1d(in_channels=in_channels, out_channels=out_channels,
+                                              kernel_size=kernel_size_2, stride=stride_2)
+        self.relu = torch.nn.ReLU()
+    def forward(self, x):
+        x = self.glu(self.conv1(x))
+        x = self.relu(self.conv2(x))
+        return x
+class Demucs(torch.nn.Module):
+    def __init__(self):
+        super(Demucs, self).__init__()
+        self.encoder1 = Encoder(in_channels=1, out_channels=64)
+        self.encoder2 = Encoder(in_channels=64, out_channels=128)
+        self.encoder3 = Encoder(in_channels=128, out_channels=256)
+        self.lstm = torch.nn.LSTM(input_size=256, hidden_size=256, num_layers=2)
+        self.decoder1 = Decoder(in_channels=256, out_channels=128)
+        self.decoder2 = Decoder(in_channels=128, out_channels=64)
+        self.decoder3 = Decoder(in_channels=64, out_channels=1)
+    def forward(self, x):
+        out1 = self.encoder1(x)
+        out2 = self.encoder2(out1)
+        out3 = self.encoder3(out2)
+        x = self.lstm(out3)
+        x = self.decoder1(x + out3)
+        x = self.decoder2(x + out2)
+        x = self.decoder3(x + out1)
+        return x

denoisers/SpectralGating.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import noisereduce as nr
+import torch
+import torchaudio
+class SpectralGating(torch.nn.Module):
+    """example: wav_noisy = '/media/public/datasets/denoising/DS_10283_2791/noisy_trainset_56spk_wav/p312_002.wav' """
+    def __init__(self, rate=16000):
+        super(SpectralGating, self).__init__()
+        self.rate = rate
+    def forward(self, wav):
+        reduced_noise = torch.Tensor(nr.reduce_noise(y=wav, sr=self.rate))
+        return reduced_noise
+    def predict(self, wav_path, out_path):
+        data, rate = torchaudio.load(wav_path)
+        reduced_noise = torch.Tensor(nr.reduce_noise(y=data, sr=rate))
+        torchaudio.save(out_path, reduced_noise, rate)
+        return reduced_noise

denoisers/__pycache__/SpectralGating.cpython-38.pyc ADDED Viewed

Binary file (1.2 kB). View file

denoisers/demucs.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import torch
+class Encoder(torch.nn.Module):
+    def __init__(self, in_channels, out_channels,
+                 kernel_size_1=8, stride_1=4,
+                 kernel_size_2=1, stride_2=1):
+        super(Encoder, self).__init__()
+        self.conv1 = torch.nn.Conv1d(in_channels=in_channels, out_channels=out_channels,
+                                     kernel_size=kernel_size_1, stride=stride_1)
+        self.relu1 = torch.nn.ReLU()
+        self.conv2 = torch.nn.Conv1d(in_channels=out_channels, out_channels=2 * out_channels,
+                                     kernel_size=kernel_size_2, stride=stride_2)
+        self.glu = torch.nn.GLU()
+    def forward(self, x):
+        x = self.relu1(self.conv1(x))
+        x = self.glu(self.conv2(x))
+        return x
+class Decoder(torch.nn.Module):
+    def __init__(self, in_channels, out_channels,
+                 kernel_size_1=3, stride_1=1,
+                 kernel_size_2=8, stride_2=4):
+        super(Decoder, self).__init__()
+        self.conv1 = torch.nn.Conv1d(in_channels=in_channels, out_channels=2 * in_channels,
+                                     kernel_size=kernel_size_1, stride=stride_1)
+        self.glu = torch.nn.GLU()
+        self.conv2 = torch.nn.ConvTranspose1d(in_channels=in_channels, out_channels=out_channels,
+                                              kernel_size=kernel_size_2, stride=stride_2)
+        self.relu = torch.nn.ReLU()
+    def forward(self, x):
+        x = self.glu(self.conv1(x))
+        x = self.relu(self.conv2(x))
+        return x
+class Demucs(torch.nn.Module):
+    def __init__(self):
+        super(Demucs, self).__init__()
+        self.encoder1 = Encoder(in_channels=1, out_channels=64)
+        self.encoder2 = Encoder(in_channels=64, out_channels=128)
+        self.encoder3 = Encoder(in_channels=128, out_channels=256)
+        self.lstm = torch.nn.LSTM(input_size=256, hidden_size=256, num_layers=2)
+        self.decoder1 = Decoder(in_channels=256, out_channels=128)
+        self.decoder2 = Decoder(in_channels=128, out_channels=64)
+        self.decoder3 = Decoder(in_channels=64, out_channels=1)
+    def forward(self, x):
+        out1 = self.encoder1(x)
+        out2 = self.encoder2(out1)
+        out3 = self.encoder3(out2)
+        x = self.lstm(out3)
+        x = self.decoder1(x + out3)
+        x = self.decoder2(x + out2)
+        x = self.decoder3(x + out1)
+        return x

evaluation.py ADDED Viewed

	@@ -0,0 +1,62 @@

+import argparse
+from tqdm import tqdm
+from utils import load_wav, collect_valentini_paths
+from metrics import Metrics
+from denoisers.SpectralGating import SpectralGating
+PARSERS = {
+    'valentini': collect_valentini_paths
+}
+MODELS = {
+    'baseline': SpectralGating
+}
+def evaluate_on_dataset(model_name, dataset_path, dataset_type, ideal):
+    model = MODELS[model_name]()
+    parser = PARSERS[dataset_type]
+    clean_wavs, noisy_wavs = parser(dataset_path)
+    metrics = Metrics()
+    mean_scores = {'PESQ': 0, 'STOI': 0}
+    for clean_path, noisy_path in tqdm(zip(clean_wavs, noisy_wavs), total=len(clean_wavs)):
+        clean_wav = load_wav(clean_path)
+        noisy_wav = load_wav(noisy_path)
+        denoised_wav = model(noisy_wav)
+        if ideal:
+            scores = metrics.calculate(noisy_wav, clean_wav)
+        else:
+            scores = metrics.calculate(noisy_wav, denoised_wav)
+        mean_scores['PESQ'] += scores['PESQ']
+        mean_scores['STOI'] += scores['STOI']
+    mean_scores['PESQ'] = mean_scores['PESQ'].numpy() / len(clean_wavs)
+    mean_scores['STOI'] = mean_scores['STOI'].numpy() / len(clean_wavs)
+    return mean_scores
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(prog='Program to evaluate denoising')
+    parser.add_argument('--dataset_path', type=str,
+                        default='/media/public/datasets/denoising/DS_10283_2791/',
+                        help='Path to dataset folder')
+    parser.add_argument('--dataset_type', type=str, required=True,
+                        choices=['valentini'])
+    parser.add_argument('--model_name', type=str,
+                        choices=['baseline'])
+    parser.add_argument('--ideal', type=bool, default=False,
+                        help="Evaluate metrics on testing data with ideal denoising")
+    args = parser.parse_args()
+    mean_scores = evaluate_on_dataset(model_name=args.model_name,
+                        dataset_path=args.dataset_path,
+                        dataset_type=args.dataset_type,
+                        ideal=args.ideal)
+    print(f"Metrics on {args.dataset_type} dataset with "
+          f"{args.model_name if args.model_name is not None else 'ideal denoising'} = {mean_scores}")

main.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import torch
+print(torch.__version__)
+from torchaudio.utils import download_asset
+def print_hi(name):
+    # Use a breakpoint in the code line below to debug your script.
+    print(f'Hi, {name}')  # Press Ctrl+F8 to toggle the breakpoint.
+# Press the green button in the gutter to run the script.
+if __name__ == '__main__':
+    print_hi('PyCharm')
+# See PyCharm help at https://www.jetbrains.com/help/pycharm/

metrics.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from torchmetrics.audio.pesq import PerceptualEvaluationSpeechQuality
+from torchmetrics.audio.stoi import ShortTimeObjectiveIntelligibility
+import torch
+import torchaudio
+class Metrics:
+    def __init__(self, rate=16000):
+        self.nb_pesq = PerceptualEvaluationSpeechQuality(rate, 'wb')
+        self.stoi = ShortTimeObjectiveIntelligibility(rate, False)
+    def calculate(self, preds, target):
+        return {'PESQ': self.nb_pesq(preds, target),
+                'STOI': self.stoi(preds, target)}

tutorial.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

utils.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import torchaudio
+import torch
+import matplotlib.pyplot as plt
+from pathlib import Path
+def collect_valentini_paths(dataset_path):
+    clean_path = Path(dataset_path) / 'clean_testset_wav'
+    noisy_path = Path(dataset_path) / 'noisy_testset_wav'
+    clean_wavs = list(clean_path.glob("*"))
+    noisy_wavs = list(noisy_path.glob("*"))
+    return clean_wavs, noisy_wavs
+def load_wav(path):
+    wav, org_sr = torchaudio.load(path)
+    wav = torchaudio.functional.resample(wav, orig_freq=org_sr, new_freq=16000)
+    return wav
+def plot_spectrogram(stft, title="Spectrogram", xlim=None):
+    magnitude = stft.abs()
+    spectrogram = 20 * torch.log10(magnitude + 1e-8).numpy()
+    figure, axis = plt.subplots(1, 1)
+    img = axis.imshow(spectrogram, cmap="viridis", vmin=-100, vmax=0, origin="lower", aspect="auto")
+    figure.suptitle(title)
+    plt.colorbar(img, ax=axis)
+    plt.show()
+def plot_mask(mask, title="Mask", xlim=None):
+    mask = mask.numpy()
+    figure, axis = plt.subplots(1, 1)
+    img = axis.imshow(mask, cmap="viridis", origin="lower", aspect="auto")
+    figure.suptitle(title)
+    plt.colorbar(img, ax=axis)
+    plt.show()
+def generate_mixture(waveform_clean, waveform_noise, target_snr):
+    power_clean_signal = waveform_clean.pow(2).mean()
+    power_noise_signal = waveform_noise.pow(2).mean()
+    current_snr = 10 * torch.log10(power_clean_signal / power_noise_signal)
+    waveform_noise *= 10 ** (-(target_snr - current_snr) / 20)
+    return waveform_clean + waveform_noise