dinhdat1110 committed on
Commit 9457143 · verified · 1 Parent(s): a2b808f

Upload folder using huggingface_hub

.gitignore ADDED
@@ -0,0 +1,168 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+ *.jpeg
+ *.gz
+ cifar-10-batches-py
+ checkpoints
+ MNIST
+ *.ipynb
+ data
+ wandb
Dockerfile ADDED
@@ -0,0 +1,8 @@
+ FROM python:latest
+
+ COPY . /app
+ WORKDIR /app
+
+ RUN pip install .
+
+ CMD ["python", "-m", "diffusion.train"]
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 Võ Đình Đạt
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,8 @@
  ---
- title: Diffusion Model
- emoji: 🦀
- colorFrom: blue
- colorTo: yellow
- sdk: gradio
- sdk_version: 4.19.0
+ title: diffusion-model
  app_file: app.py
- pinned: false
+ sdk: gradio
+ sdk_version: 4.18.0
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # latent-diffusion-model
+ Coming Soon!
app.py ADDED
@@ -0,0 +1,53 @@
+ import torch
+ import argparse
+ import gradio as gr
+ import diffusion
+ from torchvision import transforms
+
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--ckpt_path", type=str, default="./checkpoints/mnist.ckpt")
+ parser.add_argument("--map_location", type=str, default="cpu")
+ parser.add_argument("--share", action='store_true')
+ args = parser.parse_args()
+
+ if __name__ == "__main__":
+     model = diffusion.DiffusionModel.load_from_checkpoint(
+         args.ckpt_path, in_channels=1, map_location=args.map_location, num_classes=10
+     )
+     to_pil = transforms.ToPILImage()
+
+     def reset(image):
+         image = to_pil((torch.randn(1, 32, 32)*255).type(torch.uint8))
+         return image
+
+     def denoise(label):
+         labels = torch.tensor([label]).to(model.device)
+         for img in model.sampling_demo(labels=labels):
+             image = to_pil(img[0])
+             yield image
+
+     with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
+         gr.Markdown("# Simple Diffusion Model")
+
+         gr.Markdown("## MNIST")
+         with gr.Row():
+             with gr.Column(scale=2):
+                 label = gr.Dropdown(
+                     label='Label',
+                     choices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+                     value=0
+                 )
+                 with gr.Row():
+                     sample_btn = gr.Button("Sampling")
+                     reset_btn = gr.Button("Reset")
+             output = gr.Image(
+                 value=to_pil((torch.randn(1, 32, 32)*255).type(torch.uint8)),
+                 scale=2,
+                 image_mode="L",
+                 type='pil',
+             )
+         sample_btn.click(denoise, [label], outputs=output)
+         reset_btn.click(reset, [output], outputs=output)
+
+     demo.launch(share=args.share)
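
The demo above drives `DiffusionModel.sampling_demo` through Gradio. For reference, a minimal sketch of using the same checkpoint without the UI, assuming the default `./checkpoints/mnist.ckpt` path from the parser above and a CPU-only environment:

```python
import torch
import diffusion
from torchvision import transforms

# Load the Lightning checkpoint the same way app.py does.
model = diffusion.DiffusionModel.load_from_checkpoint(
    "./checkpoints/mnist.ckpt", in_channels=1, map_location="cpu", num_classes=10
)

# sampling() runs the full reverse process and returns uint8 images in [0, 255].
labels = torch.arange(10)  # one sample per MNIST class
images = model.sampling(labels=labels)
transforms.ToPILImage()(images[0]).save("sample_0.png")
```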
diffusion/README.md ADDED
File without changes
diffusion/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .model import *
+ from .dataset import *
+ from .utils import *
diffusion/dataset/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .cifar10 import *
+ from .mnist import *
+ from .celeba import *
diffusion/dataset/celeba.py ADDED
@@ -0,0 +1,72 @@
+ import pytorch_lightning as pl
+ import torch
+ import os
+ from PIL import Image
+ from torch.utils.data import DataLoader, Dataset, random_split
+ from torchvision import transforms
+ from functools import partial
+
+
+ class CelebADataset(Dataset):
+     def __init__(
+         self,
+         data_dir: str,
+     ):
+         self.list_path = os.listdir(data_dir)
+         self.data_dir = data_dir
+         self.transform = transforms.Compose(
+             [
+                 transforms.Resize((64, 64)),
+                 transforms.ToTensor(),
+                 transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
+             ]
+         )
+
+     def __len__(self):
+         return len(self.list_path)
+
+     def __getitem__(self, index):
+         img = Image.open(os.path.join(self.data_dir, self.list_path[index]))
+         return self.transform(img)
+
+
+ class CelebADataModule(pl.LightningDataModule):
+     def __init__(
+         self,
+         data_dir: str = "./",
+         batch_size: int = 32,
+         num_workers: int = 0,
+         seed: int = 42,
+         train_ratio: float = 0.99
+     ):
+         super().__init__()
+         self.data_dir = data_dir
+         self.batch_size = batch_size
+         self.num_workers = num_workers
+         self.train_ratio = min(train_ratio, 0.99)
+         self.seed = seed
+
+         self.loader = partial(
+             DataLoader,
+             batch_size=self.batch_size,
+             pin_memory=True,
+             num_workers=self.num_workers,
+             persistent_workers=self.num_workers > 0  # persistent workers require num_workers > 0
+         )
+
+     def setup(self, stage: str):
+         if stage == "fit":
+             dataset = CelebADataset(self.data_dir)
+             self.CelebA_train, self.CelebA_val, _ = random_split(
+                 dataset=dataset,
+                 lengths=[self.train_ratio, 0.01, 1 - 0.01 - self.train_ratio],
+                 generator=torch.Generator().manual_seed(self.seed)
+             )
+         else:
+             pass
+
+     def train_dataloader(self):
+         return self.loader(dataset=self.CelebA_train)
+
+     def val_dataloader(self):
+         return self.loader(dataset=self.CelebA_val)
diffusion/dataset/cifar10.py ADDED
@@ -0,0 +1,73 @@
+ import pytorch_lightning as pl
+ import torch
+ from torchvision.datasets import CIFAR10
+ from torch.utils.data import DataLoader, random_split
+ from torchvision import transforms
+ from functools import partial
+
+
+ class CIFAR10DataModule(pl.LightningDataModule):
+     def __init__(
+         self,
+         data_dir: str = "./",
+         batch_size: int = 32,
+         num_workers: int = 0,
+         seed: int = 42,
+         train_ratio: float = 0.99
+     ):
+         super().__init__()
+         self.data_dir = data_dir
+         self.batch_size = batch_size
+         self.num_workers = num_workers
+         self.seed = seed
+         self.train_ratio = min(train_ratio, 0.99)
+         self.transform = transforms.Compose(
+             [
+                 transforms.Resize((32, 32)),
+                 transforms.ToTensor(),
+                 transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
+             ]
+         )
+         self.loader = partial(
+             DataLoader,
+             batch_size=self.batch_size,
+             pin_memory=True,
+             num_workers=self.num_workers,
+             persistent_workers=self.num_workers > 0  # persistent workers require num_workers > 0
+         )
+
+     def setup(self, stage: str):
+         cifar_partial = partial(
+             CIFAR10,
+             root=self.data_dir, transform=self.transform, download=True
+         )
+         if stage == "fit":
+             retrying = True
+             while retrying:
+                 try:
+                     cifar_full = cifar_partial(train=True)
+                     retrying = False
+                 except Exception:  # retry on transient download errors
+                     pass
+             self.cifar_train, self.cifar_val, _ = random_split(
+                 dataset=cifar_full,
+                 lengths=[self.train_ratio, 0.01, 1 - 0.01 - self.train_ratio],
+                 generator=torch.Generator().manual_seed(self.seed)
+             )
+         else:
+             retrying = True
+             while retrying:
+                 try:
+                     self.cifar_test = cifar_partial(train=False)
+                     retrying = False
+                 except Exception:  # retry on transient download errors
+                     pass
+
+     def train_dataloader(self):
+         return self.loader(dataset=self.cifar_train)
+
+     def val_dataloader(self):
+         return self.loader(dataset=self.cifar_val)
+
+     def test_dataloader(self):
+         return self.loader(dataset=self.cifar_test)
diffusion/dataset/mnist.py ADDED
@@ -0,0 +1,73 @@
+ import pytorch_lightning as pl
+ import torch
+ from torchvision.datasets import MNIST
+ from torch.utils.data import DataLoader, random_split
+ from torchvision import transforms
+ from functools import partial
+
+
+ class MNISTDataModule(pl.LightningDataModule):
+     def __init__(
+         self,
+         data_dir: str = "./",
+         batch_size: int = 32,
+         num_workers: int = 0,
+         seed: int = 42,
+         train_ratio: float = 0.99
+     ):
+         super().__init__()
+         self.data_dir = data_dir
+         self.batch_size = batch_size
+         self.num_workers = num_workers
+         self.train_ratio = min(train_ratio, 0.99)
+         self.seed = seed
+         self.transform = transforms.Compose(
+             [
+                 transforms.Resize((32, 32)),
+                 transforms.ToTensor(),
+                 transforms.Normalize(mean=(0.5,), std=(0.5,))
+             ]
+         )
+         self.loader = partial(
+             DataLoader,
+             batch_size=self.batch_size,
+             pin_memory=True,
+             num_workers=self.num_workers,
+             persistent_workers=self.num_workers > 0  # persistent workers require num_workers > 0
+         )
+
+     def setup(self, stage: str):
+         mnist_partial = partial(
+             MNIST,
+             root=self.data_dir, transform=self.transform, download=True
+         )
+         if stage == "fit":
+             retrying = True
+             while retrying:
+                 try:
+                     mnist_full = mnist_partial(train=True)
+                     retrying = False
+                 except Exception:  # retry on transient download errors
+                     pass
+             self.mnist_train, self.mnist_val, _ = random_split(
+                 dataset=mnist_full,
+                 lengths=[self.train_ratio, 0.01, 1 - 0.01 - self.train_ratio],
+                 generator=torch.Generator().manual_seed(self.seed)
+             )
+         else:
+             retrying = True
+             while retrying:
+                 try:
+                     self.mnist_test = mnist_partial(train=False)
+                     retrying = False
+                 except Exception:  # retry on transient download errors
+                     pass
+
+     def train_dataloader(self):
+         return self.loader(dataset=self.mnist_train)
+
+     def val_dataloader(self):
+         return self.loader(dataset=self.mnist_val)
+
+     def test_dataloader(self):
+         return self.loader(dataset=self.mnist_test)
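
The three datamodules share the same interface, so a quick smoke test outside the `Trainer` looks the same for each of them; a sketch for MNIST (paths and sizes are illustrative):

```python
import diffusion

# Downloads MNIST into ./data/ on first use, then yields normalized 32x32 batches.
dm = diffusion.MNISTDataModule(data_dir="./data/", batch_size=32, num_workers=4)
dm.setup("fit")
x, y = next(iter(dm.train_dataloader()))
print(x.shape, y.shape)  # expected: torch.Size([32, 1, 32, 32]) torch.Size([32])
```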
diffusion/model/__init__.py ADDED
@@ -0,0 +1,2 @@
+ from .diffusion import *
+ from .ldm import *
diffusion/model/diffusion/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from .unet import *
+ from .model import *
+ from .sampling import *
+ from .scheduler import *
diffusion/model/diffusion/model.py ADDED
@@ -0,0 +1,188 @@
+ import torch
+ import torch.nn as nn
+ import numpy as np
+ import pytorch_lightning as pl
+ import diffusion
+ import wandb
+ from torchvision.utils import make_grid
+ from torch.optim.lr_scheduler import OneCycleLR
+
+
+ class DiffusionModel(pl.LightningModule):
+     def __init__(
+         self,
+         lr: float = 1e-4,
+         max_timesteps: int = 1000,
+         beta_1: float = 0.0001,
+         beta_2: float = 0.02,
+         in_channels: int = 3,
+         dim: int = 32,
+         num_classes: int | None = 10,
+         sample_per_epochs: int = 50,
+         n_samples: int = 16
+     ):
+         super().__init__()
+         self.save_hyperparameters()
+
+         self.model = diffusion.ConditionalUNet(
+             c_in=in_channels,
+             c_out=in_channels,
+             num_classes=num_classes
+         )
+         self.lr = lr
+         self.max_timesteps = max_timesteps
+         self.in_channels = in_channels
+         self.dim = dim
+         self.num_classes = num_classes
+
+         self.scheduler = diffusion.LinearScheduler(
+             max_timesteps, beta_1, beta_2
+         )
+
+         self.criterion = nn.MSELoss()
+
+         self.spe = sample_per_epochs
+         self.n_samples = n_samples
+         self.epoch_count = 0
+         self.train_loss = []
+         self.val_loss = []
+
+         self.sampling_kwargs = {
+             'model': self.model,
+             'scheduler': self.scheduler,
+             'max_timesteps': self.max_timesteps,
+             'in_channels': self.in_channels,
+             'dim': self.dim,
+         }
+
+     def _batch_index_select(
+         self,
+         x: torch.Tensor,
+         t: torch.Tensor,
+         device: torch.device
+     ):
+         # x.shape = [T,]
+         # t.shape = [B,]
+         if x.device != device:
+             x = x.to(device)
+         if t.device != device:
+             t = t.to(device)
+         x_select = x.gather(dim=-1, index=t)
+         return x_select[:, None, None, None]  # [B,1,1,1]
+
+     def noising(
+         self,
+         x_0: torch.Tensor,
+         t: torch.Tensor
+     ):
+         noise = torch.randn_like(x_0, device=x_0.device)
+         new_x = self.scheduler.get('sqrt_alpha_hat', t) * x_0
+         new_noise = self.scheduler.get('sqrt_one_minus_alpha_hat', t) * noise
+         return new_x + new_noise, noise
+
+     def sampling(self, labels=None, n_samples: int = 16):
+         return diffusion.ddpm_sampling(
+             n_samples=n_samples,
+             labels=labels,
+             **self.sampling_kwargs
+         )
+
+     def sampling_demo(self, labels=None, n_samples: int = 16):
+         return diffusion.ddpm_sampling_demo(
+             n_samples=n_samples,
+             labels=labels,
+             **self.sampling_kwargs
+         )
+
+     def forward(self, x_0, labels):
+         t = torch.randint(
+             low=0, high=self.max_timesteps, size=(x_0.shape[0],), device=x_0.device
+         )
+         x_noise, noise = self.noising(x_0, t)
+         noise_pred = self.model(x_noise, t, labels)
+         return noise, noise_pred
+
+     def training_step(self, batch, idx):
+         if isinstance(batch, torch.Tensor):
+             x_0 = batch
+             labels = None
+         else:
+             x_0, labels = batch
+             if np.random.random() < 0.1:
+                 labels = None
+         noise, noise_pred = self(x_0, labels)
+         loss = self.criterion(noise, noise_pred)
+         self.train_loss.append(loss)
+         return loss
+
+     def validation_step(self, batch, idx):
+         if isinstance(batch, torch.Tensor):
+             x_0 = batch
+             labels = None
+         else:
+             x_0, labels = batch
+         noise, noise_pred = self(x_0, labels)
+         loss = self.criterion(noise, noise_pred)
+         self.val_loss.append(loss)
+         return loss
+
+     def on_train_epoch_end(self) -> None:
+         self.log_dict(
+             {
+                 "train_loss": sum(self.train_loss) / len(self.train_loss)
+             },
+             sync_dist=True
+         )
+         self.train_loss.clear()
+
+         if self.epoch_count % self.spe == 0:
+             wandblog = self.logger.experiment
+             x_t = self.sampling(n_samples=self.n_samples)
+             img_array = [x_t[i] for i in range(x_t.shape[0])]
+
+             wandblog.log(
+                 {
+                     "sampling": wandb.Image(
+                         make_grid(img_array, nrow=4).permute(1, 2, 0).cpu().numpy(),
+                         caption="Sampled Image!"
+                     )
+                 }
+             )
+
+         self.epoch_count += 1
+
+     def on_validation_epoch_end(self):
+         self.log_dict(
+             {
+                 "val_loss": sum(self.val_loss) / len(self.val_loss)
+             },
+             sync_dist=True
+         )
+         self.val_loss.clear()
+
+     def configure_optimizers(self):
+         optimizer = torch.optim.AdamW(
+             params=self.parameters(),
+             lr=self.lr,
+             weight_decay=0.001,
+             betas=(0.9, 0.999)
+         )
+         scheduler = OneCycleLR(
+             optimizer=optimizer,
+             max_lr=self.lr,
+             total_steps=self.trainer.estimated_stepping_batches,
+         )
+         return {
+             'optimizer': optimizer,
+             'lr_scheduler': scheduler
+         }
+
+
+ if __name__ == "__main__":
+     a = torch.randn(32, 3, 32, 32)
+     model = DiffusionModel(max_timesteps=10)
+     n, n_pred = model(a, None)  # labels=None exercises the unconditional path
+     print(n.shape, n_pred.shape)
+     print(torch.mean((n-n_pred)**2))
+     print(model.sampling(n_samples=1))
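
`noising` is the closed-form DDPM forward process: with `sqrt_alpha_hat` and `sqrt_one_minus_alpha_hat` taken from `LinearScheduler`, it draws

```latex
x_t = \sqrt{\bar\alpha_t}\, x_0 + \sqrt{1 - \bar\alpha_t}\, \epsilon,
\qquad \epsilon \sim \mathcal{N}(0, I),
```

and the training objective is the MSE between $\epsilon$ and the network's prediction $\epsilon_\theta(x_t, t, y)$, with labels dropped 10% of the time so that classifier-free guidance can be used at sampling time.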
diffusion/model/diffusion/sampling.py ADDED
@@ -0,0 +1,82 @@
+ import torch
+
+
+ def ddpm_sampling_timestep(
+     x_t,
+     model,
+     scheduler,
+     labels,
+     t,
+     n_samples: int = 16,
+     cfg_scale: int = 3,
+ ):
+     time = torch.full((n_samples,), fill_value=t, device=model.device)
+     pred_noise = model(x_t, time, labels)
+     if cfg_scale > 0:
+         uncond_pred_noise = model(x_t, time, None)
+         pred_noise = torch.lerp(uncond_pred_noise, pred_noise, cfg_scale)
+     alpha = scheduler.get('alpha', time)
+     sqrt_alpha = scheduler.get('sqrt_alpha', time)
+     somah = scheduler.get('sqrt_one_minus_alpha_hat', time)
+     sqrt_beta = scheduler.get('sqrt_beta', time)
+     if t > 0:
+         noise = torch.randn_like(x_t, device=model.device)
+     else:
+         noise = torch.zeros_like(x_t, device=model.device)
+
+     x_t_new = 1 / sqrt_alpha * (x_t - (1-alpha) / somah * pred_noise) + sqrt_beta * noise
+     return x_t_new.clamp(-1, 1)
+
+
+ @torch.no_grad()
+ def ddpm_sampling(
+     model,
+     scheduler,
+     n_samples: int = 16,
+     max_timesteps: int = 1000,
+     in_channels: int = 3,
+     dim: int = 32,
+     cfg_scale: int = 3,
+     labels=None
+ ):
+     if labels is not None:
+         n_samples = labels.shape[0]
+
+     x_t = torch.randn(
+         n_samples, in_channels, dim, dim, device=model.device
+     )
+     model.eval()
+     for t in range(max_timesteps-1, -1, -1):
+         x_t = ddpm_sampling_timestep(x_t=x_t, model=model, scheduler=scheduler,
+                                      labels=labels, t=t, n_samples=n_samples,
+                                      cfg_scale=cfg_scale)
+
+     model.train()
+     x_t = (x_t + 1) / 2 * 255.  # range [0,255]
+     return x_t.type(torch.uint8)
+
+
+ @torch.no_grad()
+ def ddpm_sampling_demo(
+     model,
+     scheduler,
+     n_samples: int = 16,
+     max_timesteps: int = 1000,
+     in_channels: int = 3,
+     dim: int = 32,
+     cfg_scale: int = 3,
+     labels=None
+ ):
+     if labels is not None:
+         n_samples = labels.shape[0]
+
+     x_t = torch.randn(
+         n_samples, in_channels, dim, dim, device=model.device
+     )
+     model.eval()
+     for t in range(max_timesteps-1, -1, -1):
+         x_t = ddpm_sampling_timestep(x_t=x_t, model=model, scheduler=scheduler,
+                                      labels=labels, t=t, n_samples=n_samples,
+                                      cfg_scale=cfg_scale)
+         yield ((x_t + 1) / 2 * 255).type(torch.uint8)
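
`ddpm_sampling_timestep` implements the standard DDPM reverse update with classifier-free guidance. Since `torch.lerp(uncond, cond, w) = uncond + w * (cond - uncond)`, the guided noise estimate and the per-step update correspond to

```latex
\hat\epsilon = (1 - w)\,\epsilon_\theta(x_t, t) + w\,\epsilon_\theta(x_t, t, y),
\qquad
x_{t-1} = \frac{1}{\sqrt{\alpha_t}}\left(x_t - \frac{1 - \alpha_t}{\sqrt{1 - \bar\alpha_t}}\,\hat\epsilon\right) + \sqrt{\beta_t}\, z,
\quad z \sim \mathcal{N}(0, I)\ \text{for}\ t > 0,\ z = 0\ \text{at}\ t = 0,
```

with $w$ = `cfg_scale`; clamping each intermediate $x_{t-1}$ to $[-1, 1]$ is a choice specific to this implementation.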
diffusion/model/diffusion/scheduler.py ADDED
@@ -0,0 +1,20 @@
+ import torch
+
+
+ class LinearScheduler:
+     def __init__(
+         self,
+         max_timesteps: int = 1000,
+         beta_1: float = 0.0001,
+         beta_2: float = 0.02
+     ) -> None:
+         self.beta = torch.linspace(beta_1, beta_2, max_timesteps)
+         self.sqrt_beta = torch.sqrt(self.beta)[:, None, None, None]
+         self.alpha = (1 - self.beta)[:, None, None, None]
+         self.sqrt_alpha = torch.sqrt(self.alpha)
+         self.alpha_hat = torch.cumprod(1 - self.beta, dim=0)[:, None, None, None]
+         self.sqrt_alpha_hat = torch.sqrt(self.alpha_hat)
+         self.sqrt_one_minus_alpha_hat = torch.sqrt(1 - self.alpha_hat)
+
+     def get(self, key: str, t: torch.Tensor):
+         return self.__dict__[key].to(t.device)[t]
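
A short sanity check of the precomputed schedule, assuming the defaults above (exact values depend on `beta_1` and `beta_2`):

```python
import torch
import diffusion

sched = diffusion.LinearScheduler(max_timesteps=1000, beta_1=1e-4, beta_2=0.02)
t = torch.tensor([0, 500, 999])
# alpha_t = 1 - beta_t stays close to 1 early in the schedule,
# while sqrt(alpha_hat_t) decays monotonically toward 0.
print(sched.get('alpha', t).flatten())
print(sched.get('sqrt_alpha_hat', t).flatten())
```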
diffusion/model/diffusion/unet.py ADDED
@@ -0,0 +1,227 @@
+
+ import torch
+ import torch.nn as nn
+ import pytorch_lightning as pl
+ from einops import rearrange, repeat
+
+
+ class SelfAttention(nn.Module):
+     def __init__(
+         self,
+         channels: int
+     ):
+         super(SelfAttention, self).__init__()
+         self.channels = channels
+         self.mha = nn.MultiheadAttention(channels, 4, batch_first=True)
+         self.ln = nn.LayerNorm([channels])
+         self.ff_self = nn.Sequential(
+             nn.LayerNorm([channels]),
+             nn.Linear(channels, channels),
+             nn.GELU(),
+             nn.Linear(channels, channels),
+         )
+
+     def forward(self, x):
+         B, C, H, W = x.shape
+         x = rearrange(x, 'b c h w -> b (h w) c').contiguous()
+         x_ln = self.ln(x)
+         attention_value, _ = self.mha(x_ln, x_ln, x_ln)
+         attention_value = attention_value + x
+         attention_value = self.ff_self(attention_value) + attention_value
+         return rearrange(attention_value, 'b (h w) c -> b c h w', h=H, w=W).contiguous()
+
+
+ class DoubleConv(nn.Module):
+     def __init__(
+         self,
+         in_channels: int,
+         out_channels: int,
+         mid_channels: int | None = None,
+         residual: bool = False
+     ):
+         super().__init__()
+         self.residual = residual
+         if not mid_channels:
+             mid_channels = out_channels
+         self.double_conv = nn.Sequential(
+             nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
+             nn.GroupNorm(8, mid_channels),
+             nn.GELU(),
+             nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
+             nn.GroupNorm(8, out_channels),
+         )
+
+     def forward(self, x):
+         if self.residual:
+             return (x + self.double_conv(x)) / 1.414
+         else:
+             return self.double_conv(x)
+
+
+ class DownSample(nn.Module):
+     def __init__(
+         self,
+         in_channels: int,
+         out_channels: int,
+         emb_dim: int = 256
+     ):
+         super().__init__()
+         self.maxpool_conv = nn.Sequential(
+             nn.MaxPool2d(2),
+             DoubleConv(in_channels, in_channels, residual=True),
+             DoubleConv(in_channels, out_channels),
+         )
+
+         self.emb_layer = nn.Sequential(
+             nn.SiLU(),
+             nn.Linear(emb_dim, out_channels),
+         )
+
+     def forward(self, x, t):
+         x = self.maxpool_conv(x)
+         _, _, H, W = x.shape
+         emb = repeat(self.emb_layer(t), 'b d -> b d h w', h=H, w=W).contiguous()
+         return x + emb
+
+
+ class UpSample(nn.Module):
+     def __init__(
+         self,
+         in_channels: int,
+         out_channels: int,
+         emb_dim: int = 256
+     ):
+         super().__init__()
+
+         self.up = nn.ConvTranspose2d(in_channels, out_channels, 2, stride=2)
+         self.conv = nn.Sequential(
+             DoubleConv(in_channels, in_channels, residual=True),
+             DoubleConv(in_channels, out_channels, in_channels // 2),
+         )
+
+         self.emb_layer = nn.Sequential(
+             nn.SiLU(),
+             nn.Linear(emb_dim, out_channels)
+         )
+
+     def forward(self, x, skip_x, t):
+         x = self.up(x)
+         x = torch.cat([skip_x, x], dim=1)
+         x = self.conv(x)
+         _, _, H, W = x.shape
+         emb = repeat(self.emb_layer(t), 'b d -> b d h w', h=H, w=W).contiguous()
+         return x + emb
+
+
+ class UNet(pl.LightningModule):
+     def __init__(
+         self,
+         c_in: int = 3,
+         c_out: int = 3,
+         time_dim: int = 256
+     ):
+         super().__init__()
+         self.time_dim = time_dim
+
+         self.time_embed = nn.Sequential(
+             nn.Linear(time_dim, time_dim),
+             nn.SiLU(),
+             nn.Linear(time_dim, time_dim),
+         )
+         self.inc = DoubleConv(in_channels=c_in, out_channels=64)
+         self.down1 = DownSample(in_channels=64, out_channels=128)
+         self.sa1 = SelfAttention(channels=128)
+         self.down2 = DownSample(in_channels=128, out_channels=256)
+         self.sa2 = SelfAttention(channels=256)
+         self.down3 = DownSample(in_channels=256, out_channels=256)
+         self.sa3 = SelfAttention(channels=256)
+
+         self.mid1 = DoubleConv(in_channels=256, out_channels=512)
+         self.mid2 = DoubleConv(in_channels=512, out_channels=512)
+
+         self.up1 = UpSample(in_channels=512, out_channels=256)
+         self.sa4 = SelfAttention(channels=256)
+         self.up2 = UpSample(in_channels=256, out_channels=128)
+         self.sa5 = SelfAttention(channels=128)
+         self.up3 = UpSample(in_channels=128, out_channels=64)
+         self.sa6 = SelfAttention(channels=64)
+         self.outc = nn.Conv2d(64, c_out, kernel_size=1)
+
+     def pos_encoding(self, t, channels):
+         inv_freq = 1.0 / (
+             10000
+             ** (torch.arange(0, channels, 2).float().to(t.device) / channels)
+         ) * t.repeat(1, channels // 2)
+
+         pos_enc = torch.zeros((t.shape[0], channels), device=t.device)
+         pos_enc[:, 0::2] = torch.sin(inv_freq)
+         pos_enc[:, 1::2] = torch.cos(inv_freq)
+         return pos_enc
+
+     def forward_unet(self, x, t):
+         x1 = self.inc(x)
+         x2 = self.down1(x1, t)
+         x2 = self.sa1(x2)
+         x3 = self.down2(x2, t)
+         x3 = self.sa2(x3)
+         x4 = self.down3(x3, t)
+         x4 = self.sa3(x4)
+
+         x4 = self.mid1(x4)
+         x4 = self.mid2(x4)
+
+         x = self.up1(x4, x3, t)
+         x = self.sa4(x)
+         x = self.up2(x, x2, t)
+         x = self.sa5(x)
+         x = self.up3(x, x1, t)
+         x = self.sa6(x)
+         output = self.outc(x)
+         return output
+
+     def forward(
+         self,
+         x: torch.Tensor,
+         t: torch.Tensor
+     ):
+         t = t.unsqueeze(-1).type(torch.float)
+         t = self.pos_encoding(t, self.time_dim)
+         t = self.time_embed(t)
+         return self.forward_unet(x, t)
+
+
+ class ConditionalUNet(UNet):
+     def __init__(
+         self,
+         c_in: int = 3,
+         c_out: int = 3,
+         time_dim: int = 256,
+         num_classes: int | None = None,
+     ):
+         super().__init__(c_in, c_out, time_dim)
+         self.num_classes = num_classes
+         if num_classes is not None:
+             self.cls_embed = nn.Embedding(num_classes, time_dim)
+
+     def forward(
+         self,
+         x: torch.Tensor,
+         t: torch.Tensor,
+         label: torch.Tensor | None = None
+     ):
+         t = t.unsqueeze(-1).type(torch.float)
+         t = self.pos_encoding(t, self.time_dim)
+         t = self.time_embed(t)
+
+         if label is not None and self.num_classes is not None:
+             t += self.cls_embed(label)
+         return self.forward_unet(x, t)
+
+
+ if __name__ == '__main__':
+     net = ConditionalUNet()
+     print(sum([p.numel() for p in net.parameters()]))
+     x = torch.randn(2, 3, 32, 32)
+     t = x.new_tensor([500] * x.shape[0]).long()
+     print(t)
+     print(net(x, t).shape)
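
`pos_encoding` is the usual sinusoidal timestep embedding: for embedding width $d$ (= `time_dim`) and timestep $t$ it fills the even and odd channels with

```latex
\mathrm{PE}(t)_{2k} = \sin\!\left(t \cdot 10000^{-2k/d}\right),
\qquad
\mathrm{PE}(t)_{2k+1} = \cos\!\left(t \cdot 10000^{-2k/d}\right),
\qquad k = 0, \dots, d/2 - 1,
```

which is then passed through `time_embed` and, in `ConditionalUNet`, summed with the class embedding before being broadcast into every `DownSample`/`UpSample` block.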
diffusion/model/ldm/__init__.py ADDED
@@ -0,0 +1 @@
+ from .model import *
diffusion/model/ldm/model.py ADDED
@@ -0,0 +1,5 @@
+ import torch
+ import pytorch_lightning as pl
+
+ class LatentDiffusionModel(pl.LightningModule):
+     pass
diffusion/model/ldm/tests/__init__.py ADDED
File without changes
diffusion/tests/__init__.py ADDED
File without changes
diffusion/train/__init__.py ADDED
File without changes
diffusion/train/__main__.py ADDED
@@ -0,0 +1,164 @@
+ from pytorch_lightning.loggers import WandbLogger
+ import diffusion
+ import torch
+ import wandb
+ import pytorch_lightning as pl
+ import argparse
+ import os
+
+ torch.multiprocessing.set_sharing_strategy('file_system')
+
+
+ def main():
+     # PARSER
+     parser = argparse.ArgumentParser()
+     parser.add_argument(
+         '--dataset', '-d', type=str, default='mnist',
+         help='choose dataset'
+     )
+     parser.add_argument(
+         '--data_dir', '-dd', type=str, default='./data/',
+         help='data directory'
+     )
+     parser.add_argument(
+         '--max_epochs', '-me', type=int, default=200,
+         help='max epoch'
+     )
+     parser.add_argument(
+         '--batch_size', '-bs', type=int, default=32,
+         help='batch size'
+     )
+     parser.add_argument(
+         '--train_ratio', '-tr', type=float, default=0.99,
+         help='train split ratio'
+     )
+     parser.add_argument(
+         '--timesteps', '-ts', type=int, default=1000,
+         help='max timesteps diffusion'
+     )
+     parser.add_argument(
+         '--max_batch_size', '-mbs', type=int, default=32,
+         help='max batch size'
+     )
+     parser.add_argument(
+         '--lr', '-l', type=float, default=1e-4,
+         help='learning rate'
+     )
+     parser.add_argument(
+         '--num_workers', '-nw', type=int, default=4,
+         help='number of workers'
+     )
+     parser.add_argument(
+         '--seed', '-s', type=int, default=42,
+         help='seed'
+     )
+     parser.add_argument(
+         '--name', '-n', type=str, default=None,
+         help='name of the experiment'
+     )
+     parser.add_argument(
+         '--pbar', action='store_true',
+         help='progress bar'
+     )
+     parser.add_argument(
+         '--precision', '-p', type=str, default='32',
+         help='numerical precision'
+     )
+     parser.add_argument(
+         '--sample_per_epochs', '-spe', type=int, default=25,
+         help='sample every n epochs'
+     )
+     parser.add_argument(
+         '--n_samples', '-ns', type=int, default=4,
+         help='number of samples to generate'
+     )
+     parser.add_argument(
+         '--monitor', '-m', type=str, default='val_loss',
+         help='callbacks monitor'
+     )
+     parser.add_argument(
+         '--wandb', '-wk', type=str, default=None,
+         help='wandb API key'
+     )
+
+     args = parser.parse_args()
+
+     # SEED
+     pl.seed_everything(args.seed, workers=True)
+
+     # WANDB (OPTIONAL)
+     if args.wandb is not None:
+         wandb.login(key=args.wandb)  # API KEY
+         name = args.name or f"diffusion-{args.max_epochs}-{args.batch_size}-{args.lr}"
+         logger = WandbLogger(
+             project="diffusion-model",
+             name=name,
+             log_model=False
+         )
+     else:
+         logger = None
+
+     # DATAMODULE
+     if args.dataset == "mnist":
+         DATAMODULE = diffusion.MNISTDataModule
+         img_dim = 32
+         num_classes = 10
+     elif args.dataset == "cifar10":
+         DATAMODULE = diffusion.CIFAR10DataModule
+         img_dim = 32
+         num_classes = 10
+     elif args.dataset == "celeba":
+         DATAMODULE = diffusion.CelebADataModule
+         img_dim = 64
+         num_classes = None
+     else:
+         raise ValueError(f"unknown dataset: {args.dataset}")
+
+     datamodule = DATAMODULE(
+         data_dir=args.data_dir,
+         batch_size=args.batch_size,
+         num_workers=args.num_workers,
+         seed=args.seed,
+         train_ratio=args.train_ratio
+     )
+
+     # MODEL
+     in_channels = 1 if args.dataset == "mnist" else 3
+     model = diffusion.DiffusionModel(
+         lr=args.lr,
+         in_channels=in_channels,
+         sample_per_epochs=args.sample_per_epochs,
+         max_timesteps=args.timesteps,
+         dim=img_dim,
+         num_classes=num_classes,
+         n_samples=args.n_samples
+     )
+
+     # CALLBACK
+     root_path = os.path.join(os.getcwd(), "checkpoints")
+     callback = diffusion.ModelCallback(
+         root_path=root_path,
+         ckpt_monitor=args.monitor
+     )
+
+     # STRATEGY
+     strategy = 'ddp_find_unused_parameters_true' if torch.cuda.is_available() else 'auto'
+
+     # TRAINER
+     trainer = pl.Trainer(
+         default_root_dir=root_path,
+         logger=logger,
+         callbacks=callback.get_callback(),
+         gradient_clip_val=0.5,
+         max_epochs=args.max_epochs,
+         enable_progress_bar=args.pbar,
+         deterministic=False,
+         precision=args.precision,
+         strategy=strategy,
+         accumulate_grad_batches=max(int(args.max_batch_size / args.batch_size), 1)
+     )
+
+     # FIT MODEL
+     trainer.fit(model=model, datamodule=datamodule)
+
+
+ if __name__ == '__main__':
+     main()
diffusion/utils/__init__.py ADDED
@@ -0,0 +1,2 @@
+ from .callback import *
+ from .ema import *
diffusion/utils/callback.py ADDED
@@ -0,0 +1,38 @@
+ import os
+ import diffusion
+ from pytorch_lightning.callbacks import (
+     ModelCheckpoint,
+     LearningRateMonitor
+ )
+
+
+ class ModelCallback:
+     def __init__(
+         self,
+         root_path: str,
+         ckpt_monitor: str = "val_loss",
+         ckpt_mode: str = "min",
+     ):
+         ckpt_path = os.path.join(root_path, "model/")
+         if not os.path.exists(root_path):
+             os.makedirs(root_path)
+         if not os.path.exists(ckpt_path):
+             os.makedirs(ckpt_path)
+
+         self.ckpt_callback = ModelCheckpoint(
+             monitor=ckpt_monitor,
+             dirpath=ckpt_path,
+             filename="model",
+             save_top_k=1,
+             mode=ckpt_mode,
+             save_weights_only=True
+         )
+
+         self.lr_callback = LearningRateMonitor("step")
+
+         self.ema_callback = diffusion.EMACallback(decay=0.995)
+
+     def get_callback(self):
+         return [
+             self.ckpt_callback, self.lr_callback, self.ema_callback
+         ]
diffusion/utils/ema.py ADDED
@@ -0,0 +1,75 @@
+ from pytorch_lightning.callbacks import Callback
+ from timm.utils.model import get_state_dict, unwrap_model
+ from timm.utils.model_ema import ModelEmaV2
+
+
+ class EMACallback(Callback):
+     """
+     Model Exponential Moving Average. Empirically it has been found that using the moving average
+     of the trained parameters of a deep network is better than using its trained parameters directly.
+
+     If `use_ema_weights`, then the EMA parameters of the network are set after training ends.
+     """
+
+     def __init__(self, decay=0.9999, use_ema_weights: bool = True):
+         self.decay = decay
+         self.ema = None
+         self.use_ema_weights = use_ema_weights
+
+     def on_fit_start(self, trainer, pl_module, *args):
+         "Initialize `ModelEmaV2` from timm to keep a copy of the moving average of the weights"
+         self.ema = ModelEmaV2(pl_module, decay=self.decay, device=None)
+
+     def on_train_batch_end(
+         self, trainer, pl_module, *args
+     ):
+         "Update the stored parameters using a moving average"
+         # Update currently maintained parameters.
+         self.ema.update(pl_module)
+
+     def on_validation_epoch_start(self, trainer, pl_module, *args):
+         "Do validation using the stored parameters"
+         # save original parameters before replacing with EMA version
+         self.store(pl_module.parameters())
+
+         # update the LightningModule with the EMA weights
+         # ~ Copy EMA parameters to LightningModule
+         self.copy_to(self.ema.module.parameters(), pl_module.parameters())
+
+     def on_validation_end(self, trainer, pl_module, *args):
+         "Restore original parameters to resume training later"
+         self.restore(pl_module.parameters())
+
+     def on_train_end(self, trainer, pl_module, *args):
+         # update the LightningModule with the EMA weights
+         if self.use_ema_weights:
+             self.copy_to(self.ema.module.parameters(), pl_module.parameters())
+
+     def on_save_checkpoint(self, trainer, pl_module, checkpoint, *args):
+         if self.ema is not None:
+             return {"state_dict_ema": get_state_dict(self.ema, unwrap_model)}
+
+     def on_load_checkpoint(self, callback_state, *args):
+         if self.ema is not None:
+             self.ema.module.load_state_dict(callback_state["state_dict_ema"])
+
+     def store(self, parameters):
+         "Save the current parameters for restoring later."
+         self.collected_params = [param.clone() for param in parameters]
+
+     def restore(self, parameters):
+         """
+         Restore the parameters stored with the `store` method.
+         Useful to validate the model with EMA parameters without affecting the
+         original optimization process.
+         """
+         for c_param, param in zip(self.collected_params, parameters):
+             param.data.copy_(c_param.data)
+
+     def copy_to(self, shadow_parameters, parameters):
+         "Copy current parameters into given collection of parameters."
+         for s_param, param in zip(shadow_parameters, parameters):
+             if param.requires_grad:
+                 param.data.copy_(s_param.data)
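
`ModelEmaV2` from timm maintains a shadow copy of the weights, updated after every training batch as

```latex
\theta_{\text{EMA}} \leftarrow \lambda\,\theta_{\text{EMA}} + (1 - \lambda)\,\theta,
```

where $\lambda$ is the `decay` argument (0.995 as wired up in `ModelCallback`); validation and, if `use_ema_weights` is set, the final weights then use $\theta_{\text{EMA}}$ instead of the raw parameters.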
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,22 @@
+ [tool.poetry]
+ name = "diffusion"
+ version = "0.1.0"
+ description = ""
+ authors = ["Võ Đình Đạt <[email protected]>"]
+ readme = "README.md"
+
+ [tool.poetry.dependencies]
+ python = "^3.10"
+ torch = "*"
+ lightning = "*"
+ einops = "^0.7.0"
+ torchvision = "*"
+ wandb = "^0.16.3"
+ torchaudio = "*"
+ tqdm = "^4.66.2"
+ timm = "^0.9.12"
+ gradio = "^4.18.0"
+
+ [build-system]
+ requires = ["poetry-core"]
+ build-backend = "poetry.core.masonry.api"
script/setup.sh ADDED
@@ -0,0 +1 @@
+ pip install .