dragynir committed on
Commit 8ed2153 · 1 Parent(s): 432f235
README.md CHANGED
@@ -11,3 +11,31 @@ license: mit
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+
+Example prompts:
+
+a handsome man relaxing in a chair, shirt widely unbuttoned, eyes closed, close up, bright red and yellow sunrise, 4k resolution photo realistic
+
+
+# TODO
+
+0) Figure out how to wrap this in a Hugging Face app.
+   An example of how such an app can be built: https://huggingface.co/spaces/wildoctopus/cloth-segmentation/tree/main
+1) Push the model.
+2) Write a UI with RGB colors (so clothing can be generated from a template rather than from an image).
+3) Add seed to the parameters.
+4) Expose the remaining parameters.
+5) Redis caching of responses - just for fun.
+
+
+# Big plans
+
+1) Make a Hugging Face Space with this model (write the model card); produce three RGB clothing masks - (255, 0, 0), (0, 255, 0), (0, 0, 255) - as sketched below.
+2) Build a service on top of this model.
+3) Retrain the model at 1024x1024 with better positioning (training as in SDXL).
+4) Come up with other conditions on new datasets that could be useful.
+
+# Project
+
+https://huggingface.co/spaces/dragynir/fashion_controlnet
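A minimal sketch of the RGB-mask idea from the plan above, assuming the segmentation model's four classes (0 = background, 1-3 = clothing parts); `CLASS_COLORS` and `classes_to_rgb` are hypothetical names, not part of this commit:

```python
import numpy as np

# Hypothetical mapping from cloth-segmentation class ids to the pure-RGB
# control colors named in the plan above.
CLASS_COLORS = {
    1: (255, 0, 0),
    2: (0, 255, 0),
    3: (0, 0, 255),
}


def classes_to_rgb(mask: np.ndarray) -> np.ndarray:
    """Convert an HxW class-index mask into an HxWx3 RGB control image."""
    rgb = np.zeros((*mask.shape, 3), dtype=np.uint8)
    for cls, color in CLASS_COLORS.items():
        rgb[mask == cls] = color
    return rgb
```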
app.py CHANGED
@@ -1,9 +1,43 @@
+import numpy as np
+import torch
 import gradio as gr
 
+from config import PipelineConfig
+from src.pipeline import FashionPipeline, PipelineOutput
+
+config = PipelineConfig()
+fashion_pipeline = FashionPipeline(config, device=torch.device('cuda'))
 
-def greet(name):
-    return "Hello " + name + "!!"
-
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()
+
+def process(input_image: np.ndarray, prompt: str):
+    output: PipelineOutput = fashion_pipeline(
+        control_image=input_image,
+        prompt=prompt,
+    )
+    return [
+        output.control_image,
+        output.control_mask,
+        output.generated_image,
+    ]
+
+
+block = gr.Blocks().queue()
+with block:
+    with gr.Row():
+        gr.Markdown("## Control Stable Diffusion with Segmentation Maps")
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(type="numpy")
+            prompt = gr.Textbox(label="Prompt")
+            run_button = gr.Button(value="Run")
+
+        with gr.Column():
+            result_gallery = gr.Gallery(label='Output', show_label=False, elem_id="gallery")
+    ips = [input_image, prompt]
+    run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
+
+
+block.launch()
config.py ADDED
@@ -0,0 +1,22 @@
+from dataclasses import dataclass
+
+import os
+import sys
+
+
+weights_path = os.path.join(sys.path[0], 'weights')
+
+
+@dataclass
+class PipelineConfig:
+    """Fashion ControlNet pipeline config."""
+
+    base_model_path: str = 'stabilityai/stable-diffusion-xl-base-1.0'
+    # /pub/home/korostelev/.cache/huggingface/hub/models--stabilityai--stable-diffusion-xl-base-1.0/snapshots/462165984030d82259a11f4367a4eed129e94a7b
+
+    controlnet_path: str = r"C:\Users\dragynir\.cache\huggingface\hub\fashion_controlnet"
+
+    # https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
+    vae_path: str = 'madebyollin/sdxl-vae-fp16-fix'
+
+    segmentation_model_path: str = os.path.join(weights_path, 'cloth_segm.pth')
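One caveat: `controlnet_path` defaults to a machine-local Windows cache directory that will not exist on a Space. A more portable default could resolve the weights from the Hub; a sketch, assuming the trained ControlNet is published as a Hub model repo (the repo id below is an assumption):

```python
import os

from huggingface_hub import snapshot_download

# Assumption: the trained ControlNet weights live in a Hub model repo.
# snapshot_download returns a local directory that
# ControlNetModel.from_pretrained can load directly.
controlnet_path = os.environ.get('CONTROLNET_PATH') or snapshot_download(
    repo_id='dragynir/fashion_controlnet',
)
```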
requirements.txt ADDED
@@ -0,0 +1,7 @@
+transformers
+diffusers
+accelerate
+# Also imported by app.py and src/ but missing from the original list:
+gradio
+torch
+torchvision
src/__init__.py ADDED
File without changes
src/pipeline.py ADDED
@@ -0,0 +1,95 @@
+from dataclasses import dataclass
+
+from PIL import Image
+import numpy as np
+
+from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
+import torch
+
+from src.preprocess import HWC3
+from src.unet.predictor import generate_mask, load_seg_model
+
+from config import PipelineConfig
+
+
+@dataclass
+class PipelineOutput:
+    control_image: np.ndarray
+    control_mask: np.ndarray
+    generated_image: np.ndarray
+
+
+class FashionPipeline:
+
+    def __init__(
+        self,
+        config: PipelineConfig,
+        device: torch.device,
+    ):
+        self.config = config
+        self.device = device
+
+        self.segmentation_model = None
+        self.controlnet = None
+        self.pipeline = None
+
+        self.__init_pipeline()
+
+    def __call__(
+        self,
+        control_image: np.ndarray,
+        prompt: str,
+        resolution: int = 512,
+        num_inference_steps: int = 40,
+    ) -> PipelineOutput:
+        # Check the image format.
+        control_image = HWC3(control_image)
+
+        # Extract the segmentation mask.
+        control_mask = self.extract_mask(control_image).resize((resolution, resolution))
+
+        # Generate the image.
+        generator = torch.manual_seed(0)
+        generated_image = self.pipeline(
+            image=control_mask,
+            prompt=prompt,
+            num_inference_steps=num_inference_steps,
+            generator=generator,
+        ).images[0]
+
+        return PipelineOutput(
+            control_image=control_image,
+            control_mask=control_mask,
+            generated_image=generated_image,
+        )
+
+    def extract_mask(self, control_image: np.ndarray) -> Image.Image:
+        """Run the segmentation model to extract a clothing-parts mask."""
+        control_mask = generate_mask(control_image, self.segmentation_model, device=self.device)
+        # Replicate the class-index mask to 3 channels and rescale the class
+        # ids (0..3) into the 0..255 range expected by the ControlNet.
+        control_mask = np.stack([control_mask] * 3, axis=-1)
+        control_mask = np.clip((control_mask.astype(np.float32) / 3.0) * 255, 0, 255)
+        return Image.fromarray(control_mask.astype('uint8'), 'RGB')
+
+    def __init_pipeline(self):
+        """Init models and the SDXL pipeline."""
+        self.segmentation_model = load_seg_model(
+            self.config.segmentation_model_path,
+            device=self.device,
+        )
+
+        self.controlnet = ControlNetModel.from_pretrained(
+            self.config.controlnet_path,
+            torch_dtype=torch.float16,
+        )
+
+        self.pipeline = StableDiffusionXLControlNetPipeline.from_pretrained(
+            self.config.base_model_path,
+            controlnet=self.controlnet,
+            torch_dtype=torch.float16,
+        )
+
+        self.pipeline.scheduler = UniPCMultistepScheduler.from_config(self.pipeline.scheduler.config)
+
+        self.pipeline.enable_model_cpu_offload()
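For reference, a minimal sketch of driving the pipeline outside Gradio; the image path is a placeholder, and a CUDA device plus the weights from config.py are assumed:

```python
import numpy as np
import torch
from PIL import Image

from config import PipelineConfig
from src.pipeline import FashionPipeline

# Placeholder input; any RGB photo of a clothed person should work.
image = np.array(Image.open('person.jpg').convert('RGB'))

pipeline = FashionPipeline(PipelineConfig(), device=torch.device('cuda'))
output = pipeline(control_image=image, prompt='a red silk dress, studio photo')

# generated_image is a PIL image returned by the diffusers pipeline.
output.generated_image.save('generated.png')
```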
src/preprocess.py ADDED
@@ -0,0 +1,21 @@
+import numpy as np
+
+
+def HWC3(x):
+    """Coerce a uint8 image to 3-channel HWC: gray is replicated, RGBA is composited over white."""
+    assert x.dtype == np.uint8
+    if x.ndim == 2:
+        x = x[:, :, None]
+    assert x.ndim == 3
+    H, W, C = x.shape
+    assert C == 1 or C == 3 or C == 4
+    if C == 3:
+        return x
+    if C == 1:
+        return np.concatenate([x, x, x], axis=2)
+    if C == 4:
+        color = x[:, :, 0:3].astype(np.float32)
+        alpha = x[:, :, 3:4].astype(np.float32) / 255.0
+        y = color * alpha + 255.0 * (1.0 - alpha)
+        y = y.clip(0, 255).astype(np.uint8)
+        return y
src/unet/__init__.py ADDED
File without changes
src/unet/network.py ADDED
@@ -0,0 +1,559 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class REBNCONV(nn.Module):
+    def __init__(self, in_ch=3, out_ch=3, dirate=1):
+        super(REBNCONV, self).__init__()
+
+        self.conv_s1 = nn.Conv2d(
+            in_ch, out_ch, 3, padding=1 * dirate, dilation=1 * dirate
+        )
+        self.bn_s1 = nn.BatchNorm2d(out_ch)
+        self.relu_s1 = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        hx = x
+        xout = self.relu_s1(self.bn_s1(self.conv_s1(hx)))
+        return xout
+
+
+## upsample tensor 'src' to the same spatial size as tensor 'tar'
+def _upsample_like(src, tar):
+    # F.interpolate replaces the deprecated F.upsample
+    src = F.interpolate(src, size=tar.shape[2:], mode="bilinear")
+    return src
+
+
+### RSU-7 ###
+class RSU7(nn.Module):  # UNet07DRES(nn.Module):
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU7, self).__init__()
+
+        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+
+        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)
+
+        self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)
+
+        self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+
+    def forward(self, x):
+        hx = x
+        hxin = self.rebnconvin(hx)
+
+        hx1 = self.rebnconv1(hxin)
+        hx = self.pool1(hx1)
+
+        hx2 = self.rebnconv2(hx)
+        hx = self.pool2(hx2)
+
+        hx3 = self.rebnconv3(hx)
+        hx = self.pool3(hx3)
+
+        hx4 = self.rebnconv4(hx)
+        hx = self.pool4(hx4)
+
+        hx5 = self.rebnconv5(hx)
+        hx = self.pool5(hx5)
+
+        hx6 = self.rebnconv6(hx)
+
+        hx7 = self.rebnconv7(hx6)
+
+        hx6d = self.rebnconv6d(torch.cat((hx7, hx6), 1))
+        hx6dup = _upsample_like(hx6d, hx5)
+
+        hx5d = self.rebnconv5d(torch.cat((hx6dup, hx5), 1))
+        hx5dup = _upsample_like(hx5d, hx4)
+
+        hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
+        hx4dup = _upsample_like(hx4d, hx3)
+
+        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
+        hx3dup = _upsample_like(hx3d, hx2)
+
+        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+        hx2dup = _upsample_like(hx2d, hx1)
+
+        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+
+        """
+        del hx1, hx2, hx3, hx4, hx5, hx6, hx7
+        del hx6d, hx5d, hx3d, hx2d
+        del hx2dup, hx3dup, hx4dup, hx5dup, hx6dup
+        """
+
+        return hx1d + hxin
+
+
+### RSU-6 ###
+class RSU6(nn.Module):  # UNet06DRES(nn.Module):
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU6, self).__init__()
+
+        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+
+        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
+
+        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)
+
+        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+
+    def forward(self, x):
+        hx = x
+
+        hxin = self.rebnconvin(hx)
+
+        hx1 = self.rebnconv1(hxin)
+        hx = self.pool1(hx1)
+
+        hx2 = self.rebnconv2(hx)
+        hx = self.pool2(hx2)
+
+        hx3 = self.rebnconv3(hx)
+        hx = self.pool3(hx3)
+
+        hx4 = self.rebnconv4(hx)
+        hx = self.pool4(hx4)
+
+        hx5 = self.rebnconv5(hx)
+
+        hx6 = self.rebnconv6(hx5)
+
+        hx5d = self.rebnconv5d(torch.cat((hx6, hx5), 1))
+        hx5dup = _upsample_like(hx5d, hx4)
+
+        hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
+        hx4dup = _upsample_like(hx4d, hx3)
+
+        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
+        hx3dup = _upsample_like(hx3d, hx2)
+
+        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+        hx2dup = _upsample_like(hx2d, hx1)
+
+        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+
+        """
+        del hx1, hx2, hx3, hx4, hx5, hx6
+        del hx5d, hx4d, hx3d, hx2d
+        del hx2dup, hx3dup, hx4dup, hx5dup
+        """
+
+        return hx1d + hxin
+
+
+### RSU-5 ###
+class RSU5(nn.Module):  # UNet05DRES(nn.Module):
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU5, self).__init__()
+
+        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+
+        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
+
+        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)
+
+        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+
+    def forward(self, x):
+        hx = x
+
+        hxin = self.rebnconvin(hx)
+
+        hx1 = self.rebnconv1(hxin)
+        hx = self.pool1(hx1)
+
+        hx2 = self.rebnconv2(hx)
+        hx = self.pool2(hx2)
+
+        hx3 = self.rebnconv3(hx)
+        hx = self.pool3(hx3)
+
+        hx4 = self.rebnconv4(hx)
+
+        hx5 = self.rebnconv5(hx4)
+
+        hx4d = self.rebnconv4d(torch.cat((hx5, hx4), 1))
+        hx4dup = _upsample_like(hx4d, hx3)
+
+        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
+        hx3dup = _upsample_like(hx3d, hx2)
+
+        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+        hx2dup = _upsample_like(hx2d, hx1)
+
+        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+
+        """
+        del hx1, hx2, hx3, hx4, hx5
+        del hx4d, hx3d, hx2d
+        del hx2dup, hx3dup, hx4dup
+        """
+
+        return hx1d + hxin
+
+
+### RSU-4 ###
+class RSU4(nn.Module):  # UNet04DRES(nn.Module):
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU4, self).__init__()
+
+        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+
+        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+
+        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)
+
+        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+
+    def forward(self, x):
+        hx = x
+
+        hxin = self.rebnconvin(hx)
+
+        hx1 = self.rebnconv1(hxin)
+        hx = self.pool1(hx1)
+
+        hx2 = self.rebnconv2(hx)
+        hx = self.pool2(hx2)
+
+        hx3 = self.rebnconv3(hx)
+
+        hx4 = self.rebnconv4(hx3)
+
+        hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
+        hx3dup = _upsample_like(hx3d, hx2)
+
+        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+        hx2dup = _upsample_like(hx2d, hx1)
+
+        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+
+        """
+        del hx1, hx2, hx3, hx4
+        del hx3d, hx2d
+        del hx2dup, hx3dup
+        """
+
+        return hx1d + hxin
+
+
+### RSU-4F ###
+class RSU4F(nn.Module):  # UNet04FRES(nn.Module):
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU4F, self).__init__()
+
+        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+
+        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
+        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)
+
+        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)
+
+        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4)
+        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2)
+        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+
+    def forward(self, x):
+        hx = x
+
+        hxin = self.rebnconvin(hx)
+
+        hx1 = self.rebnconv1(hxin)
+        hx2 = self.rebnconv2(hx1)
+        hx3 = self.rebnconv3(hx2)
+
+        hx4 = self.rebnconv4(hx3)
+
+        hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
+        hx2d = self.rebnconv2d(torch.cat((hx3d, hx2), 1))
+        hx1d = self.rebnconv1d(torch.cat((hx2d, hx1), 1))
+
+        """
+        del hx1, hx2, hx3, hx4
+        del hx3d, hx2d
+        """
+
+        return hx1d + hxin
+
+
+##### U^2-Net ####
+class U2NET(nn.Module):
+    def __init__(self, in_ch=3, out_ch=1):
+        super(U2NET, self).__init__()
+
+        self.stage1 = RSU7(in_ch, 32, 64)
+        self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.stage2 = RSU6(64, 32, 128)
+        self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.stage3 = RSU5(128, 64, 256)
+        self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.stage4 = RSU4(256, 128, 512)
+        self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.stage5 = RSU4F(512, 256, 512)
+        self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.stage6 = RSU4F(512, 256, 512)
+
+        # decoder
+        self.stage5d = RSU4F(1024, 256, 512)
+        self.stage4d = RSU4(1024, 128, 256)
+        self.stage3d = RSU5(512, 64, 128)
+        self.stage2d = RSU6(256, 32, 64)
+        self.stage1d = RSU7(128, 16, 64)
+
+        self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
+        self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
+        self.side3 = nn.Conv2d(128, out_ch, 3, padding=1)
+        self.side4 = nn.Conv2d(256, out_ch, 3, padding=1)
+        self.side5 = nn.Conv2d(512, out_ch, 3, padding=1)
+        self.side6 = nn.Conv2d(512, out_ch, 3, padding=1)
+
+        self.outconv = nn.Conv2d(6 * out_ch, out_ch, 1)
+
+    def forward(self, x):
+        hx = x
+
+        # stage 1
+        hx1 = self.stage1(hx)
+        hx = self.pool12(hx1)
+
+        # stage 2
+        hx2 = self.stage2(hx)
+        hx = self.pool23(hx2)
+
+        # stage 3
+        hx3 = self.stage3(hx)
+        hx = self.pool34(hx3)
+
+        # stage 4
+        hx4 = self.stage4(hx)
+        hx = self.pool45(hx4)
+
+        # stage 5
+        hx5 = self.stage5(hx)
+        hx = self.pool56(hx5)
+
+        # stage 6
+        hx6 = self.stage6(hx)
+        hx6up = _upsample_like(hx6, hx5)
+
+        # -------------------- decoder --------------------
+        hx5d = self.stage5d(torch.cat((hx6up, hx5), 1))
+        hx5dup = _upsample_like(hx5d, hx4)
+
+        hx4d = self.stage4d(torch.cat((hx5dup, hx4), 1))
+        hx4dup = _upsample_like(hx4d, hx3)
+
+        hx3d = self.stage3d(torch.cat((hx4dup, hx3), 1))
+        hx3dup = _upsample_like(hx3d, hx2)
+
+        hx2d = self.stage2d(torch.cat((hx3dup, hx2), 1))
+        hx2dup = _upsample_like(hx2d, hx1)
+
+        hx1d = self.stage1d(torch.cat((hx2dup, hx1), 1))
+
+        # side output
+        d1 = self.side1(hx1d)
+
+        d2 = self.side2(hx2d)
+        d2 = _upsample_like(d2, d1)
+
+        d3 = self.side3(hx3d)
+        d3 = _upsample_like(d3, d1)
+
+        d4 = self.side4(hx4d)
+        d4 = _upsample_like(d4, d1)
+
+        d5 = self.side5(hx5d)
+        d5 = _upsample_like(d5, d1)
+
+        d6 = self.side6(hx6)
+        d6 = _upsample_like(d6, d1)
+
+        d0 = self.outconv(torch.cat((d1, d2, d3, d4, d5, d6), 1))
+
+        """
+        del hx1, hx2, hx3, hx4, hx5, hx6
+        del hx5d, hx4d, hx3d, hx2d, hx1d
+        del hx6up, hx5dup, hx4dup, hx3dup, hx2dup
+        """
+
+        return d0, d1, d2, d3, d4, d5, d6
+
+
+### U^2-Net small ###
+class U2NETP(nn.Module):
+    def __init__(self, in_ch=3, out_ch=1):
+        super(U2NETP, self).__init__()
+
+        self.stage1 = RSU7(in_ch, 16, 64)
+        self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.stage2 = RSU6(64, 16, 64)
+        self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.stage3 = RSU5(64, 16, 64)
+        self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.stage4 = RSU4(64, 16, 64)
+        self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.stage5 = RSU4F(64, 16, 64)
+        self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+        self.stage6 = RSU4F(64, 16, 64)
+
+        # decoder
+        self.stage5d = RSU4F(128, 16, 64)
+        self.stage4d = RSU4(128, 16, 64)
+        self.stage3d = RSU5(128, 16, 64)
+        self.stage2d = RSU6(128, 16, 64)
+        self.stage1d = RSU7(128, 16, 64)
+
+        self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
+        self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
+        self.side3 = nn.Conv2d(64, out_ch, 3, padding=1)
+        self.side4 = nn.Conv2d(64, out_ch, 3, padding=1)
+        self.side5 = nn.Conv2d(64, out_ch, 3, padding=1)
+        self.side6 = nn.Conv2d(64, out_ch, 3, padding=1)
+
+        self.outconv = nn.Conv2d(6 * out_ch, out_ch, 1)
+
+    def forward(self, x):
+        hx = x
+
+        # stage 1
+        hx1 = self.stage1(hx)
+        hx = self.pool12(hx1)
+
+        # stage 2
+        hx2 = self.stage2(hx)
+        hx = self.pool23(hx2)
+
+        # stage 3
+        hx3 = self.stage3(hx)
+        hx = self.pool34(hx3)
+
+        # stage 4
+        hx4 = self.stage4(hx)
+        hx = self.pool45(hx4)
+
+        # stage 5
+        hx5 = self.stage5(hx)
+        hx = self.pool56(hx5)
+
+        # stage 6
+        hx6 = self.stage6(hx)
+        hx6up = _upsample_like(hx6, hx5)
+
+        # decoder
+        hx5d = self.stage5d(torch.cat((hx6up, hx5), 1))
+        hx5dup = _upsample_like(hx5d, hx4)
+
+        hx4d = self.stage4d(torch.cat((hx5dup, hx4), 1))
+        hx4dup = _upsample_like(hx4d, hx3)
+
+        hx3d = self.stage3d(torch.cat((hx4dup, hx3), 1))
+        hx3dup = _upsample_like(hx3d, hx2)
+
+        hx2d = self.stage2d(torch.cat((hx3dup, hx2), 1))
+        hx2dup = _upsample_like(hx2d, hx1)
+
+        hx1d = self.stage1d(torch.cat((hx2dup, hx1), 1))
+
+        # side output
+        d1 = self.side1(hx1d)
+
+        d2 = self.side2(hx2d)
+        d2 = _upsample_like(d2, d1)
+
+        d3 = self.side3(hx3d)
+        d3 = _upsample_like(d3, d1)
+
+        d4 = self.side4(hx4d)
+        d4 = _upsample_like(d4, d1)
+
+        d5 = self.side5(hx5d)
+        d5 = _upsample_like(d5, d1)
+
+        d6 = self.side6(hx6)
+        d6 = _upsample_like(d6, d1)
+
+        d0 = self.outconv(torch.cat((d1, d2, d3, d4, d5, d6), 1))
+
+        return d0, d1, d2, d3, d4, d5, d6
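A quick sanity check of the network's output contract, as a sketch; the 768x768 input size matches what predictor.py below feeds it, and `out_ch=4` matches the cloth checkpoint:

```python
import torch

from src.unet.network import U2NET

net = U2NET(in_ch=3, out_ch=4).eval()
with torch.no_grad():
    outputs = net(torch.zeros(1, 3, 768, 768))

# d0 is the fused prediction, d1..d6 are the six side outputs; every one
# is upsampled back to the input resolution.
print([tuple(o.shape) for o in outputs])  # 7 x (1, 4, 768, 768)
```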
src/unet/predictor.py ADDED
@@ -0,0 +1,157 @@
+from src.unet.network import U2NET
+
+import os
+from PIL import Image
+import argparse
+import numpy as np
+
+import torch
+import torch.nn.functional as F
+import torchvision.transforms as transforms
+
+from collections import OrderedDict
+
+
+def load_checkpoint(model, checkpoint_path):
+    if not os.path.exists(checkpoint_path):
+        raise FileNotFoundError("No checkpoint at given path: {}".format(checkpoint_path))
+    model_state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+    new_state_dict = OrderedDict()
+    for k, v in model_state_dict.items():
+        name = k[7:]  # remove `module.`
+        new_state_dict[name] = v
+
+    model.load_state_dict(new_state_dict)
+    print("----checkpoints loaded from path: {}----".format(checkpoint_path))
+    return model
+
+
+def get_palette(num_cls):
+    """Returns the color map for visualizing the segmentation mask.
+
+    Args:
+        num_cls: Number of classes
+
+    Returns:
+        The color map
+    """
+    n = num_cls
+    palette = [0] * (n * 3)
+    for j in range(0, n):
+        lab = j
+        palette[j * 3 + 0] = 0
+        palette[j * 3 + 1] = 0
+        palette[j * 3 + 2] = 0
+        i = 0
+        while lab:
+            palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i))
+            palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i))
+            palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i))
+            i += 1
+            lab >>= 3
+    return palette
+
+
+class Normalize_image(object):
+    """Normalize a tensor image with the given mean and standard deviation.
+
+    Args:
+        mean (float): Desired mean to subtract from tensors
+        std (float): Desired std to divide tensors by
+    """
+
+    def __init__(self, mean, std):
+        assert isinstance(mean, float)
+        assert isinstance(std, float)
+        self.mean = mean
+        self.std = std
+
+        self.normalize_1 = transforms.Normalize(self.mean, self.std)
+        self.normalize_3 = transforms.Normalize([self.mean] * 3, [self.std] * 3)
+        self.normalize_18 = transforms.Normalize([self.mean] * 18, [self.std] * 18)
+
+    def __call__(self, image_tensor):
+        if image_tensor.shape[0] == 1:
+            return self.normalize_1(image_tensor)
+        elif image_tensor.shape[0] == 3:
+            return self.normalize_3(image_tensor)
+        elif image_tensor.shape[0] == 18:
+            return self.normalize_18(image_tensor)
+        else:
+            raise ValueError("Please set proper channels! Normalization implemented only for 1, 3 and 18.")
+
+
+def apply_transform(img):
+    transforms_list = []
+    transforms_list += [transforms.ToTensor()]
+    transforms_list += [Normalize_image(0.5, 0.5)]
+    transform_rgb = transforms.Compose(transforms_list)
+    return transform_rgb(img)
+
+
+def generate_mask(input_image, net, palette=None, device='cpu'):
+    if isinstance(input_image, np.ndarray):
+        input_image = Image.fromarray(input_image)
+
+    img = input_image
+    img_size = img.size
+    img = img.resize((768, 768), Image.BICUBIC)
+    image_tensor = apply_transform(img)
+    image_tensor = torch.unsqueeze(image_tensor, 0)
+
+    with torch.no_grad():
+        output_tensor = net(image_tensor.to(device))
+        output_tensor = F.log_softmax(output_tensor[0], dim=1)
+        output_tensor = torch.max(output_tensor, dim=1, keepdim=True)[1]
+        output_tensor = torch.squeeze(output_tensor, dim=0)
+        output_arr = output_tensor.cpu().numpy()
+
+    # Final cloth segmentation as a class-index mask
+    mask = output_arr[0].astype(np.uint8)
+
+    if not palette:
+        return mask
+
+    mask_image_palette = Image.fromarray(mask, mode='P')
+    mask_image_palette.putpalette(palette)
+    mask_image_palette = mask_image_palette.resize(img_size, Image.BICUBIC)
+    return mask, mask_image_palette
+
+
+def load_seg_model(checkpoint_path, device='cpu'):
+    net = U2NET(in_ch=3, out_ch=4)
+    net = load_checkpoint(net, checkpoint_path)
+    net = net.to(device)
+    net = net.eval()
+
+    return net
+
+
+def main(args):
+    device = 'cuda:0' if args.cuda else 'cpu'
+
+    # Create an instance of the model
+    model = load_seg_model(args.checkpoint_path, device=device)
+
+    palette = get_palette(4)
+
+    img = Image.open(args.image).convert('RGB')
+
+    mask, mask_image_palette = generate_mask(img, net=model, palette=palette, device=device)
+    mask_image_palette.save('cloth_mask.png')
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Help to set arguments for Cloth Segmentation.')
+    parser.add_argument('--image', type=str, help='Path to the input image')
+    parser.add_argument('--cuda', action='store_true', help='Enable CUDA (default: False)')
+    parser.add_argument('--checkpoint_path', type=str, default='../models/cloth_segm.pth', help='Path to the checkpoint file')
+    args = parser.parse_args()
+
+    main(args)
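A minimal sketch of calling the mask generator directly with the palette visualization; file paths are placeholders:

```python
import numpy as np
from PIL import Image

from src.unet.predictor import generate_mask, get_palette, load_seg_model

model = load_seg_model('weights/cloth_segm.pth', device='cpu')
img = Image.open('person.jpg').convert('RGB')

# With a palette, generate_mask returns both the raw class-index mask and
# a paletted PIL visualization resized back to the input size.
mask, mask_vis = generate_mask(img, net=model, palette=get_palette(4))
print(np.unique(mask))  # class ids present, a subset of {0, 1, 2, 3}
mask_vis.save('mask_vis.png')
```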