kairusann committed
Commit
6ee3369
1 Parent(s): 7a207c9

first working version

Files changed (6)
  1. app.py +101 -0
  2. models/netG.pth +3 -0
  3. models/sk_model.pth +3 -0
  4. requirements.txt +6 -0
  5. sketch_models.py +191 -0
  6. utils.py +83 -0
app.py ADDED
@@ -0,0 +1,101 @@
+
+ import functools
+ import cv2
+ import gradio as gr
+ import torch
+ import torch.nn as nn
+ import numpy as np
+ from PIL import Image
+ from einops import rearrange
+ from sketch_models import SimpleGenerator, UnetGenerator
+ from utils import common_input_validate, resize_image_with_pad, HWC3
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ def get_sketch(input_image, mode='anime', detect_resolution=512, output_type="pil", upscale_method="INTER_LANCZOS4", **kwargs):
+
+     input_image, output_type = common_input_validate(input_image, output_type, **kwargs)
+     detected_map, remove_pad = resize_image_with_pad(input_image, detect_resolution, upscale_method)
+
+     H, W, C = input_image.shape
+     Hn = 256 * int(np.ceil(float(H) / 256.0))
+     Wn = 256 * int(np.ceil(float(W) / 256.0))
+
+     assert detected_map.ndim == 3
+
+     if mode == 'realistic':
+         model = SimpleGenerator(3, 1, 3).to(device)
+         model.load_state_dict(torch.load("models/sk_model.pth", map_location=device))
+         model.eval()
+
+         with torch.no_grad():
+             image = torch.from_numpy(detected_map).float().to(device)
+             image = image / 255.0
+             image = rearrange(image, 'h w c -> 1 c h w')
+
+             line = model(image)[0][0]
+             line = line.cpu().numpy()
+             line = (line * 255.0).clip(0, 255).astype(np.uint8)
+
+         detected_map = HWC3(line)
+
+         detected_map = remove_pad(detected_map)
+         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LANCZOS4)
+
+     elif mode == 'anime':
+         norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)
+         model = UnetGenerator(3, 1, 8, 64, norm_layer=norm_layer, use_dropout=False).to(device)
+         ckpt = torch.load("models/netG.pth", map_location=device)
+         for key in list(ckpt.keys()):
+             if 'module.' in key:
+                 ckpt[key.replace('module.', '')] = ckpt[key]
+                 del ckpt[key]
+         model.load_state_dict(ckpt)
+         model.eval()
+
+         input_image = cv2.resize(input_image, (Wn, Hn), interpolation=cv2.INTER_LANCZOS4)
+
+         with torch.no_grad():
+             image_feed = torch.from_numpy(input_image).float().to(device)
+             image_feed = image_feed / 127.5 - 1.0
+             image_feed = rearrange(image_feed, 'h w c -> 1 c h w')
+
+             line = model(image_feed)[0, 0] * 127.5 + 127.5
+             line = line.cpu().numpy()
+             line = line.clip(0, 255).astype(np.uint8)
+
+         # A1111 uses INTER_AREA for downscaling, so that is likely the best choice
+         detected_map = HWC3(line)
+         detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LANCZOS4)
+
+     else:  # standard
+         gaussian_sigma = 6.0
+         intensity_threshold = 8
+         x = detected_map.astype(np.float32)
+         g = cv2.GaussianBlur(x, (0, 0), gaussian_sigma)
+         intensity = np.min(g - x, axis=2).clip(0, 255)
+         intensity /= max(16, np.median(intensity[intensity > intensity_threshold]))
+         intensity *= 127
+         detected_map = intensity.clip(0, 255).astype(np.uint8)
+
+         detected_map = remove_pad(255 - detected_map)
+         detected_map = cv2.resize(HWC3(detected_map), (W, H), interpolation=cv2.INTER_LANCZOS4)
+
+     if output_type == "pil":
+         detected_map = Image.fromarray(detected_map)
+
+     return detected_map
+
+ iface = gr.Interface(
+     fn=get_sketch,
+     inputs=[
+         gr.Image(type="numpy", label="Upload Image"),
+         gr.Radio(["anime", "realistic", "standard"], value="anime", label="Mode", info="Process methods"),
+     ],
+     outputs=gr.Image(type="numpy", label="Sketch Output"),
+     title="Get a Sketch",
+     description="Upload an image and get a simplified sketch"
+ )
+
+ if __name__ == "__main__":
+     iface.launch()
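
For reference, a minimal sketch of calling the processor without the Gradio UI (assumes app.py and its dependencies are importable from the working directory; "input.png" and "sketch.png" are placeholder paths):

import numpy as np
from PIL import Image
from app import get_sketch  # builds the Gradio Interface on import, but does not launch it

img = np.array(Image.open("input.png").convert("RGB"), dtype=np.uint8)  # placeholder input file
sketch = get_sketch(img, mode="standard", detect_resolution=512, output_type="pil")
sketch.save("sketch.png")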
models/netG.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ccabdcc3f5cf3c07cf65d58776acb21df7dfda825cdc70c9766a93fd62bfc488
+ size 217631959
models/sk_model.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c686ced2a666b4850b4bb6ccf0748031c3eda9f822de73a34b8979970d90f0c6
+ size 17173511
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ torch
+ gradio
+ opencv-python
+ numpy
+ einops
+ pillow
sketch_models.py ADDED
@@ -0,0 +1,191 @@
+
+ import functools
+ import torch
+ import torch.nn as nn
+
+
+ norm_layer = nn.InstanceNorm2d
+
+ class ResidualBlock(nn.Module):
+     def __init__(self, in_features):
+         super(ResidualBlock, self).__init__()
+
+         conv_block = [
+             nn.ReflectionPad2d(1),
+             nn.Conv2d(in_features, in_features, 3),
+             norm_layer(in_features),
+             nn.ReLU(inplace=True),
+             nn.ReflectionPad2d(1),
+             nn.Conv2d(in_features, in_features, 3),
+             norm_layer(in_features)
+         ]
+
+         self.conv_block = nn.Sequential(*conv_block)
+
+     def forward(self, x):
+         return x + self.conv_block(x)
+
+ class UnetSkipConnectionBlock(nn.Module):
+     """Defines the Unet submodule with skip connection.
+         X -------------------identity----------------------
+         |-- downsampling -- |submodule| -- upsampling --|
+     """
+
+     def __init__(self, outer_nc, inner_nc, input_nc=None,
+                  submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False):
+         """Construct a Unet submodule with skip connections.
+         Parameters:
+             outer_nc (int) -- the number of filters in the outer conv layer
+             inner_nc (int) -- the number of filters in the inner conv layer
+             input_nc (int) -- the number of channels in input images/features
+             submodule (UnetSkipConnectionBlock) -- previously defined submodules
+             outermost (bool) -- if this module is the outermost module
+             innermost (bool) -- if this module is the innermost module
+             norm_layer -- normalization layer
+             use_dropout (bool) -- whether to use dropout layers
+         """
+         super(UnetSkipConnectionBlock, self).__init__()
+         self.outermost = outermost
+         if type(norm_layer) == functools.partial:
+             use_bias = norm_layer.func == nn.InstanceNorm2d
+         else:
+             use_bias = norm_layer == nn.InstanceNorm2d
+         if input_nc is None:
+             input_nc = outer_nc
+         downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4,
+                              stride=2, padding=1, bias=use_bias)
+         downrelu = nn.LeakyReLU(0.2, True)
+         downnorm = norm_layer(inner_nc)
+         uprelu = nn.ReLU(True)
+         upnorm = norm_layer(outer_nc)
+
+         if outermost:
+             upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
+                                         kernel_size=4, stride=2,
+                                         padding=1)
+             down = [downconv]
+             up = [uprelu, upconv, nn.Tanh()]
+             model = down + [submodule] + up
+         elif innermost:
+             upconv = nn.ConvTranspose2d(inner_nc, outer_nc,
+                                         kernel_size=4, stride=2,
+                                         padding=1, bias=use_bias)
+             down = [downrelu, downconv]
+             up = [uprelu, upconv, upnorm]
+             model = down + up
+         else:
+             upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
+                                         kernel_size=4, stride=2,
+                                         padding=1, bias=use_bias)
+             down = [downrelu, downconv, downnorm]
+             up = [uprelu, upconv, upnorm]
+
+             if use_dropout:
+                 model = down + [submodule] + up + [nn.Dropout(0.5)]
+             else:
+                 model = down + [submodule] + up
+
+         self.model = nn.Sequential(*model)
+
+     def forward(self, x):
+         if self.outermost:
+             return self.model(x)
+         else:  # add skip connections
+             return torch.cat([x, self.model(x)], 1)
+
+ class SimpleGenerator(nn.Module):
+     def __init__(self, input_nc, output_nc, n_residual_blocks=9, sigmoid=True):
+         super(SimpleGenerator, self).__init__()
+
+         # Initial convolution block
+         model0 = [
+             nn.ReflectionPad2d(3),
+             nn.Conv2d(input_nc, 64, 7),
+             norm_layer(64),
+             nn.ReLU(inplace=True)
+         ]
+         self.model0 = nn.Sequential(*model0)
+
+         # Downsampling
+         model1 = []
+         in_features = 64
+         out_features = in_features * 2
+         for _ in range(2):
+             model1 += [
+                 nn.Conv2d(in_features, out_features, 3, stride=2, padding=1),
+                 norm_layer(out_features),
+                 nn.ReLU(inplace=True)
+             ]
+             in_features = out_features
+             out_features = in_features * 2
+         self.model1 = nn.Sequential(*model1)
+
+         model2 = []
+         # Residual blocks
+         for _ in range(n_residual_blocks):
+             model2 += [ResidualBlock(in_features)]
+         self.model2 = nn.Sequential(*model2)
+
+         # Upsampling
+         model3 = []
+         out_features = in_features // 2
+         for _ in range(2):
+             model3 += [
+                 nn.ConvTranspose2d(in_features, out_features, 3, stride=2, padding=1, output_padding=1),
+                 norm_layer(out_features),
+                 nn.ReLU(inplace=True)
+             ]
+             in_features = out_features
+             out_features = in_features // 2
+         self.model3 = nn.Sequential(*model3)
+
+         # Output layer
+         model4 = [
+             nn.ReflectionPad2d(3),
+             nn.Conv2d(64, output_nc, 7)
+         ]
+         if sigmoid:
+             model4 += [nn.Sigmoid()]
+
+         self.model4 = nn.Sequential(*model4)
+
+     def forward(self, x, cond=None):
+         out = self.model0(x)
+         out = self.model1(out)
+         out = self.model2(out)
+         out = self.model3(out)
+         out = self.model4(out)
+
+         return out
+
+ class UnetGenerator(nn.Module):
+     """Create a Unet-based generator"""
+
+     def __init__(self, input_nc, output_nc, num_downs, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False):
+         """Construct a Unet generator.
+         Parameters:
+             input_nc (int) -- the number of channels in input images
+             output_nc (int) -- the number of channels in output images
+             num_downs (int) -- the number of downsamplings in the UNet. For example, if |num_downs| == 7,
+                                an image of size 128x128 will become of size 1x1 at the bottleneck
+             ngf (int) -- the number of filters in the last conv layer
+             norm_layer -- normalization layer
+         We construct the U-Net from the innermost layer to the outermost layer.
+         It is a recursive process.
+         """
+         super(UnetGenerator, self).__init__()
+         # construct unet structure
+         unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True)  # add the innermost layer
+         for _ in range(num_downs - 5):  # add intermediate layers with ngf * 8 filters
+             unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout)
+         # gradually reduce the number of filters from ngf * 8 to ngf
+         unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
+         unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
+         unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
+         self.model = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer)  # add the outermost layer
+
+     def forward(self, input):
+         """Standard forward"""
+         return self.model(input)
+
+
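
As a quick sanity check, the two generators can be instantiated the same way app.py does and run on a dummy tensor (a minimal sketch: random weights, CPU, and the 256x256 size is illustrative):

import functools
import torch
import torch.nn as nn
from sketch_models import SimpleGenerator, UnetGenerator

norm = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)
anime = UnetGenerator(3, 1, 8, 64, norm_layer=norm, use_dropout=False)  # 8 downs: input sides must be multiples of 256
realistic = SimpleGenerator(3, 1, 3)  # 3 residual blocks, sigmoid output

x = torch.randn(1, 3, 256, 256)
print(anime(x).shape)      # torch.Size([1, 1, 256, 256])
print(realistic(x).shape)  # torch.Size([1, 1, 256, 256])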
utils.py ADDED
@@ -0,0 +1,83 @@
+ import warnings
+ import numpy as np
+ import cv2
+
+
+ UPSCALE_METHODS = ["INTER_NEAREST", "INTER_LINEAR", "INTER_AREA", "INTER_CUBIC", "INTER_LANCZOS4"]
+
+ def HWC3(x):
+     assert x.dtype == np.uint8
+     if x.ndim == 2:
+         x = x[:, :, None]
+     assert x.ndim == 3
+     H, W, C = x.shape
+     assert C == 1 or C == 3 or C == 4
+     if C == 3:
+         return x
+     if C == 1:
+         return np.concatenate([x, x, x], axis=2)
+     if C == 4:
+         color = x[:, :, 0:3].astype(np.float32)
+         alpha = x[:, :, 3:4].astype(np.float32) / 255.0
+         y = color * alpha + 255.0 * (1.0 - alpha)
+         y = y.clip(0, 255).astype(np.uint8)
+         return y
+
+ def safer_memory(x):
+     # Fix many MAC/AMD problems
+     return np.ascontiguousarray(x.copy()).copy()
+
+ def get_upscale_method(method_str):
+     assert method_str in UPSCALE_METHODS, f"Method {method_str} not found in {UPSCALE_METHODS}"
+     return getattr(cv2, method_str)
+
+ def pad64(x):
+     return int(np.ceil(float(x) / 64.0) * 64 - x)
+
+ # https://github.com/Mikubill/sd-webui-controlnet/blob/main/scripts/processor.py#L17
+ # Added upscale_method, mode params
+ def resize_image_with_pad(input_image, resolution, upscale_method="", skip_hwc3=False, mode='edge'):
+     if skip_hwc3:
+         img = input_image
+     else:
+         img = HWC3(input_image)
+     H_raw, W_raw, _ = img.shape
+     if resolution == 0:
+         return img, lambda x: x
+     k = float(resolution) / float(min(H_raw, W_raw))
+     H_target = int(np.round(float(H_raw) * k))
+     W_target = int(np.round(float(W_raw) * k))
+     img = cv2.resize(img, (W_target, H_target), interpolation=get_upscale_method(upscale_method) if k > 1 else cv2.INTER_AREA)
+     H_pad, W_pad = pad64(H_target), pad64(W_target)
+     img_padded = np.pad(img, [[0, H_pad], [0, W_pad], [0, 0]], mode=mode)
+
+     def remove_pad(x):
+         return safer_memory(x[:H_target, :W_target, ...])
+
+     return safer_memory(img_padded), remove_pad
+
+ def common_input_validate(input_image, output_type, **kwargs):
+     if "img" in kwargs:
+         warnings.warn("img is deprecated, please use `input_image=...` instead.", DeprecationWarning)
+         input_image = kwargs.pop("img")
+
+     if "return_pil" in kwargs:
+         warnings.warn("return_pil is deprecated. Use output_type instead.", DeprecationWarning)
+         output_type = "pil" if kwargs["return_pil"] else "np"
+
+     if type(output_type) is bool:
+         warnings.warn("Passing `True` or `False` to `output_type` is deprecated and will raise an error in future versions")
+         if output_type:
+             output_type = "pil"
+
+     if input_image is None:
+         raise ValueError("input_image must be defined.")
+
+     if not isinstance(input_image, np.ndarray):
+         input_image = np.array(input_image, dtype=np.uint8)
+         output_type = output_type or "pil"
+     else:
+         output_type = output_type or "np"
+
+     return (input_image, output_type)
+
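
A short round-trip sketch for resize_image_with_pad (the 480x640 input is illustrative): the short side is scaled to the requested resolution, both sides are padded up to multiples of 64, and the returned remove_pad closure strips the padding again.

import numpy as np
from utils import resize_image_with_pad

img = np.zeros((480, 640, 3), dtype=np.uint8)
padded, remove_pad = resize_image_with_pad(img, 512, "INTER_LANCZOS4")
print(padded.shape)              # (512, 704, 3): 480x640 scaled to 512x683, then padded to multiples of 64
print(remove_pad(padded).shape)  # (512, 683, 3)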