karimbenharrak committed
Commit 0d43509 · verified · 1 Parent(s): f8d657d

Update handler.py

Files changed (1):
    handler.py +57 -8
handler.py CHANGED
@@ -24,13 +24,30 @@ class EndpointHandler():
         # "stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
         # )
         # self.smooth_pipe.to("cuda")
-
+
+        self.canny_pipe = StableDiffusionPipeline.from_pretrained(
+            "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
+        )
+        self.canny_pipe = self.canny_pipe.to("cuda")
+        self.canny_pipe.enable_model_cpu_offload()
+        self.canny_pipe.enable_xformers_memory_efficient_attention()
+
+        self.controlnets = [
+            ControlNetModel.from_pretrained(
+                "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16, use_safetensors=True
+            ),
+            ControlNetModel.from_pretrained(
+                "lllyasviel/control_v11p_sd15_inpaint", torch_dtype=torch.float16
+            )
+        ]
+        """
         self.controlnet = ControlNetModel.from_pretrained(
             "lllyasviel/control_v11p_sd15_inpaint", torch_dtype=torch.float16
         )
+        """
 
         self.pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
-            "runwayml/stable-diffusion-v1-5", controlnet=self.controlnet, torch_dtype=torch.float16
+            "runwayml/stable-diffusion-v1-5", controlnet=self.controlnets, torch_dtype=torch.float16
         )
 
         self.pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(self.pipe.scheduler.config)
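
Note: diffusers/controlnet-canny-sdxl-1.0 is published for the SDXL architecture, so loading it into an SD 1.5 StableDiffusionControlNetInpaintPipeline is likely to fail or misbehave; the usual SD 1.5 canny checkpoint is lllyasviel/control_v11p_sd15_canny. Also, diffusers recommends not calling .to("cuda") before enable_model_cpu_offload(), since offloading manages device placement itself. A minimal sketch of an architecture-matched setup (the model choices are illustrative, not part of this commit):

import torch
from diffusers import (
    ControlNetModel,
    StableDiffusionControlNetInpaintPipeline,
    EulerAncestralDiscreteScheduler,
)

# both ControlNets target the SD 1.5 UNet, matching the base checkpoint
controlnets = [
    ControlNetModel.from_pretrained(
        "lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16
    ),
    ControlNetModel.from_pretrained(
        "lllyasviel/control_v11p_sd15_inpaint", torch_dtype=torch.float16
    ),
]
pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnets, torch_dtype=torch.float16
)
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()  # handles GPU placement; no explicit .to("cuda")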
@@ -158,18 +175,50 @@ class EndpointHandler():
 
         control_image = self.make_inpaint_condition(image, mask_image)
 
+        low_threshold = 100
+        high_threshold = 200
+
+        # generate a first image from the prompt to derive canny edges
+        gen_canny_img = self.canny_pipe(prompt).images[0]
+
+        gen_canny_img = np.array(gen_canny_img)
+
+        help_image = cv2.Canny(gen_canny_img, low_threshold, high_threshold)
+
+        # get bounding box from selected area in mask image
+        # make help_image fit exactly into the bounding box
+        # create black image with canny edges only in the selected area
+
+        # get bounding box from selected area in mask image
+        mask_image = np.array(mask_image)
+        mask_image = cv2.cvtColor(mask_image, cv2.COLOR_RGB2GRAY)
+        mask_image = cv2.bitwise_not(mask_image)
+        contours, _ = cv2.findContours(mask_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+        x, y, w, h = cv2.boundingRect(contours[0])
+
+        # create a completely black image with the same size as the mask image
+        black_image = np.zeros_like(mask_image)
+
+        # put the canny edges into the black image but resize it to the bounding box
+        help_image = cv2.resize(help_image, (w, h))
+        black_image[y:y+h, x:x+w] = help_image
+
+        canny_image = Image.fromarray(black_image)
+
+        input_images = [canny_image.resize((1024, 1024)), image.resize((1024, 1024))]
+
         # generate image
         image = self.pipe(
             prompt=prompt,
             negative_prompt=negative_prompt,
             num_inference_steps=num_inference_steps,
             eta=1.0,
-            image=image,
-            mask_image=mask_image,
-            control_image=control_image,
-            guidance_scale=guidance_scale,
+            image=input_images,
+            # mask_image=mask_image,
+            # control_image=control_image,
+            # guidance_scale=guidance_scale,
             strength=strength,
-            controlnet_conditioning_scale=0.3
+            controlnet_conditioning_scale=[0.8, 1.0]
         ).images[0]
 
         return image
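
Two cautions on the mask handling above. cv2.findContours returns contours in no particular size order, so contours[0] can be a one-pixel speck on a noisy mask, and cv2.bitwise_not only makes sense if the selected region arrives black-on-white. A more defensive bounding-box helper, assuming the diffusers convention that white marks the area to inpaint (the helper name and threshold are illustrative):

import cv2
import numpy as np

def mask_bounding_box(mask_pil):
    mask = np.array(mask_pil.convert("L"))  # grayscale, 0..255
    _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        raise ValueError("mask contains no selected region")
    # pick the largest contour; contours[0] is not ordered by area
    largest = max(contours, key=cv2.contourArea)
    return cv2.boundingRect(largest)  # (x, y, w, h)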
@@ -190,4 +239,4 @@ class EndpointHandler():
         image[image_mask > 0.5] = -1.0  # set as masked pixel
         image = np.expand_dims(image, 0).transpose(0, 3, 1, 2)
         image = torch.from_numpy(image)
-    return image
+        return image
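
The last hunk only re-indents the return so it sits inside the method body. For context, these lines match the inpaint-conditioning helper from the diffusers ControlNet inpaint example; a sketch of the full function under that assumption (not copied from this repo):

def make_inpaint_condition(self, image, image_mask):
    # normalize the RGB image to [0, 1] and the mask to {0, 1}
    image = np.array(image.convert("RGB")).astype(np.float32) / 255.0
    image_mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0
    assert image.shape[0:1] == image_mask.shape[0:1], "image and image_mask must have the same size"
    image[image_mask > 0.5] = -1.0  # set as masked pixel
    image = np.expand_dims(image, 0).transpose(0, 3, 1, 2)  # HWC -> NCHW
    image = torch.from_numpy(image)
    return image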
 
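
One caution on the new pipeline call: in diffusers, StableDiffusionControlNetInpaintPipeline treats image as the RGB init image and requires mask_image; per-ControlNet conditioning images go in control_image, as a list with one entry per ControlNet. Passing the canny/init pair through image= while commenting out mask_image will likely raise. A sketch of the expected call shape, reusing the hunk's variables (this is the documented API shape, not a drop-in fix for the commit):

result = self.pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    num_inference_steps=num_inference_steps,
    eta=1.0,
    image=image,                                  # RGB init image
    mask_image=mask_image,                        # white = region to repaint
    control_image=[canny_image, control_image],   # one conditioning input per ControlNet
    guidance_scale=guidance_scale,
    strength=strength,
    controlnet_conditioning_scale=[0.8, 1.0],     # one scale per ControlNet
).images[0]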