Sebastian Semeniuc committed
Commit fae0531 · Parent: 4cc0dca

feat: add sdxl with controlnet

Files changed (3):
  1. handler.py +41 -32
  2. request.json +0 -0
  3. requirements.txt +1 -1
handler.py CHANGED
@@ -1,8 +1,8 @@
-from typing import Dict, List, Any
+from typing import Dict, List, Any
 import base64
 from PIL import Image
 from io import BytesIO
-from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
+from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL, UniPCMultistepScheduler
 import torch
 
 
@@ -15,7 +15,16 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 if device.type != 'cuda':
     raise ValueError("need to run on GPU")
 # set mixed precision dtype
-dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
+dtype = torch.bfloat16 if torch.cuda.get_device_capability()[
+    0] == 8 else torch.float16
+
+# for the moment, support only canny edge
+SDXLCONTROLNET_MAPPING = {
+    "canny_edge": {
+        "model_id": "diffusers/controlnet-canny-sdxl-1.0",
+        "hinter": controlnet_hinter.hint_canny
+    }
+}
 
 # controlnet mapping for controlnet id and control hinter
 CONTROLNET_MAPPING = {
@@ -58,14 +67,16 @@ class EndpointHandler():
     def __init__(self, path=""):
         # define default controlnet id and load controlnet
         self.control_type = "normal"
-        self.controlnet = ControlNetModel.from_pretrained(CONTROLNET_MAPPING[self.control_type]["model_id"],torch_dtype=dtype).to(device)
-
-        # Load StableDiffusionControlNetPipeline
+        self.controlnet = ControlNetModel.from_pretrained(
+            SDXLCONTROLNET_MAPPING[self.control_type]["model_id"], torch_dtype=dtype).to(device)
+
+        # Load StableDiffusionControlNetPipeline
+        self.sdxl_id = "stabilityai/stable-diffusion-xl-base-1.0"
         self.stable_diffusion_id = "runwayml/stable-diffusion-v1-5"
-        self.pipe = StableDiffusionControlNetPipeline.from_pretrained(self.stable_diffusion_id,
-                                                                      controlnet=self.controlnet,
-                                                                      torch_dtype=dtype,
-                                                                      safety_checker=None).to(device)
+        self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(self.sdxl_id,
+                                                                        controlnet=self.controlnet,
+                                                                        torch_dtype=dtype,
+                                                                        safety_checker=None).to(device)
         # makes inference much faster
         # self.pipe.scheduler = UniPCMultistepScheduler.from_config(self.pipe.scheduler.config)
         # Define Generator with seed
@@ -78,55 +89,53 @@ class EndpointHandler():
         """
         prompt = data.pop("inputs", None)
         image = data.pop("image", None)
-        num_of_images = data.pop("numOfImages", None)
+        num_of_images = data.pop("num_of_images", None)
         controlnet_type = data.pop("controlnet_type", None)
-
+
         # Check if neither prompt nor image is provided
         if prompt is None and image is None:
             return {"error": "Please provide a prompt and base64 encoded image."}
-
+
        if num_of_images is None:
            num_of_images = 1
-
+
         # Check if a new controlnet is provided
         if controlnet_type is not None and controlnet_type != self.control_type:
-            print(f"changing controlnet from {self.control_type} to {controlnet_type} using {CONTROLNET_MAPPING[controlnet_type]['model_id']} model")
+            print(
+                f"changing controlnet from {self.control_type} to {controlnet_type} using {SDXLCONTROLNET_MAPPING[controlnet_type]['model_id']} model")
             self.control_type = controlnet_type
-            self.controlnet = ControlNetModel.from_pretrained(CONTROLNET_MAPPING[self.control_type]["model_id"],
+            self.controlnet = ControlNetModel.from_pretrained(SDXLCONTROLNET_MAPPING[self.control_type]["model_id"],
                                                               torch_dtype=dtype).to(device)
             self.pipe.controlnet = self.controlnet
-
-
+
         # hyperparamters
         num_inference_steps = data.pop("num_inference_steps", 30)
         guidance_scale = data.pop("guidance_scale", 7.5)
         negative_prompt = data.pop("negative_prompt", None)
         height = data.pop("height", None)
         width = data.pop("width", None)
-        controlnet_conditioning_scale = data.pop("controlnet_conditioning_scale", 1.0)
-
+        controlnet_conditioning_scale = data.pop(
+            "controlnet_conditioning_scale", 1.0)
+
         # process image
         image = self.decode_base64_image(image)
-        control_image = CONTROLNET_MAPPING[self.control_type]["hinter"](image)
-
+        control_image = SDXLCONTROLNET_MAPPING[self.control_type]["hinter"](
+            image, width=1024, height=1024)
+
         # run inference pipeline
-        out = self.pipe(
-            prompt=prompt,
+        images = self.pipe(
+            prompt=prompt,
             negative_prompt=negative_prompt,
             image=control_image,
-            num_inference_steps=num_inference_steps,
+            num_inference_steps=num_inference_steps,
             guidance_scale=guidance_scale,
             num_images_per_prompt=num_of_images,
-            height=height,
-            width=width,
             controlnet_conditioning_scale=controlnet_conditioning_scale,
             generator=self.generator
-        )
+        ).images[0]
+
+        return images
 
-
-        # return the list of generated images
-        return out.images
-
     # helper to decode input image
     def decode_base64_image(self, image_string):
         base64_image = base64.b64decode(image_string)
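
Review note: __call__ now returns a single PIL image (out.images[0]) rather than the previous list. One catch: __init__ still defaults self.control_type to "normal", but SDXLCONTROLNET_MAPPING only defines "canny_edge", so the SDXLCONTROLNET_MAPPING["normal"] lookup would raise a KeyError at load time; presumably the default should become "canny_edge". A minimal local smoke test under that assumption (file names and prompt are hypothetical, not part of this commit):

import base64
from handler import EndpointHandler

# load the handler; assumes the default control_type was changed to
# "canny_edge", since the committed default "normal" is not a key in
# SDXLCONTROLNET_MAPPING
handler = EndpointHandler(path=".")

# the handler expects the conditioning image as a base64 string
with open("input.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

image = handler({
    "inputs": "a futuristic city at sunset, highly detailed",
    "image": image_b64,
    "controlnet_type": "canny_edge",
    "num_of_images": 1,
    "num_inference_steps": 30,
    "guidance_scale": 7.5,
})
image.save("output.png")  # a single PIL.Image, not a list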
request.json CHANGED
The diff for this file is too large to render.
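
The payload itself is not visible here, but from the keys handler.py pops, a request presumably has roughly this shape; the values and endpoint URL below are illustrative placeholders, not the actual contents of request.json:

import base64
import requests

# hypothetical endpoint URL; substitute your deployed inference endpoint
API_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"

with open("input.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

payload = {
    "inputs": "a portrait photo, studio lighting",  # prompt
    "image": image_b64,                             # base64-encoded conditioning image
    "controlnet_type": "canny_edge",                # must be a key in SDXLCONTROLNET_MAPPING
    "num_of_images": 1,
    "num_inference_steps": 30,
    "guidance_scale": 7.5,
    "negative_prompt": "blurry, low quality",
    "controlnet_conditioning_scale": 1.0,
}
response = requests.post(API_URL, json=payload)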
 
requirements.txt CHANGED
@@ -1,4 +1,4 @@
-diffusers==0.19.3
+diffusers==0.20.0
 safetensors
 opencv-python
 controlnet_hinter==0.0.5
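
The bump from diffusers 0.19.3 to 0.20.0 lines up with the new import: as far as I recall, StableDiffusionXLControlNetPipeline first shipped in the 0.20.0 release, so the old pin would fail at import time. A quick environment sanity check:

# verify the pinned diffusers release exposes the classes handler.py imports
import diffusers
from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL

print(diffusers.__version__)  # expect 0.20.0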