Cognomen's picture
fix pipeline
a3ba152
raw
history blame
2.14 kB
import gradio as gr
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from diffusers import UniPCMultistepScheduler
import torch
import torchvision.transforms as T
import torchvision.transforms.v2 as T2
import cv2
from PIL import Image
# Target size handed to both the scale-jitter and the random-crop steps below.
output_res = (768, 768)

# Preprocessing for the conditioning image: random rescale, random crop
# (symmetric padding when the image is too small), conversion to a tensor,
# then normalisation with mean 0.5 and std 0.5.
_conditioning_steps = [
    T2.ScaleJitter(target_size=output_res, scale_range=(0.5, 3.0)),
    T2.RandomCrop(size=output_res, pad_if_needed=True, padding_mode="symmetric"),
    T.ToTensor(),
    T.Normalize([0.5], [0.5]),
]
conditioning_image_transforms = T.Compose(_conditioning_steps)

# ControlNet weights are stored as a Flax checkpoint (from_flax=True) and are
# loaded in half precision.
cnet = ControlNetModel.from_pretrained(
    "./models/catcon-controlnet-wd",
    torch_dtype=torch.float16,
    from_flax=True,
)

# Base Stable Diffusion pipeline with the ControlNet attached, also in fp16.
# NOTE(review): the pipeline is never moved to a device here -- confirm that
# fp16 inference on the default device is what is intended.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "./models/wd-1-5-b2",
    controlnet=cnet,
    torch_dtype=torch.float16,
)

# Seed torch's default generator; the same generator object is reused for
# every inference call.
generator = torch.manual_seed(0)
def infer(prompt, negative_prompt, image):
    """Generate one image from a prompt and a conditioning image.

    Args:
        prompt: Text prompt describing the desired output.
        negative_prompt: Text describing what to steer away from. An empty
            value is treated as "no negative prompt".
        image: Conditioning image as delivered by the Gradio image input
            (NumPy array by default); PIL images and tensors are accepted too.

    Returns:
        The single generated image produced by the pipeline.

    Raises:
        gr.Error: If no conditioning image was supplied.
    """
    if image is None:
        raise gr.Error("Please provide a conditioning image.")

    # The torchvision v2 transforms at the head of the preprocessing chain
    # operate on PIL images / tensors, whereas Gradio's "image" input yields a
    # NumPy array by default, so convert before preprocessing.
    if not isinstance(image, (Image.Image, torch.Tensor)):
        image = Image.fromarray(image)

    cond_input = conditioning_image_transforms(image)
    output = pipe(
        prompt,
        cond_input,
        # Previously accepted from the UI but never forwarded to the pipeline.
        negative_prompt=negative_prompt or None,
        generator=generator,
        num_images_per_prompt=1,
        num_inference_steps=20,
    )
    # `output[0]` is the *list* of generated images; the Gradio "image" output
    # needs a single image, so return the first (and only) one.
    return output.images[0]
# Text shown at the top of the demo page.
title = "Categorical Conditioning Controlnet for One-Shot Image Stylization."
description = "This is a demo on ControlNet which generates images based on the style of the conditioning input."

# Each inner list is one example; its elements line up with the components in
# `inputs` (prompt, negative prompt, conditioning image path).
examples = [["1girl, green hair, sweater, looking at viewer, upper body, beanie, outdoors, watercolor, night, turtleneck", "low quality", "wikipe_cond_1.png"]]

# Build and launch a single interface. The file previously launched a bare
# interface first (without title, description or examples); that blocking
# launch kept the fully configured interface below from ever being served.
demo = gr.Interface(
    fn=infer,
    inputs=["text", "text", "image"],
    outputs="image",
    title=title,
    description=description,
    examples=examples,
    theme='gradio/soft',
)
demo.launch()