import gradio as gr
import torch
import torchvision.transforms as T
import torchvision.transforms.v2 as T2
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from PIL import Image
output_res = (768, 768)

# Transforms applied to the conditioning image before it reaches the pipeline.
conditioning_image_transforms = T.Compose(
    [
        T2.ScaleJitter(target_size=output_res, scale_range=(0.5, 3.0)),
        T2.RandomCrop(size=output_res, pad_if_needed=True, padding_mode="symmetric"),
        T.ToTensor(),
        T.Normalize([0.5], [0.5]),
    ]
)
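# Note: ScaleJitter and RandomCrop are random, training-style augmentations, so
# the same upload can yield a different crop on every run. A deterministic
# alternative (an assumption on our part, not what the original Space used):
#
# conditioning_image_transforms = T.Compose(
#     [
#         T.Resize(min(output_res)),
#         T.CenterCrop(output_res),
#         T.ToTensor(),
#         T.Normalize([0.5], [0.5]),
#     ]
# )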
# Load the ControlNet weights (converted from Flax) and the fp16 base pipeline.
cnet = ControlNetModel.from_pretrained(
    "./models/catcon-controlnet-wd", torch_dtype=torch.float16, from_flax=True
)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "./models/wd-1-5-b2",
    controlnet=cnet,
    torch_dtype=torch.float16,
)
# Wire in the imported UniPC scheduler, which was never attached to the pipeline.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# fp16 weights cannot run on CPU; this assumes a CUDA device is available.
pipe = pipe.to("cuda")

generator = torch.manual_seed(0)
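# torch.manual_seed(0) seeds and returns the *global* default Generator, so any
# other RNG use in the process shifts the results. A dedicated generator would
# isolate the demo's randomness (a sketch, not in the original code):
#
# generator = torch.Generator(device="cuda").manual_seed(0)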
# Inference function: takes a prompt, a negative prompt, and a conditioning image.
def infer(prompt, negative_prompt, image):
    # Gradio's "image" component delivers a numpy array; convert to PIL so the
    # torchvision transforms accept it.
    cond_input = conditioning_image_transforms(Image.fromarray(image))
    output = pipe(
        prompt,
        cond_input,
        negative_prompt=negative_prompt,
        generator=generator,
        num_images_per_prompt=1,
        num_inference_steps=20,
    )
    # The pipeline returns a StableDiffusionPipelineOutput; hand back the first image.
    return output.images[0]
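# Quick local smoke test (commented out; assumes the example conditioning image
# wikipe_cond_1.png sits next to this script, and is not part of the original Space):
#
# import numpy as np
# test_image = np.array(Image.open("wikipe_cond_1.png").convert("RGB"))
# infer("1girl, green hair, watercolor", "low quality", test_image).save("out.png")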
title = "Categorical Conditioning ControlNet for One-Shot Image Stylization"
description = "A ControlNet demo that generates images matching the style of the conditioning input."

# Inputs and outputs must match the inference function's signature. Each inner
# list in `examples` is one example, one element per component in `inputs`.
examples = [[
    "1girl, green hair, sweater, looking at viewer, upper body, beanie, outdoors, watercolor, night, turtleneck",
    "low quality",
    "wikipe_cond_1.png",
]]

gr.Interface(
    fn=infer,
    inputs=["text", "text", "image"],
    outputs="image",
    title=title,
    description=description,
    examples=examples,
    theme="gradio/soft",
).launch()