import gradio as gr
import torch
import torchvision.transforms as T
import torchvision.transforms.v2 as T2
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from PIL import Image
output_res = (768, 768)

# Transforms applied to the conditioning image before it reaches the pipeline.
conditioning_image_transforms = T.Compose(
    [
        T2.ScaleJitter(target_size=output_res, scale_range=(0.5, 3.0)),
        T2.RandomCrop(size=output_res, pad_if_needed=True, padding_mode="symmetric"),
        T.ToTensor(),
        T.Normalize([0.5], [0.5]),
    ]
)
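# Note: ScaleJitter and RandomCrop are random, training-style augmentations, so
# the same upload can yield a different crop on every run. A deterministic
# alternative (an assumption on our part, not what the original Space used):
#
# conditioning_image_transforms = T.Compose(
#     [
#         T.Resize(min(output_res)),
#         T.CenterCrop(output_res),
#         T.ToTensor(),
#         T.Normalize([0.5], [0.5]),
#     ]
# )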
# Load the ControlNet weights (converted from Flax) and the fp16 base pipeline.
cnet = ControlNetModel.from_pretrained(
    "./models/catcon-controlnet-wd", torch_dtype=torch.float16, from_flax=True
)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "./models/wd-1-5-b2",
    controlnet=cnet,
    torch_dtype=torch.float16,
)
# Wire in the imported UniPC scheduler, which was never attached to the pipeline.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# fp16 weights cannot run on CPU; this assumes a CUDA device is available.
pipe = pipe.to("cuda")

generator = torch.manual_seed(0)
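# torch.manual_seed(0) seeds and returns the *global* default Generator, so any
# other RNG use in the process shifts the results. A dedicated generator would
# isolate the demo's randomness (a sketch, not in the original code):
#
# generator = torch.Generator(device="cuda").manual_seed(0)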
# Inference function: takes a prompt, a negative prompt, and a conditioning image.
def infer(prompt, negative_prompt, image):
    # Gradio's "image" component delivers a numpy array; convert to PIL so the
    # torchvision transforms accept it.
    cond_input = conditioning_image_transforms(Image.fromarray(image))
    output = pipe(
        prompt,
        cond_input,
        negative_prompt=negative_prompt,
        generator=generator,
        num_images_per_prompt=1,
        num_inference_steps=20,
    )
    # The pipeline returns a StableDiffusionPipelineOutput; hand back the first image.
    return output.images[0]
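# Quick local smoke test (commented out; assumes the example conditioning image
# wikipe_cond_1.png sits next to this script, and is not part of the original Space):
#
# import numpy as np
# test_image = np.array(Image.open("wikipe_cond_1.png").convert("RGB"))
# infer("1girl, green hair, watercolor", "low quality", test_image).save("out.png")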
title = "Categorical Conditioning ControlNet for One-Shot Image Stylization"
description = "A ControlNet demo that generates images matching the style of the conditioning input."

# Inputs and outputs must match the inference function's signature. Each inner
# list in `examples` is one example, one element per component in `inputs`.
examples = [[
    "1girl, green hair, sweater, looking at viewer, upper body, beanie, outdoors, watercolor, night, turtleneck",
    "low quality",
    "wikipe_cond_1.png",
]]

gr.Interface(
    fn=infer,
    inputs=["text", "text", "image"],
    outputs="image",
    title=title,
    description=description,
    examples=examples,
    theme="gradio/soft",
).launch()