Spaces:

Cognomen
/

CatCon-Controlnet-WD-1-5-b2

Runtime error

App Files Files Community

CatCon-Controlnet-WD-1-5-b2 / app.py

Cognomen

fix pipeline

a3ba152 almost 2 years ago

raw

history blame

2.14 kB

	import gradio as gr
	from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
	from diffusers import UniPCMultistepScheduler
	import torch
	import torchvision.transforms as T
	import torchvision.transforms.v2 as T2
	import cv2
	from PIL import Image

	output_res = (768,768)

	conditioning_image_transforms = T.Compose(
	[
	T2.ScaleJitter(target_size=output_res, scale_range=(0.5, 3.0)),
	T2.RandomCrop(size=output_res, pad_if_needed=True, padding_mode="symmetric"),
	T.ToTensor(),
	T.Normalize([0.5], [0.5]),
	]
	)

	cnet = ControlNetModel.from_pretrained("./models/catcon-controlnet-wd", torch_dtype=torch.float16, from_flax=True)
	pipe = StableDiffusionControlNetPipeline.from_pretrained(
	"./models/wd-1-5-b2",
	controlnet=cnet,
	torch_dtype=torch.float16,
	)

	generator = torch.manual_seed(0)

	# inference function takes prompt, negative prompt and image
	def infer(prompt, negative_prompt, image):
	# implement your inference function here

	cond_input = conditioning_image_transforms(image)

	output = pipe(
	prompt,
	cond_input,
	generator=generator,
	num_images_per_prompt=1,
	num_inference_steps=20
	)

	return output[0]

	# you need to pass inputs and outputs according to inference function
	gr.Interface(fn = infer, inputs = ["text", "text", "image"], outputs = "image").launch()

	title = "Categorical Conditioning Controlnet for One-Shot Image Stylization."
	description = "This is a demo on ControlNet which generates images based on the style of the conditioning input."
	# you need to pass your examples according to your inputs
	# each inner list is one example, each element in the list corresponding to a component in the `inputs`.
	examples = [["1girl, green hair, sweater, looking at viewer, upper body, beanie, outdoors, watercolor, night, turtleneck", "low quality", "wikipe_cond_1.png"]]
	gr.Interface(fn = infer, inputs = ["text", "text", "image"], outputs = "image",
	title = title, description = description, examples = examples, theme='gradio/soft').launch()