Cognomen's picture
ToTensor instead of PILToTensor
4184307
raw
history blame
2.43 kB
import gradio as gr
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from diffusers import UniPCMultistepScheduler
import torch
import torchvision
import torchvision.transforms as T
#from torchvision.transforms import v2 as T2
import cv2
import PIL
from PIL import Image
import numpy as np
output_res = (768,768)
conditioning_image_transforms = T.Compose(
[
#T2.ScaleJitter(target_size=output_res, scale_range=(0.5, 3.0))),
T.RandomCrop(size=output_res, pad_if_needed=True, padding_mode="symmetric"),
T.ToTensor(),
T.Normalize([0.5], [0.5]),
]
)
cnet = ControlNetModel.from_pretrained("./models/catcon-controlnet-wd", torch_dtype=torch.float16, from_flax=True)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"./models/wd-1-5-b2",
controlnet=cnet,
torch_dtype=torch.float16,
)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
#pipe.enable_model_cpu_offload()
#pipe.enable_xformers_memory_efficient_attention()
generator = torch.manual_seed(0)
# inference function takes prompt, negative prompt and image
def infer(prompt, negative_prompt, image):
# implement your inference function here
inp = Image.fromarray(image)
cond_input = conditioning_image_transforms(inp)
#cond_input = T.ToPILImage(cond_input)
output = pipe(
prompt,
cond_input[0],
generator=generator,
num_images_per_prompt=4,
num_inference_steps=20
)
return output.images
gr.Interface(
infer,
inputs=[
gr.Textbox(
label="Enter prompt",
max_lines=1,
placeholder="1girl, green hair, sweater, looking at viewer, upper body, beanie, outdoors, watercolor, night, turtleneck",
),
gr.Textbox(
label="Enter negative prompt",
max_lines=1,
placeholder="low quality",
),
gr.Image(),
],
outputs=gr.Gallery().style(grid=[2], height="auto"),
title="Generate controlled outputs with Categorical Conditioning on Waifu Diffusion 1.5 beta 2.",
description="This Space uses image examples as style conditioning.",
examples=[["1girl, green hair, sweater, looking at viewer, upper body, beanie, outdoors, watercolor, night, turtleneck", "low quality", "wikipe_cond_1.png"]],
allow_flagging=False,
).launch(enable_queue=True)