#!/usr/bin/env python
import math

import cv2
import numpy as np
import torch
from PIL import Image
from diffusers import (
    AutoencoderKL,
    ControlNetModel,
    DiffusionPipeline,
    StableDiffusionControlNetImg2ImgPipeline,
    StableDiffusionControlNetPipeline,
    StableDiffusionXLControlNetPipeline,
    UniPCMultistepScheduler,
)
from diffusers.utils import load_image
import gradio as gr
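
# Gradio demo: generate images with Stable Diffusion guided by Canny edge maps
# (ControlNet). The UI drives the SD 1.5 path (process -> process_canny); the
# tile img2img and SDXL + refiner pipelines below are loaded but are not wired
# into the UI.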
# SD 1.5 + Canny ControlNet (the pipeline the UI actually uses).
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)  # NOTE: loaded but never attached to a pipeline below
canny_controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16)
canny_pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V3.0_VAE", controlnet=canny_controlnet, torch_dtype=torch.float16, use_safetensors=True
)
canny_pipe.scheduler = UniPCMultistepScheduler.from_config(canny_pipe.scheduler.config)
canny_pipe.enable_model_cpu_offload()
canny_pipe.enable_xformers_memory_efficient_attention()

# SD 1.5 img2img with the tile ControlNet, for a low-strength refinement pass.
canny_controlnet_tile = ControlNetModel.from_pretrained("lllyasviel/control_v11f1e_sd15_tile", torch_dtype=torch.float16)
canny_pipe_img2img = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V3.0_VAE", controlnet=canny_controlnet_tile, torch_dtype=torch.float16, use_safetensors=True
)
canny_pipe_img2img.enable_model_cpu_offload()
canny_pipe_img2img.enable_xformers_memory_efficient_attention()
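
# SDXL base with the Canny ControlNet. The fp16-fix VAE replaces the stock
# SDXL VAE, which can produce NaNs when run in float16.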
controlnet_xl = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=torch.float16,
)
vae_xl = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
pipe_xl = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet_xl,
    vae=vae_xl,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
pipe_xl.scheduler = UniPCMultistepScheduler.from_config(pipe_xl.scheduler.config)
pipe_xl.enable_xformers_memory_efficient_attention()
pipe_xl.enable_model_cpu_offload()
# The SDXL refiner shares the base pipeline's second text encoder and VAE to save memory.
refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=pipe_xl.text_encoder_2,
    vae=pipe_xl.vae,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
refiner.enable_xformers_memory_efficient_attention()
refiner.enable_model_cpu_offload()

def resize_image_output(im, width, height):
    # Resize a PIL image to an explicit size with bicubic filtering.
    im = np.array(im)
    new_size = (width, height)  # cv2.resize expects (width, height)
    img = cv2.resize(im, new_size, interpolation=cv2.INTER_CUBIC)
    img = Image.fromarray(img)
    return img
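
# Clamp the input's pixel count to [512*512, ~590k] while keeping its aspect
# ratio, then snap both sides to multiples of 8. For example, a 1920x1080 input
# (2,073,600 px) is scaled down by sqrt(2073600 / 590000) ~= 1.87 to 1024x576.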
def resize_image(im, max_size=590000):
    [x, y, z] = im.shape  # x = height, y = width
    new_size = [0, 0]
    min_size = 262144  # 512 * 512
    if x * y > max_size:
        # Downscale so the pixel count is at most max_size.
        scale_ratio = math.sqrt((x * y) / max_size)
        new_size[0] = int(x / scale_ratio)
        new_size[1] = int(y / scale_ratio)
    elif x * y <= min_size:
        # Upscale so the pixel count is at least min_size.
        scale_ratio = math.sqrt((x * y) / min_size)
        new_size[0] = int(x / scale_ratio)
        new_size[1] = int(y / scale_ratio)
    else:
        new_size[0] = int(x)
        new_size[1] = int(y)
    # Snap both sides down to multiples of 8, as required by the SD latent space.
    height = (new_size[0] // 8) * 8
    width = (new_size[1] // 8) * 8
    img = cv2.resize(im, (width, height), interpolation=cv2.INTER_CUBIC)
    return img
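
# Optional tile-based img2img refinement pass (not wired into the Gradio UI).
# Most parameters exist for signature parity with the other process_* functions
# but are unused; the sampling settings are fixed below.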
def process_canny_tile(input_image, control_image, x, y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength_conditioning, scale, seed, eta, low_threshold, high_threshold):
    image = input_image
    # Note: the prompt parameter is ignored; an empty prompt with the tile
    # ControlNet in guess mode and low strength refines detail in the input.
    return canny_pipe_img2img(
        prompt="",
        image=image,
        control_image=image,
        num_inference_steps=20,
        guidance_scale=4,
        strength=0.3,
        guess_mode=True,
        negative_prompt=n_prompt,
        num_images_per_prompt=1,
        eta=eta,
        generator=torch.Generator(device="cpu").manual_seed(int(seed)),
    ).images
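
# Text-to-image generation conditioned on the Canny edge map. x/y are the
# height/width computed by resize_image; returns a list of PIL images.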
def process_canny(input_image, x, y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, low_threshold, high_threshold):
    image = input_image
    return canny_pipe(
        prompt=",".join([prompt, a_prompt]),
        image=image,
        height=x,
        width=y,
        num_inference_steps=ddim_steps,
        guidance_scale=scale,
        negative_prompt=n_prompt,
        num_images_per_prompt=num_samples,
        eta=eta,
        controlnet_conditioning_scale=strength,
        generator=torch.Generator(device="cpu").manual_seed(int(seed)),
    ).images
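
# SDXL variant: the base generates latents, then the refiner finishes the last
# 20% of the noise schedule (not wired into the Gradio UI).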
def process_canny_sdxl(input_image, x, y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, low_threshold, high_threshold):
    image = input_image
    # Generate latents with the SDXL base, then refine the final 20% of the
    # schedule with the refiner (denoising_start=0.8).
    image = pipe_xl(
        prompt=",".join([prompt, a_prompt]),
        image=image,
        height=x,
        width=y,
        num_inference_steps=ddim_steps,
        guidance_scale=scale,
        negative_prompt=n_prompt,
        num_images_per_prompt=num_samples,
        eta=eta,
        controlnet_conditioning_scale=strength,
        generator=torch.Generator(device="cpu").manual_seed(int(seed)),
        output_type="latent",
    ).images
    return refiner(
        prompt=prompt,
        num_inference_steps=ddim_steps,
        num_images_per_prompt=num_samples,
        denoising_start=0.8,
        image=image,
    ).images
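
# Gradio callback: load the image, resize it, extract Canny edges, and generate.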
def process(image, prompt, a_prompt, n_prompt, ddim_steps, strength, scale, seed, eta, low_threshold, high_threshold):
    image = load_image(image)
    image = np.array(image)
    image = resize_image(image)
    [x, y, z] = image.shape
    # Build a 3-channel Canny edge map as the ControlNet conditioning image.
    image = cv2.Canny(image, low_threshold, high_threshold)
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    image = Image.fromarray(image)
    return process_canny(image, x, y, prompt, a_prompt, n_prompt, 1, None, ddim_steps, False, float(strength), scale, seed, eta, low_threshold, high_threshold)[0]
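
# Gradio UI: single-image input, prompt, and the standard ControlNet knobs.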
demo = gr.Blocks().queue()
with demo:
    with gr.Row():
        gr.Markdown("## Control Stable Diffusion with Canny Edge Maps")
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Input Image")
            input_prompt = gr.Textbox(label="Prompt")
            run_button = gr.Button("Run")
            with gr.Accordion("Advanced Options"):
                strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
                low_threshold = gr.Slider(label="Canny low threshold", minimum=1, maximum=255, value=100, step=1)
                high_threshold = gr.Slider(label="Canny high threshold", minimum=1, maximum=255, value=200, step=1)
                ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1)
                scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=7.5, step=0.1)  # default value was 9.0
                seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True)
                eta = gr.Number(label="eta (DDIM)", value=0.0)
                a_prompt = gr.Textbox(label="Added Prompt", value="best quality, extremely detailed")
                n_prompt = gr.Textbox(label="Negative Prompt",
                                      value="longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality")
        with gr.Column():
            result = gr.Image(label="Output", type="pil")
    ips = [input_image, input_prompt, a_prompt, n_prompt, ddim_steps, strength, scale, seed, eta, low_threshold, high_threshold]
    run_button.click(fn=process, inputs=ips, outputs=[result])
demo.launch()