#!/usr/bin/env python
import math

import cv2
import numpy as np
import torch
from PIL import Image
from diffusers import (
    AutoencoderKL,
    ControlNetModel,
    DiffusionPipeline,
    StableDiffusionControlNetImg2ImgPipeline,
    StableDiffusionControlNetPipeline,
    StableDiffusionXLControlNetPipeline,
    UniPCMultistepScheduler,
)
from diffusers.utils import load_image
import gradio as gr
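
# Gradio demo: generate images with Stable Diffusion guided by Canny edge maps
# (ControlNet). The UI drives the SD 1.5 path (process -> process_canny); the
# tile img2img and SDXL + refiner pipelines below are loaded but are not wired
# into the UI.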
# SD 1.5 + Canny ControlNet (the pipeline the UI actually uses).
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)  # NOTE: loaded but never attached to a pipeline below
canny_controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16)
canny_pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V3.0_VAE", controlnet=canny_controlnet, torch_dtype=torch.float16, use_safetensors=True
)
canny_pipe.scheduler = UniPCMultistepScheduler.from_config(canny_pipe.scheduler.config)
canny_pipe.enable_model_cpu_offload()
canny_pipe.enable_xformers_memory_efficient_attention()

# SD 1.5 img2img with the tile ControlNet, for a low-strength refinement pass.
canny_controlnet_tile = ControlNetModel.from_pretrained("lllyasviel/control_v11f1e_sd15_tile", torch_dtype=torch.float16)
canny_pipe_img2img = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V3.0_VAE", controlnet=canny_controlnet_tile, torch_dtype=torch.float16, use_safetensors=True
)
canny_pipe_img2img.enable_model_cpu_offload()
canny_pipe_img2img.enable_xformers_memory_efficient_attention()
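
# SDXL base with the Canny ControlNet. The fp16-fix VAE replaces the stock
# SDXL VAE, which can produce NaNs when run in float16.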
controlnet_xl = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=torch.float16,
)
vae_xl = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
pipe_xl = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet_xl,
    vae=vae_xl,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
pipe_xl.scheduler = UniPCMultistepScheduler.from_config(pipe_xl.scheduler.config)
pipe_xl.enable_xformers_memory_efficient_attention()
pipe_xl.enable_model_cpu_offload()
# The SDXL refiner shares the base pipeline's second text encoder and VAE to save memory.
refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=pipe_xl.text_encoder_2,
    vae=pipe_xl.vae,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
refiner.enable_xformers_memory_efficient_attention()
refiner.enable_model_cpu_offload()

def resize_image_output(im, width, height):
    # Resize a PIL image to an explicit size with bicubic filtering.
    im = np.array(im)
    new_size = (width, height)  # cv2.resize expects (width, height)
    img = cv2.resize(im, new_size, interpolation=cv2.INTER_CUBIC)
    img = Image.fromarray(img)
    return img
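
# Clamp the input's pixel count to [512*512, ~590k] while keeping its aspect
# ratio, then snap both sides to multiples of 8. For example, a 1920x1080 input
# (2,073,600 px) is scaled down by sqrt(2073600 / 590000) ~= 1.87 to 1024x576.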
def resize_image(im, max_size=590000):
    [x, y, z] = im.shape  # x = height, y = width
    new_size = [0, 0]
    min_size = 262144  # 512 * 512
    if x * y > max_size:
        # Downscale so the pixel count is at most max_size.
        scale_ratio = math.sqrt((x * y) / max_size)
        new_size[0] = int(x / scale_ratio)
        new_size[1] = int(y / scale_ratio)
    elif x * y <= min_size:
        # Upscale so the pixel count is at least min_size.
        scale_ratio = math.sqrt((x * y) / min_size)
        new_size[0] = int(x / scale_ratio)
        new_size[1] = int(y / scale_ratio)
    else:
        new_size[0] = int(x)
        new_size[1] = int(y)
    # Snap both sides down to multiples of 8, as required by the SD latent space.
    height = (new_size[0] // 8) * 8
    width = (new_size[1] // 8) * 8
    img = cv2.resize(im, (width, height), interpolation=cv2.INTER_CUBIC)
    return img
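
# Optional tile-based img2img refinement pass (not wired into the Gradio UI).
# Most parameters exist for signature parity with the other process_* functions
# but are unused; the sampling settings are fixed below.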
def process_canny_tile(input_image, control_image, x, y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength_conditioning, scale, seed, eta, low_threshold, high_threshold):
    image = input_image
    # Note: the prompt parameter is ignored; an empty prompt with the tile
    # ControlNet in guess mode and low strength refines detail in the input.
    return canny_pipe_img2img(
        prompt="",
        image=image,
        control_image=image,
        num_inference_steps=20,
        guidance_scale=4,
        strength=0.3,
        guess_mode=True,
        negative_prompt=n_prompt,
        num_images_per_prompt=1,
        eta=eta,
        generator=torch.Generator(device="cpu").manual_seed(int(seed)),
    ).images
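
# Text-to-image generation conditioned on the Canny edge map. x/y are the
# height/width computed by resize_image; returns a list of PIL images.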
def process_canny(input_image, x, y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, low_threshold, high_threshold):
    image = input_image
    return canny_pipe(
        prompt=",".join([prompt, a_prompt]),
        image=image,
        height=x,
        width=y,
        num_inference_steps=ddim_steps,
        guidance_scale=scale,
        negative_prompt=n_prompt,
        num_images_per_prompt=num_samples,
        eta=eta,
        controlnet_conditioning_scale=strength,
        generator=torch.Generator(device="cpu").manual_seed(int(seed)),
    ).images
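
# SDXL variant: the base generates latents, then the refiner finishes the last
# 20% of the noise schedule (not wired into the Gradio UI).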
def process_canny_sdxl(input_image, x, y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, low_threshold, high_threshold):
    image = input_image
    # Generate latents with the SDXL base, then refine the final 20% of the
    # schedule with the refiner (denoising_start=0.8).
    image = pipe_xl(
        prompt=",".join([prompt, a_prompt]),
        image=image,
        height=x,
        width=y,
        num_inference_steps=ddim_steps,
        guidance_scale=scale,
        negative_prompt=n_prompt,
        num_images_per_prompt=num_samples,
        eta=eta,
        controlnet_conditioning_scale=strength,
        generator=torch.Generator(device="cpu").manual_seed(int(seed)),
        output_type="latent",
    ).images
    return refiner(
        prompt=prompt,
        num_inference_steps=ddim_steps,
        num_images_per_prompt=num_samples,
        denoising_start=0.8,
        image=image,
    ).images
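
# Gradio callback: load the image, resize it, extract Canny edges, and generate.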
def process(image, prompt, a_prompt, n_prompt, ddim_steps, strength, scale, seed, eta, low_threshold, high_threshold):
    image = load_image(image)
    image = np.array(image)
    image = resize_image(image)
    [x, y, z] = image.shape
    # Build a 3-channel Canny edge map as the ControlNet conditioning image.
    image = cv2.Canny(image, low_threshold, high_threshold)
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    image = Image.fromarray(image)
    return process_canny(image, x, y, prompt, a_prompt, n_prompt, 1, None, ddim_steps, False, float(strength), scale, seed, eta, low_threshold, high_threshold)[0]
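
# Gradio UI: single-image input, prompt, and the standard ControlNet knobs.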
demo = gr.Blocks().queue()
with demo:
    with gr.Row():
        gr.Markdown("## Control Stable Diffusion with Canny Edge Maps")
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Input Image")
            input_prompt = gr.Textbox(label="Prompt")
            run_button = gr.Button("Run")
            with gr.Accordion("Advanced Options"):
                strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
                low_threshold = gr.Slider(label="Canny low threshold", minimum=1, maximum=255, value=100, step=1)
                high_threshold = gr.Slider(label="Canny high threshold", minimum=1, maximum=255, value=200, step=1)
                ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1)
                scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=7.5, step=0.1)  # default value was 9.0
                seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True)
                eta = gr.Number(label="eta (DDIM)", value=0.0)
                a_prompt = gr.Textbox(label="Added Prompt", value="best quality, extremely detailed")
                n_prompt = gr.Textbox(label="Negative Prompt",
                                      value="longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality")
        with gr.Column():
            result = gr.Image(label="Output", type="pil")
    ips = [input_image, input_prompt, a_prompt, n_prompt, ddim_steps, strength, scale, seed, eta, low_threshold, high_threshold]
    run_button.click(fn=process, inputs=ips, outputs=[result])
demo.launch()