Video-Diffusion-WebUI

Running

App Files Files Community

Video-Diffusion-WebUI / video_diffusion /inpaint_zoom /zoom_in_app.py

piton13332

Update video_diffusion/inpaint_zoom/zoom_in_app.py

d4d0128 verified 23 days ago

raw

history blame contribute delete

7.2 kB

	import os

	import gradio as gr
	import numpy as np
	import torch
	from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
	from PIL import Image

	from video_diffusion.inpaint_zoom.utils.zoom_in_utils import dummy, image_grid, shrink_and_paste_on_blank, write_video

	# Restrict CUDA device visibility for this process to device index 0.
	os.environ.update(CUDA_VISIBLE_DEVICES="0")


	# Model ids offered in the "Text-Image Model Id" dropdown and handed to
	# DiffusionPipeline.from_pretrained; the first entry is the initial selection.
	stable_paint_model_list = [
	    "stabilityai/stable-diffusion-2-inpainting",
	    "runwayml/stable-diffusion-inpainting",
	    "SG161222/Realistic_Vision_V5.1_noVAE",
	    "SimianLuo/LCM_Dreamshaper_v7",
	]

	# Example prompts; the first entry pre-fills the "Prompt" textbox.
	stable_paint_prompt_list = [
	    (
	        "children running in the forest , sunny, bright, by studio ghibli painting, "
	        "superior quality, masterpiece, traditional Japanese colors, "
	        "by Grzegorz Rutkowski, concept art"
	    ),
	    "A beautiful landscape of a mountain range with a lake in the foreground",
	]

	# Example negative prompts; the first entry pre-fills the "Negative Prompt"
	# textbox. ("lurry" was a typo for "blurry".)
	stable_paint_negative_prompt_list = [
	    "blurry, bad art, blurred, text, watermark",
	]


	class StableDiffusionZoomIn:
	    """Build a zoom video by repeatedly inpainting around a shrunken frame.

	    The diffusion pipeline is loaded lazily on the first request and cached on
	    the instance; it is reloaded only when a different model id is requested.
	    """

	    # Device used for both the pipeline and the seeding generator.
	    _DEVICE = "cpu"

	    def __init__(self):
	        # Loaded diffusion pipeline, or None until ``load_model`` has run.
	        self.pipe = None
	        # Model id ``self.pipe`` was loaded from, or None when not known.
	        self.model_id = None

	    def load_model(self, model_id):
	        """Return the pipeline for *model_id*, loading it when necessary.

	        Args:
	            model_id: Identifier passed to ``DiffusionPipeline.from_pretrained``.

	        Returns:
	            The cached pipeline, moved to ``_DEVICE`` with a
	            ``DPMSolverMultistepScheduler``, the ``dummy`` safety checker and
	            attention slicing enabled.
	        """
	        loaded_id = getattr(self, "model_id", None)
	        # A pipeline whose origin is unknown (``loaded_id is None``) is reused
	        # as-is; a pipeline known to come from another model is replaced so
	        # that changing the model selection actually takes effect.
	        stale = loaded_id is not None and loaded_id != model_id
	        if self.pipe is None or stale:
	            pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float32)
	            pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
	            pipe = pipe.to(self._DEVICE)
	            pipe.safety_checker = dummy
	            pipe.enable_attention_slicing()
	            self.pipe = pipe
	            self.model_id = model_id

	        return self.pipe

	    @staticmethod
	    def _mask_from_alpha(rgba_image):
	        """Return an RGB mask that is white where *rgba_image* is transparent."""
	        alpha = np.array(rgba_image)[:, :, 3]
	        return Image.fromarray(255 - alpha).convert("RGB")

	    @staticmethod
	    def _interpolation_frames(current_image, prev_image_fix, mask_width, num_interpol_frames, width, height):
	        """Return the crop-and-resize frames between two inpainted images.

	        Each frame crops *current_image* by a step-dependent border, scales it
	        back to full size, and pastes a shrunken copy of *prev_image_fix* on
	        top to avoid the quality drop caused by zooming.
	        """
	        frames = []
	        for j in range(num_interpol_frames - 1):
	            interpol_width = round(
	                (1 - (1 - 2 * mask_width / height) ** (1 - (j + 1) / num_interpol_frames)) * height / 2
	            )
	            interpol_image = current_image.crop(
	                (interpol_width, interpol_width, width - interpol_width, height - interpol_width)
	            )
	            # PIL sizes are (width, height); both are equal here.
	            interpol_image = interpol_image.resize((width, height))

	            # Paste the higher resolution previous image in the middle.
	            interpol_width2 = round((1 - (height - 2 * mask_width) / (height - 2 * interpol_width)) / 2 * height)
	            prev_image_fix_crop = shrink_and_paste_on_blank(prev_image_fix, interpol_width2)
	            interpol_image.paste(prev_image_fix_crop, mask=prev_image_fix_crop)

	            frames.append(interpol_image)
	        return frames

	    def generate_video(
	        self,
	        model_id,
	        prompt,
	        negative_prompt,
	        guidance_scale,
	        num_inference_steps,
	    ):
	        """Generate the zoom video and return the path of the written file.

	        Args:
	            model_id: Model identifier forwarded to ``load_model``.
	            prompt: Text prompt used for every pipeline call.
	            negative_prompt: Negative prompt used for every pipeline call.
	            guidance_scale: Guidance scale forwarded to the pipeline.
	            num_inference_steps: Step count forwarded to the pipeline
	                (coerced to ``int``).

	        Returns:
	            ``"infinite_zoom_out.mp4"``, the path handed to ``write_video``.
	        """
	        pipe = self.load_model(model_id)
	        num_inference_steps = int(num_inference_steps)

	        num_init_images = 2
	        seed = 42
	        height = 512
	        width = height

	        # A new RGBA image has alpha 0 everywhere, so the derived mask is
	        # entirely white and the whole first picture comes from the prompt.
	        current_image = Image.new(mode="RGBA", size=(width, height))
	        mask_image = self._mask_from_alpha(current_image)
	        current_image = current_image.convert("RGB")

	        # Seed a generator on the pipeline's device. The previous code read an
	        # attribute that was never assigned, so this call always failed.
	        generator = torch.Generator(device=self._DEVICE).manual_seed(seed)

	        init_images = pipe(
	            prompt=[prompt] * num_init_images,
	            negative_prompt=[negative_prompt] * num_init_images,
	            image=current_image,
	            guidance_scale=guidance_scale,
	            height=height,
	            width=width,
	            generator=generator,
	            mask_image=mask_image,
	            num_inference_steps=num_inference_steps,
	        )[0]

	        # NOTE(review): the return value is unused; the call is kept in case
	        # the helper validates its input or has side effects - confirm.
	        image_grid(init_images, rows=1, cols=num_init_images)

	        # 1-based choice among the initial images, converted to an index.
	        init_image_selected = 1
	        selected_index = 0 if num_init_images == 1 else init_image_selected - 1

	        num_outpainting_steps = 20
	        mask_width = 128
	        num_interpol_frames = 30

	        current_image = init_images[selected_index]
	        all_frames = [current_image]

	        for i in range(num_outpainting_steps):
	            print(f"Generating image: {i + 1} / {num_outpainting_steps}")

	            prev_image_fix = current_image
	            prev_image = shrink_and_paste_on_blank(current_image, mask_width)

	            # Mask: black image with white edges of width ``mask_width``.
	            mask_image = self._mask_from_alpha(prev_image)

	            # Inpainting step. No generator is passed on purpose: seeding here
	            # makes the run deterministic but the output less exciting.
	            images = pipe(
	                prompt=prompt,
	                negative_prompt=negative_prompt,
	                image=prev_image.convert("RGB"),
	                guidance_scale=guidance_scale,
	                height=height,
	                width=width,
	                mask_image=mask_image,
	                num_inference_steps=num_inference_steps,
	            )[0]
	            current_image = images[0]
	            current_image.paste(prev_image, mask=prev_image)

	            # Interpolation steps between two inpainted images
	            # (= sequential zoom and crop).
	            all_frames.extend(
	                self._interpolation_frames(
	                    current_image, prev_image_fix, mask_width, num_interpol_frames, width, height
	                )
	            )
	            all_frames.append(current_image)

	        video_file_name = "infinite_zoom_out"
	        fps = 30
	        save_path = video_file_name + ".mp4"
	        write_video(save_path, all_frames, fps)
	        return save_path

	    @staticmethod
	    def app():
	        """Create the Gradio controls and connect the button to ``generate_video``.

	        Declared static because it takes no instance; a fresh
	        ``StableDiffusionZoomIn`` is created to serve the click callback.
	        """
	        with gr.Blocks():
	            with gr.Row():
	                with gr.Column():
	                    text2image_in_model_path = gr.Dropdown(
	                        choices=stable_paint_model_list,
	                        value=stable_paint_model_list[0],
	                        label="Text-Image Model Id",
	                    )

	                    text2image_in_prompt = gr.Textbox(
	                        lines=2, value=stable_paint_prompt_list[0], label="Prompt"
	                    )

	                    text2image_in_negative_prompt = gr.Textbox(
	                        lines=1, value=stable_paint_negative_prompt_list[0], label="Negative Prompt"
	                    )

	                    with gr.Row():
	                        with gr.Column():
	                            text2image_in_guidance_scale = gr.Slider(
	                                minimum=0.1, maximum=15, step=0.1, value=7.5, label="Guidance Scale"
	                            )

	                            text2image_in_num_inference_step = gr.Slider(
	                                minimum=1, maximum=100, step=1, value=50, label="Num Inference Step"
	                            )

	                    text2image_in_predict = gr.Button(value="Generator")

	                with gr.Column():
	                    output_image = gr.Video(label="Output")

	            # Input order must match the positional parameters of generate_video.
	            text2image_in_predict.click(
	                fn=StableDiffusionZoomIn().generate_video,
	                inputs=[
	                    text2image_in_model_path,
	                    text2image_in_prompt,
	                    text2image_in_negative_prompt,
	                    text2image_in_guidance_scale,
	                    text2image_in_num_inference_step,
	                ],
	                outputs=output_image,
	            )


	# Module-level alias so the UI builder is reachable both as
	# ``StableDiffusionZoomIn.app()`` and as ``app()``.
	app = StableDiffusionZoomIn.app