update
app.py
ADDED
@@ -0,0 +1,77 @@
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from utils import write_video, dummy
from PIL import Image
import numpy as np
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch
import gradio as gr


def stable_diffusion_zoom_out(
    repo_id="stabilityai/stable-diffusion-2-inpainting",
    original_prompt="a dog",
    negative_prompt="a cat",
    steps=32,
    num_frames=10,
):
    pipe = DiffusionPipeline.from_pretrained(repo_id, torch_dtype=torch.float16, revision="fp16")
    pipe.enable_xformers_memory_efficient_attention()
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    pipe = pipe.to("cuda")
    pipe.safety_checker = dummy  # disable the safety checker (see utils.dummy)

    # Gradio sliders can deliver floats; these values are used for slicing.
    steps = int(steps)
    num_frames = int(num_frames)

    # Fully transparent canvas: alpha == 0 everywhere, so the inverted mask is
    # all white and the first pass inpaints the entire frame from the prompt.
    current_image = Image.new(mode="RGBA", size=(512, 512))
    mask_image = np.array(current_image)[:, :, 3]  # alpha channel drives the mask
    mask_image = Image.fromarray(255 - mask_image).convert("RGB")
    current_image = current_image.convert("RGB")

    num_images = 1
    prompt = [original_prompt] * num_images
    negative_prompt = [negative_prompt] * num_images

    images = pipe(prompt=prompt, negative_prompt=negative_prompt, image=current_image, mask_image=mask_image, num_inference_steps=25)[0]
    current_image = images[0]

    all_frames = []
    all_frames.append(current_image)

    for i in range(num_frames):
        # Shrink the last frame and paste it into the center of an empty RGBA
        # canvas; `steps` is the zoom margin (in pixels) exposed per frame.
        next_image = np.array(current_image.convert("RGBA")) * 0
        prev_image = current_image.resize((512 - 2 * steps, 512 - 2 * steps))
        prev_image = prev_image.convert("RGBA")
        prev_image = np.array(prev_image)
        next_image[:, :, 3] = 1
        next_image[steps:512 - steps, steps:512 - steps, :] = prev_image
        prev_image = Image.fromarray(next_image)
        current_image = prev_image
        # Only the near-transparent border ring turns white in the inverted
        # mask, so the model inpaints just the newly exposed margin.
        mask_image = np.array(current_image)[:, :, 3]
        mask_image = Image.fromarray(255 - mask_image).convert("RGB")
        current_image = current_image.convert("RGB")
        images = pipe(prompt=prompt, negative_prompt=negative_prompt, image=current_image, mask_image=mask_image, num_inference_steps=25)[0]
        current_image = images[0]
        # Paste the untouched center back over the result so consecutive frames
        # differ only in the freshly generated border.
        current_image.paste(prev_image, mask=prev_image)
        all_frames.append(current_image)

    save_path = "infinite_zoom_out.mp4"
    write_video(save_path, all_frames, fps=16)
    return save_path


inputs = [
    gr.Dropdown(["stabilityai/stable-diffusion-2-inpainting"], value="stabilityai/stable-diffusion-2-inpainting", label="Model"),
    gr.Textbox(lines=1, value="a dog", label="Prompt"),
    gr.Textbox(lines=1, value="a cat", label="Negative Prompt"),
    gr.Slider(minimum=1, maximum=64, value=32, step=1, label="Steps"),
    gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Frames"),
]
output = gr.Video()
title = "Stable Diffusion Infinite Zoom Out"

demo_app = gr.Interface(
    fn=stable_diffusion_zoom_out,
    inputs=inputs,
    outputs=output,
    title=title,
    theme="huggingface",
)
demo_app.queue().launch(debug=True)
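The zoom loop hinges on one trick: the alpha channel of the recentered frame doubles as the inpainting mask. A minimal standalone sketch of that masking step (no model needed; `margin` here is a hypothetical stand-in for the `steps` argument above):

import numpy as np
from PIL import Image

margin = 32  # plays the role of `steps` in app.py
frame = Image.new("RGB", (512, 512), "white")  # stand-in for the last generated frame
canvas = np.zeros((512, 512, 4), dtype=np.uint8)  # empty RGBA canvas, alpha == 0
inner = np.array(frame.resize((512 - 2 * margin, 512 - 2 * margin)).convert("RGBA"))
canvas[margin:512 - margin, margin:512 - margin, :] = inner
mask = Image.fromarray(255 - canvas[:, :, 3]).convert("RGB")
# mask is white on the border ring (to be inpainted) and black in the center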
utils.py
ADDED
@@ -0,0 +1,25 @@
import numpy as np
import cv2


def write_video(file_path, frames, fps):
    """
    Writes frames to an mp4 video file.

    :param file_path: Path to output video, must end with .mp4
    :param frames: List of PIL.Image objects
    :param fps: Desired frame rate
    """
    w, h = frames[0].size
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(file_path, fourcc, fps, (w, h))

    for frame in frames:
        # PIL stores pixels as RGB; OpenCV expects BGR, so convert per frame.
        np_frame = np.array(frame.convert("RGB"))
        cv_frame = cv2.cvtColor(np_frame, cv2.COLOR_RGB2BGR)
        writer.write(cv_frame)

    writer.release()


def dummy(images, **kwargs):
    # Drop-in replacement for the diffusers safety checker: return the images
    # unchanged and flag every one of them as safe.
    return images, [False] * len(images)
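To smoke-test write_video without running the diffusion pipeline, a minimal sketch (the output name test.mp4 is arbitrary; assumes opencv-python is installed):

from PIL import Image
from utils import write_video

# Two dozen solid grayscale frames fading from black to near-white.
frames = [Image.new("RGB", (512, 512), (i * 10, i * 10, i * 10)) for i in range(24)]
write_video("test.mp4", frames, fps=16)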