import gradio as gr
import numpy as np
import torch

from video_diffusion.stable_diffusion_video.stable_diffusion_pipeline import StableDiffusionWalkPipeline
from video_diffusion.utils.model_list import stable_model_list


class StableDiffusionText2VideoGenerator:
    def __init__(self):
        self.pipe = None

    def load_model(self, model_path):
        # Load the walk pipeline once and cache it on the instance; later
        # calls reuse the already-configured pipeline.
        if self.pipe is None:
            self.pipe = StableDiffusionWalkPipeline.from_pretrained(
                model_path,
                torch_dtype=torch.float16,
                revision="fp16",
            )
            self.pipe.to("cuda")
            self.pipe.enable_xformers_memory_efficient_attention()
            self.pipe.enable_attention_slicing()

        return self.pipe
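
    # Note: enable_xformers_memory_efficient_attention() requires the optional
    # xformers package. A guarded variant, as a sketch (this fallback is an
    # assumption, not part of the original code):
    #
    #   try:
    #       self.pipe.enable_xformers_memory_efficient_attention()
    #   except ModuleNotFoundError:
    #       self.pipe.enable_attention_slicing()  # slicing-only fallback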

    def generate_video(
        self,
        model_path: str,
        first_prompts: str,
        second_prompts: str,
        negative_prompt: str,
        num_interpolation_steps: int,
        guidance_scale: float,
        num_inference_steps: int,
        height: int,
        width: int,
        upsample: bool,
        fps: int,
    ):
        # Draw a random seed per prompt so each run walks a fresh path
        # through latent space.
        first_seed = np.random.randint(0, 100000)
        second_seed = np.random.randint(0, 100000)
        seeds = [first_seed, second_seed]
        prompts = [first_prompts, second_prompts]
        pipe = self.load_model(model_path=model_path)

        # Interpolate between the two prompt embeddings and render the
        # intermediate frames into a video.
        output_video = pipe.walk(
            prompts=prompts,
            num_interpolation_steps=int(num_interpolation_steps),
            height=height,
            width=width,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            negative_prompt=negative_prompt,
            seeds=seeds,
            upsample=upsample,
            fps=fps,
        )

        return output_video
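

# A sketch of calling generate_video directly, outside the Gradio UI. All
# argument values below are hypothetical, and a CUDA-capable GPU is assumed:
#
#   generator = StableDiffusionText2VideoGenerator()
#   video_path = generator.generate_video(
#       model_path=stable_model_list[0],
#       first_prompts="a sunrise over the ocean",
#       second_prompts="a starry night sky",
#       negative_prompt="blurry, low quality",
#       num_interpolation_steps=10,
#       guidance_scale=8.5,
#       num_inference_steps=30,
#       height=512,
#       width=512,
#       upsample=False,
#       fps=10,
#   )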


def app():
    # Build the Gradio UI and wire the generate button to the pipeline.
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                stable_text2video_first_prompt = gr.Textbox(
                    lines=1,
                    placeholder="First Prompt",
                    show_label=False,
                )
                stable_text2video_second_prompt = gr.Textbox(
                    lines=1,
                    placeholder="Second Prompt",
                    show_label=False,
                )
                stable_text2video_negative_prompt = gr.Textbox(
                    lines=1,
                    placeholder="Negative Prompt",
                    show_label=False,
                )
                with gr.Row():
                    with gr.Column():
                        stable_text2video_model_path = gr.Dropdown(
                            choices=stable_model_list,
                            label="Stable Model List",
                            value=stable_model_list[0],
                        )
                        stable_text2video_guidance_scale = gr.Slider(
                            minimum=0,
                            maximum=15,
                            step=0.5,
                            value=8.5,
                            label="Guidance Scale",
                        )
                        stable_text2video_num_inference_steps = gr.Slider(
                            minimum=1,
                            maximum=100,
                            step=1,
                            value=30,
                            label="Number of Inference Steps",
                        )
                        stable_text2video_fps = gr.Slider(
                            minimum=1,
                            maximum=60,
                            step=1,
                            value=10,
                            label="FPS",
                        )
                    with gr.Column():
                        stable_text2video_num_interpolation_steps = gr.Number(
                            value=10,
                            label="Number of Interpolation Steps",
                        )
                        stable_text2video_height = gr.Slider(
                            minimum=1,
                            maximum=1000,
                            step=1,
                            value=512,
                            label="Height",
                        )
                        stable_text2video_width = gr.Slider(
                            minimum=1,
                            maximum=1000,
                            step=1,
                            value=512,
                            label="Width",
                        )
                        stable_text2video_upsample = gr.Checkbox(
                            label="Upsample",
                            value=False,
                        )

                text2video_generate = gr.Button(value="Generate")

            with gr.Column():
                text2video_output = gr.Video(label="Output")

        text2video_generate.click(
            fn=StableDiffusionText2VideoGenerator().generate_video,
            inputs=[
                stable_text2video_model_path,
                stable_text2video_first_prompt,
                stable_text2video_second_prompt,
                stable_text2video_negative_prompt,
                stable_text2video_num_interpolation_steps,
                stable_text2video_guidance_scale,
                stable_text2video_num_inference_steps,
                stable_text2video_height,
                stable_text2video_width,
                stable_text2video_upsample,
                stable_text2video_fps,
            ],
            outputs=text2video_output,
        )

    # Return the Blocks so a caller can launch or embed this interface.
    return demo
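

# Minimal standalone entry point, a sketch under the assumption that this
# module is run directly; in the original project the app() interface is
# more likely mounted as a tab inside a larger gr.Blocks app.
if __name__ == "__main__":
    app().launch()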