import torch
from diffusers import (
    AnimateDiffControlNetPipeline, AutoencoderKL,
    ControlNetModel, MotionAdapter, LCMScheduler
)
from diffusers.utils import export_to_gif, load_video
from controlnet_aux import MidasDetector # Faster than ZoeDetector
# Load depth-based ControlNet (in diffusers format)
controlnet = ControlNetModel.from_pretrained(
"lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
)
# Load AnimateDiff Motion Adapter (AnimateLCM)
motion_adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM", torch_dtype=torch.float16)
# Load VAE for SD 1.5
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)
# Load AnimateDiff pipeline with ControlNet
pipe = AnimateDiffControlNetPipeline.from_pretrained(
"SG161222/Realistic_Vision_V5.1_noVAE",
motion_adapter=motion_adapter,
controlnet=controlnet,
vae=vae,
).to(device="cuda", dtype=torch.float16)
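# Optional memory savers (assumption, not part of the original script): both are
# standard diffusers pipeline methods that trade a little speed for lower peak VRAM.
# pipe.enable_vae_slicing()
# pipe.enable_model_cpu_offload()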
# Use LCM Scheduler (optimized for AnimateLCM)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
# Load AnimateLCM LoRA
pipe.load_lora_weights(
"wangfuyun/AnimateLCM",
weight_name="AnimateLCM_sd15_t2v_lora.safetensors",
adapter_name="lcm-lora"
)
pipe.set_adapters(["lcm-lora"], adapter_weights=[0.8])
# Use MiDaS for depth extraction (faster)
depth_detector = MidasDetector.from_pretrained("lllyasviel/Annotators").to("cuda")
# Load input video for depth-based conditioning
video = load_video("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-vid2vid-input-1.gif")
conditioning_frames = []
# Process video frames into depth maps
for frame in video:
    conditioning_frames.append(depth_detector(frame))
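# The ControlNet-conditioned pipeline expects one conditioning frame per generated
# frame; this sanity check (added here, not in the original) makes that explicit.
assert len(conditioning_frames) == len(video)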
# Define prompts
prompt = "a panda, playing a guitar, sitting in a pink boat, in the ocean, mountains in background, realistic, high quality"
negative_prompt = "blurry, deformed, distorted, bad quality"
# Generate animated output
output = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    num_frames=len(video),
    num_inference_steps=10,
    guidance_scale=2.0,
    conditioning_frames=conditioning_frames,
    generator=torch.manual_seed(42),
).frames[0]
# Save animation as GIF
export_to_gif(output, "animatediff_controlnet.gif", fps=8)
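# Alternative output (assumption, not in the original): export_to_video from
# diffusers.utils writes the same frames to an MP4 instead of a GIF.
# from diffusers.utils import export_to_video
# export_to_video(output, "animatediff_controlnet.mp4", fps=8)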