Upload 6 files

Files changed:
- .gitattributes +38 -35
- README.md +12 -14
- app.py +165 -64
- cat.png +3 -0
- peng.png +3 -0
- requirements.txt +10 -1
.gitattributes
CHANGED
@@ -1,35 +1,38 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+forg.jpg filter=lfs diff=lfs merge=lfs -text
+peng.png filter=lfs diff=lfs merge=lfs -text
+cat.png filter=lfs diff=lfs merge=lfs -text
README.md
CHANGED
@@ -1,14 +1,12 @@
----
-title: Wan2
-emoji:
-colorFrom:
-colorTo:
-sdk: gradio
-sdk_version: 5.0
-app_file: app.py
-pinned: false
-
-
-
-
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
+---
+title: Wan2.2 Fast
+emoji: 🔥🎨
+colorFrom: purple
+colorTo: pink
+sdk: gradio
+sdk_version: 5.30.0
+app_file: app.py
+pinned: false
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
CHANGED
@@ -1,64 +1,165 @@
-import
-from
+import torch
+from diffusers import AutoencoderKLWan, WanPipeline, WanImageToVideoPipeline, UniPCMultistepScheduler
+from diffusers.utils import export_to_video
+import gradio as gr
+import tempfile
+import spaces
+import numpy as np
+from PIL import Image
+import random
+
+MODEL_ID = "FastVideo/FastWan2.2-TI2V-5B-FullAttn-Diffusers"
+vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
+
+# Initialize pipelines
+text_to_video_pipe = WanPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)
+image_to_video_pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)
+
+for pipe in [text_to_video_pipe, image_to_video_pipe]:
+    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
+    pipe.to("cuda")
+
+# Constants
+MOD_VALUE = 32
+DEFAULT_H_SLIDER_VALUE = 896
+DEFAULT_W_SLIDER_VALUE = 896
+NEW_FORMULA_MAX_AREA = 720 * 1024
+SLIDER_MIN_H, SLIDER_MAX_H = 256, 1024
+SLIDER_MIN_W, SLIDER_MAX_W = 256, 1024
+MAX_SEED = np.iinfo(np.int32).max
+FIXED_FPS = 24
+MIN_FRAMES_MODEL = 25
+MAX_FRAMES_MODEL = 193
+
+default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
+default_negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, watermark, text, signature"
+
+def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area, min_slider_h, max_slider_h, min_slider_w, max_slider_w, default_h, default_w):
+    orig_w, orig_h = pil_image.size
+    if orig_w <= 0 or orig_h <= 0:
+        return default_h, default_w
+    aspect_ratio = orig_h / orig_w
+
+    calc_h = round(np.sqrt(calculation_max_area * aspect_ratio))
+    calc_w = round(np.sqrt(calculation_max_area / aspect_ratio))
+    calc_h = max(mod_val, (calc_h // mod_val) * mod_val)
+    calc_w = max(mod_val, (calc_w // mod_val) * mod_val)
+
+    new_h = int(np.clip(calc_h, min_slider_h, (max_slider_h // mod_val) * mod_val))
+    new_w = int(np.clip(calc_w, min_slider_w, (max_slider_w // mod_val) * mod_val))
+
+    return new_h, new_w
+
+def handle_image_upload_for_dims_wan(uploaded_pil_image, current_h_val, current_w_val):
+    if uploaded_pil_image is None:
+        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
+    try:
+        new_h, new_w = _calculate_new_dimensions_wan(
+            uploaded_pil_image, MOD_VALUE, NEW_FORMULA_MAX_AREA,
+            SLIDER_MIN_H, SLIDER_MAX_H, SLIDER_MIN_W, SLIDER_MAX_W,
+            DEFAULT_H_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE
+        )
+        return gr.update(value=new_h), gr.update(value=new_w)
+    except Exception as e:
+        gr.Warning("Error attempting to calculate new dimensions")
+        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
+
+def get_duration(input_image, prompt, height, width,
+                 negative_prompt, duration_seconds,
+                 guidance_scale, steps,
+                 seed, randomize_seed,
+                 progress):
+    if steps > 4 and duration_seconds > 4:
+        return 90
+    elif steps > 4 or duration_seconds > 4:
+        return 75
+    else:
+        return 60
+
+@spaces.GPU(duration=get_duration)
+def generate_video(input_image, prompt, height, width, negative_prompt=default_negative_prompt, duration_seconds=2, guidance_scale=0, steps=4, seed=44, randomize_seed=False, progress=gr.Progress(track_tqdm=True)):
+    target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
+    target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
+
+    num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
+
+    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
+
+    if input_image is not None:
+        resized_image = input_image.resize((target_w, target_h))
+        with torch.inference_mode():
+            output_frames_list = image_to_video_pipe(
+                image=resized_image, prompt=prompt, negative_prompt=negative_prompt,
+                height=target_h, width=target_w, num_frames=num_frames,
+                guidance_scale=float(guidance_scale), num_inference_steps=int(steps),
+                generator=torch.Generator(device="cuda").manual_seed(current_seed)
+            ).frames[0]
+    else:
+        with torch.inference_mode():
+            output_frames_list = text_to_video_pipe(
+                prompt=prompt, negative_prompt=negative_prompt,
+                height=target_h, width=target_w, num_frames=num_frames,
+                guidance_scale=float(guidance_scale), num_inference_steps=int(steps),
+                generator=torch.Generator(device="cuda").manual_seed(current_seed)
+            ).frames[0]
+
+    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
+        video_path = tmpfile.name
+    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
+    return video_path, current_seed
+
+with gr.Blocks() as demo:
+    gr.Markdown("# Fast Wan 2.2 TI2V 5B Demo")
+    gr.Markdown("""This Demo is using [FastWan2.2-TI2V-5B](https://huggingface.co/FastVideo/FastWan2.2-TI2V-5B-FullAttn-Diffusers) which is fine-tuned with Sparse-distill method which allows wan to generate high quality videos in 3-5 steps.""")
+
+    with gr.Row():
+        with gr.Column():
+            input_image_component = gr.Image(type="pil", label="Input Image (optional, auto-resized to target H/W)")
+            prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)
+            duration_seconds_input = gr.Slider(minimum=round(MIN_FRAMES_MODEL/FIXED_FPS,1), maximum=round(MAX_FRAMES_MODEL/FIXED_FPS,1), step=0.1, value=2, label="Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
+
+            with gr.Accordion("Advanced Settings", open=False):
+                negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
+                seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
+                randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
+                with gr.Row():
+                    height_input = gr.Slider(minimum=SLIDER_MIN_H, maximum=SLIDER_MAX_H, step=MOD_VALUE, value=DEFAULT_H_SLIDER_VALUE, label=f"Output Height (multiple of {MOD_VALUE})")
+                    width_input = gr.Slider(minimum=SLIDER_MIN_W, maximum=SLIDER_MAX_W, step=MOD_VALUE, value=DEFAULT_W_SLIDER_VALUE, label=f"Output Width (multiple of {MOD_VALUE})")
+                steps_slider = gr.Slider(minimum=1, maximum=8, step=1, value=4, label="Inference Steps")
+                guidance_scale_input = gr.Slider(minimum=0.0, maximum=5.0, step=0.01, value=0.0, label="Guidance Scale")
+            generate_button = gr.Button("Generate Video", variant="primary")
+        with gr.Column():
+            video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)
+
+    input_image_component.upload(
+        fn=handle_image_upload_for_dims_wan,
+        inputs=[input_image_component, height_input, width_input],
+        outputs=[height_input, width_input]
+    )
+
+    input_image_component.clear(
+        fn=handle_image_upload_for_dims_wan,
+        inputs=[input_image_component, height_input, width_input],
+        outputs=[height_input, width_input]
+    )
+
+    ui_inputs = [
+        input_image_component, prompt_input, height_input, width_input,
+        negative_prompt_input, duration_seconds_input,
+        guidance_scale_input, steps_slider, seed_input, randomize_seed_checkbox
+    ]
+    generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
+
+    gr.Examples(
+        examples=[
+            [None, "A person eating spaghetti", 1024, 720],
+            ["cat.png", "The cat removes the glasses from its eyes.", 1088, 800],
+            [None, "a penguin playfully dancing in the snow, Antarctica", 1024, 720],
+            ["peng.png", "a penguin running towards camera joyfully, Antarctica", 896, 512],
+        ],
+
+        inputs=[input_image_component, prompt_input, height_input, width_input], outputs=[video_output, seed_input], fn=generate_video, cache_examples="lazy"
+    )
+
+if __name__ == "__main__":
+    demo.queue().launch()
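For reference, the new pipeline can also be exercised outside the Gradio/ZeroGPU wrapper. The snippet below is a minimal sketch, not part of the commit: it reuses the same diffusers classes, model ID, scheduler setting, and low-step defaults that app.py above uses; the prompt, resolution, frame count, and output file name are illustrative only.

# Minimal standalone text-to-video run mirroring app.py (illustration, not part of this commit)
import torch
from diffusers import AutoencoderKLWan, WanPipeline, UniPCMultistepScheduler
from diffusers.utils import export_to_video

MODEL_ID = "FastVideo/FastWan2.2-TI2V-5B-FullAttn-Diffusers"

# Same setup as app.py: fp32 VAE, bf16 pipeline, UniPC scheduler with flow_shift=8.0
vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
pipe = WanPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
pipe.to("cuda")

# Roughly a two-second clip at 24 fps; guidance_scale=0 and 4 steps match the
# defaults the Space uses for this distilled model.
frames = pipe(
    prompt="a penguin playfully dancing in the snow, Antarctica",
    negative_prompt="",
    height=896, width=896, num_frames=49,
    guidance_scale=0.0, num_inference_steps=4,
    generator=torch.Generator(device="cuda").manual_seed(42),
).frames[0]
export_to_video(frames, "out.mp4", fps=24)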
cat.png
ADDED
(binary image, stored with Git LFS)

peng.png
ADDED
(binary image, stored with Git LFS)
requirements.txt
CHANGED
@@ -1 +1,10 @@
-
+git+https://github.com/huggingface/diffusers.git
+transformers
+accelerate
+safetensors
+sentencepiece
+peft
+ftfy
+imageio-ffmpeg
+opencv-python
+einops
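Before launching app.py locally with these requirements, it can help to confirm that the git build of diffusers actually exposes the Wan classes the app imports and that a CUDA device is visible. A small sketch (not part of the commit; it only checks imports and device availability):

# Sanity check for the installed environment (illustration only)
import torch
import diffusers
from diffusers import AutoencoderKLWan, WanPipeline, WanImageToVideoPipeline  # raises ImportError if the build is too old

print("diffusers version:", diffusers.__version__)
print("CUDA available:", torch.cuda.is_available())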