Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,44 +1,40 @@
 import os
-
-if os.getcwd() != '/home/user/app':
-    os.chdir('/home/user/app')
-
 import sys
-import spaces
 import subprocess
 import asyncio
+import uuid
 from typing import Sequence, Mapping, Any, Union
 
+# --- 2. Let ComfyUI's main.py handle all initial setup ---
 print("Importing ComfyUI's main.py for setup...")
 import main
 print("ComfyUI main imported.")
 
-
+# --- 3. Now we can import the rest of the necessary modules ---
 import torch
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from comfy import model_management
+import spaces
 from PIL import Image
 import random
-import nodes
-
+import nodes
 
-# --- Manually trigger the node initialization ---
-# This step is normally done inside main.start_comfyui(), but we do it here.
-# It loads all built-in, extra, and custom nodes into the NODE_CLASS_MAPPINGS.
+# --- 4. Manually trigger the node initialization ---
 print("Initializing ComfyUI nodes...")
 loop = asyncio.new_event_loop()
 asyncio.set_event_loop(loop)
 loop.run_until_complete(nodes.init_extra_nodes())
 print("Nodes initialized.")
 
-# --- Helper function
+# --- Helper function ---
 def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
     try:
         return obj[index]
     except KeyError:
         return obj["result"][index]
 
+
 # --- Model Downloads ---
 print("Downloading models from Hugging Face Hub...")
 hf_hub_download(repo_id="Comfy-Org/Wan_2.1_ComfyUI_repackaged", filename="split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors", local_dir="models/text_encoders")
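The get_value_at_index helper kept above exists because ComfyUI node methods return either a plain tuple of outputs or, for some nodes, a dict that wraps the outputs under a "result" key (the same dict shape the SaveVideo result is read from later in this file). A quick illustration of both cases:

    # Both forms resolve to the same output slot.
    print(get_value_at_index(("clip_model", "vae_model"), 1))              # vae_model
    print(get_value_at_index({"result": ("clip_model", "vae_model")}, 1))  # vae_model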
@@ -52,7 +48,6 @@ print("Downloads complete.")
|
|
52 |
|
53 |
|
54 |
# --- ZeroGPU: Pre-load models and instantiate nodes globally ---
|
55 |
-
# This part will now work because NODE_CLASS_MAPPINGS is correctly populated.
|
56 |
cliploader = nodes.NODE_CLASS_MAPPINGS["CLIPLoader"]()
|
57 |
cliptextencode = nodes.NODE_CLASS_MAPPINGS["CLIPTextEncode"]()
|
58 |
unetloader = nodes.NODE_CLASS_MAPPINGS["UNETLoader"]()
|
@@ -68,7 +63,6 @@ ksampleradvanced = nodes.NODE_CLASS_MAPPINGS["KSamplerAdvanced"]()
 vaedecode = nodes.NODE_CLASS_MAPPINGS["VAEDecode"]()
 createvideo = nodes.NODE_CLASS_MAPPINGS["CreateVideo"]()
 savevideo = nodes.NODE_CLASS_MAPPINGS["SaveVideo"]()
-imageresize = nodes.NODE_CLASS_MAPPINGS["ImageResize+"]()
 
 cliploader_38 = cliploader.load_clip(clip_name="umt5_xxl_fp8_e4m3fn_scaled.safetensors", type="wan", device="cpu")
 unetloader_37_low_noise = unetloader.load_unet(unet_name="wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", weight_dtype="default")
@@ -88,8 +82,7 @@ valid_models = [getattr(loader[0], 'patcher', loader[0]) for loader in model_loa
 model_management.load_models_gpu(valid_models)
 
 # --- App Logic ---
-def calculate_dimensions(image_path):
-    with Image.open(image_path) as img: width, height = img.size
+def calculate_dimensions(width, height):
     if width == height: return 480, 480
     if width > height: new_width, new_height = 832, int(height * (832 / width))
     else: new_height, new_width = 832, int(width * (832 / height))
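For reference, the reworked calculate_dimensions(width, height) keeps square inputs at 480x480 and pins the longer side of non-square inputs to 832. A small sketch of the branch logic visible in the hunk; the return statement for the non-square cases falls outside the shown context, so the tuple order here is an assumption:

    # Sketch only; assumes the function returns (new_width, new_height).
    def calculate_dimensions_sketch(width, height):
        if width == height:
            return 480, 480
        if width > height:
            return 832, int(height * (832 / width))   # landscape: clamp width to 832
        return int(width * (832 / height)), 832       # portrait: clamp height to 832

    calculate_dimensions_sketch(1024, 1024)  # (480, 480)
    calculate_dimensions_sketch(1920, 1080)  # (832, 468)
    calculate_dimensions_sketch(1080, 1920)  # (468, 832)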
@@ -97,23 +90,46 @@ def calculate_dimensions(image_path):
 
 @spaces.GPU(duration=120)
 def generate_video(prompt, first_image_path, last_image_path, duration_seconds):
+    # Create a temporary directory for resized images
+    temp_dir = f"temp_resized_{uuid.uuid4().hex}"
+    os.makedirs(temp_dir, exist_ok=True)
+
     with torch.inference_mode():
+        # --- Python Image Preprocessing using Pillow ---
+        print("Preprocessing images with Pillow...")
+        with Image.open(first_image_path) as img:
+            orig_width, orig_height = img.size
+
+        target_width, target_height = calculate_dimensions(orig_width, orig_height)
+
+        # Resize first image
+        with Image.open(first_image_path) as img:
+            img_resized = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
+            resized_first_path = os.path.join(temp_dir, "first_frame_resized.png")
+            img_resized.save(resized_first_path)
+
+        # Resize second image to match the target dimensions
+        with Image.open(last_image_path) as img:
+            img_resized = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
+            resized_last_path = os.path.join(temp_dir, "last_frame_resized.png")
+            img_resized.save(resized_last_path)
+        print(f"Images resized to {target_width}x{target_height} and saved temporarily.")
+        # --- End Preprocessing ---
+
         FPS, MAX_FRAMES = 16, 81
         length_in_frames = max(1, min(int(duration_seconds * FPS), MAX_FRAMES))
         print(f"Requested duration: {duration_seconds}s. Calculated frames: {length_in_frames}")
-
-
-        loaded_first_image = loadimage.load_image(image=
-
-
-        resized_last_image = imageresize.execute(width=target_width, height=target_height, interpolation="bicubic", method="stretch", image=get_value_at_index(loaded_last_image, 0))
-
+
+        # Load the pre-processed images into ComfyUI
+        loaded_first_image = loadimage.load_image(image=os.path.basename(resized_first_path))
+        loaded_last_image = loadimage.load_image(image=os.path.basename(resized_last_path))
+
         cliptextencode_6 = cliptextencode.encode(text=prompt, clip=get_value_at_index(cliploader_38, 0))
         cliptextencode_7_negative = cliptextencode.encode(text="low quality, worst quality, jpeg artifacts, ugly, deformed, blurry", clip=get_value_at_index(cliploader_38, 0))
-        clipvisionencode_51 = clipvisionencode.encode(crop="none", clip_vision=get_value_at_index(clipvisionloader_49, 0), image=get_value_at_index(
-        clipvisionencode_87 = clipvisionencode.encode(crop="none", clip_vision=get_value_at_index(clipvisionloader_49, 0), image=get_value_at_index(
+        clipvisionencode_51 = clipvisionencode.encode(crop="none", clip_vision=get_value_at_index(clipvisionloader_49, 0), image=get_value_at_index(loaded_first_image, 0))
+        clipvisionencode_87 = clipvisionencode.encode(crop="none", clip_vision=get_value_at_index(clipvisionloader_49, 0), image=get_value_at_index(loaded_last_image, 0))
 
-        wanfirstlastframetovideo_83 = wanfirstlastframetovideo.EXECUTE_NORMALIZED(width=target_width, height=target_height, length=length_in_frames, batch_size=1, positive=get_value_at_index(cliptextencode_6, 0), negative=get_value_at_index(cliptextencode_7_negative, 0), vae=get_value_at_index(vaeloader_39, 0), clip_vision_start_image=get_value_at_index(clipvisionencode_51, 0), clip_vision_end_image=get_value_at_index(clipvisionencode_87, 0), start_image=get_value_at_index(
+        wanfirstlastframetovideo_83 = wanfirstlastframetovideo.EXECUTE_NORMALIZED(width=target_width, height=target_height, length=length_in_frames, batch_size=1, positive=get_value_at_index(cliptextencode_6, 0), negative=get_value_at_index(cliptextencode_7_negative, 0), vae=get_value_at_index(vaeloader_39, 0), clip_vision_start_image=get_value_at_index(clipvisionencode_51, 0), clip_vision_end_image=get_value_at_index(clipvisionencode_87, 0), start_image=get_value_at_index(loaded_first_image, 0), end_image=get_value_at_index(loaded_last_image, 0))
 
         ksampler_positive = get_value_at_index(wanfirstlastframetovideo_83, 0)
         ksampler_negative = get_value_at_index(wanfirstlastframetovideo_83, 1)
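The duration handling above is a straight clamp: 16 frames per second, capped at 81 frames (just over five seconds of video). Worked values, mirroring the line in the hunk:

    FPS, MAX_FRAMES = 16, 81
    for duration_seconds in (0.0, 2.5, 5.0, 10.0):
        length_in_frames = max(1, min(int(duration_seconds * FPS), MAX_FRAMES))
        print(duration_seconds, length_in_frames)  # 0.0 -> 1, 2.5 -> 40, 5.0 -> 80, 10.0 -> 81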
@@ -128,7 +144,7 @@ def generate_video(prompt, first_image_path, last_image_path, duration_seconds):
 
         return f"output/{savevideo_103['ui']['videos'][0]['filename']}"
 
-# --- Gradio Interface
+# --- Gradio Interface ---
 with gr.Blocks() as app:
     gr.Markdown("# Wan 2.2 First/Last Frame to Video")
     gr.Markdown("Provide a starting image, an ending image, a text prompt, and a desired duration to generate a video transitioning between them.")
@@ -149,4 +165,7 @@ if __name__ == "__main__":
     if not os.path.exists("examples"): os.makedirs("examples")
     if not os.path.exists("examples/start.png"): Image.new('RGB', (512, 512), color='red').save('examples/start.png')
     if not os.path.exists("examples/end.png"): Image.new('RGB', (512, 512), color='blue').save('examples/end.png')
+    # Set the input directory for LoadImage to find the temp files
+    import folder_paths
+    folder_paths.add_model_folder_path("input", "temp_resized")
     app.launch()
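One caveat worth flagging: folder_paths.add_model_folder_path("input", "temp_resized") registers the fixed folder name "temp_resized", while generate_video writes into a per-request temp_resized_<uuid> directory, so LoadImage may not find the files it is handed by basename. A minimal sketch of one way to keep the two in sync, assuming ComfyUI's folder_paths.get_input_directory() helper (this helper and the whole sketch are not part of the commit): save the resized frames straight into the input directory under unique names and pass those names to loadimage.load_image.

    # Sketch only: write resized frames into ComfyUI's input directory so that
    # LoadImage can resolve them by bare filename.
    # Assumes folder_paths.get_input_directory() as provided by ComfyUI.
    import os
    import uuid

    import folder_paths
    from PIL import Image

    def save_resized_for_loadimage(image_path, size):
        input_dir = folder_paths.get_input_directory()
        filename = f"resized_{uuid.uuid4().hex}.png"
        with Image.open(image_path) as img:
            img.resize(size, Image.Resampling.LANCZOS).save(os.path.join(input_dir, filename))
        return filename  # the bare name that loadimage.load_image(image=...) expects

generate_video could then call loadimage.load_image(image=save_resized_for_loadimage(first_image_path, (target_width, target_height))) and drop the separate temp_dir bookkeeping.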
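Relatedly, each call creates a fresh temp_resized_<uuid> directory that is never removed. A sketch of the cleanup (an addition, not in the commit), wrapping the existing body in try/finally:

    # Sketch only: delete the per-request temp directory even if generation raises.
    import os
    import shutil
    import uuid

    def generate_video_cleanup_sketch(prompt, first_image_path, last_image_path, duration_seconds):
        temp_dir = f"temp_resized_{uuid.uuid4().hex}"
        os.makedirs(temp_dir, exist_ok=True)
        try:
            ...  # the resize / encode / sample / save steps from generate_video go here
        finally:
            shutil.rmtree(temp_dir, ignore_errors=True)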