Spaces:

multimodalart
/

wan-2-2-first-last-frame

Running on Zero

App Files Files Community

multimodalart HF Staff committed on 23 days ago

Commit

bfc6564

verified ·

1 Parent(s): f9d266f

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -191

app.py CHANGED Viewed

@@ -1,119 +1,81 @@
 import os
-import random
 import sys
 from typing import Sequence, Mapping, Any, Union
-import spaces
 import torch
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from comfy import model_management
-import comfy_extras.nodes_model_advanced
-import comfy_extras.nodes_custom_sampler
 from PIL import Image
-# --- Helper Functions from original script ---
 def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
     try:
         return obj[index]
     except KeyError:
         return obj["result"][index]
-def find_path(name: str, path: str = None) -> str:
-    if path is None:
-        path = os.getcwd()
-    if name in os.listdir(path):
-        path_name = os.path.join(path, name)
-        print(f"{name} found: {path_name}")
-        return path_name
-    parent_directory = os.path.dirname(path)
-    if parent_directory == path:
-        return None
-    return find_path(name, parent_directory)
-def add_comfyui_directory_to_sys_path() -> None:
-    comfyui_path = os.getcwd()
-    if "main.py" in os.listdir(comfyui_path):
-        if comfyui_path not in sys.path:
-            sys.path.append(comfyui_path)
-            print(f"'{comfyui_path}' added to sys.path")
-def add_extra_model_paths() -> None:
-    try:
-        from main import load_extra_path_config
-    except (ImportError, ModuleNotFoundError):
-        print("Could not import from main.py, trying utils...")
-        try:
-            from utils.extra_config import load_extra_path_config
-        except (ImportError, ModuleNotFoundError):
-            print("Could not find load_extra_path_config function.")
-            return
-    extra_model_paths = find_path("extra_model_paths.yaml")
-    if extra_model_paths:
-        load_extra_path_config(extra_model_paths)
-def import_custom_nodes() -> None:
-    import asyncio
-    import execution
-    from nodes import init_extra_nodes
-    import server
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    server_instance = server.PromptServer(loop)
-    execution.PromptQueue(server_instance)
-    init_extra_nodes()
-# --- Setup and Model Downloads ---
-add_comfyui_directory_to_sys_path()
-add_extra_model_paths()
-import_custom_nodes()
-from nodes import NODE_CLASS_MAPPINGS
-print("Downlading models from Hugging Face Hub...")
-# Text Encoder
-hf_hub_download(repo_id="Comfy-Org/Wan_2.1_ComfyUI_repackaged", filename="split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors", local_dir="models/text_encoders")
-# UNETs
-hf_hub_download(repo_id="Comfy-Org/Wan_2.2_ComfyUI_Repackaged", filename="split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", local_dir="models/unet")
-hf_hub_download(repo_id="Comfy-Org/Wan_2.2_ComfyUI_Repackaged", filename="split_files/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", local_dir="models/unet")
-# VAE
 hf_hub_download(repo_id="Comfy-Org/Wan_2.1_ComfyUI_repackaged", filename="split_files/vae/wan_2.1_vae.safetensors", local_dir="models/vae")
-# CLIP Vision
 hf_hub_download(repo_id="Comfy-Org/Wan_2.1_ComfyUI_repackaged", filename="split_files/clip_vision/clip_vision_h.safetensors", local_dir="models/clip_vision")
-# LoRAs
 hf_hub_download(repo_id="Kijai/WanVideo_comfy", filename="Wan22-Lightning/Wan2.2-Lightning_I2V-A14B-4steps-lora_HIGH_fp16.safetensors", local_dir="models/loras")
 hf_hub_download(repo_id="Kijai/WanVideo_comfy", filename="Wan22-Lightning/Wan2.2-Lightning_I2V-A14B-4steps-lora_LOW_fp16.safetensors", local_dir="models/loras")
 print("Downloads complete.")
-# --- ZeroGPU: Pre-load models and instantiate nodes globally ---
-# Instantiate Nodes
-cliploader = NODE_CLASS_MAPPINGS["CLIPLoader"]()
-cliptextencode = NODE_CLASS_MAPPINGS["CLIPTextEncode"]()
-unetloader = NODE_CLASS_MAPPINGS["UNETLoader"]()
-vaeloader = NODE_CLASS_MAPPINGS["VAELoader"]()
-clipvisionloader = NODE_CLASS_MAPPINGS["CLIPVisionLoader"]()
-loadimage = NODE_CLASS_MAPPINGS["LoadImage"]()
-clipvisionencode = NODE_CLASS_MAPPINGS["CLIPVisionEncode"]()
-loraloadermodelonly = NODE_CLASS_MAPPINGS["LoraLoaderModelOnly"]()
-modelsamplingsd3 = NODE_CLASS_MAPPINGS["ModelSamplingSD3"]()
-pathchsageattentionkj = NODE_CLASS_MAPPINGS["PathchSageAttentionKJ"]()
-wanfirstlastframetovideo = NODE_CLASS_MAPPINGS["WanFirstLastFrameToVideo"]()
-ksampleradvanced = NODE_CLASS_MAPPINGS["KSamplerAdvanced"]()
-vaedecode = NODE_CLASS_MAPPINGS["VAEDecode"]()
-createvideo = NODE_CLASS_MAPPINGS["CreateVideo"]()
-savevideo = NODE_CLASS_MAPPINGS["SaveVideo"]()
-imageresize = NODE_CLASS_MAPPINGS["ImageResize+"]() # For dynamic resizing
-# Load Models
 cliploader_38 = cliploader.load_clip(clip_name="umt5_xxl_fp8_e4m3fn_scaled.safetensors", type="wan", device="cpu")
 unetloader_37_low_noise = unetloader.load_unet(unet_name="wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", weight_dtype="default")
 unetloader_91_high_noise = unetloader.load_unet(unet_name="wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", weight_dtype="default")
 vaeloader_39 = vaeloader.load_vae(vae_name="wan_2.1_vae.safetensors")
 clipvisionloader_49 = clipvisionloader.load_clip(clip_name="clip_vision_h.safetensors")
-# Apply LoRAs and Patches
 loraloadermodelonly_94_high = loraloadermodelonly.load_lora_model_only(lora_name="Wan2.2-Lightning_I2V-A14B-4steps-lora_HIGH_fp16.safetensors", strength_model=0.8, model=get_value_at_index(unetloader_91_high_noise, 0))
 loraloadermodelonly_95_low = loraloadermodelonly.load_lora_model_only(lora_name="Wan2.2-Lightning_I2V-A14B-4steps-lora_LOW_fp16.safetensors", strength_model=0.8, model=get_value_at_index(unetloader_37_low_noise, 0))
 modelsamplingsd3_93_low = modelsamplingsd3.patch(shift=8, model=get_value_at_index(loraloadermodelonly_95_low, 0))
@@ -121,141 +83,70 @@ pathchsageattentionkj_98_low = pathchsageattentionkj.patch(sage_attention="auto"
 modelsamplingsd3_79_high = modelsamplingsd3.patch(shift=8, model=get_value_at_index(loraloadermodelonly_94_high, 0))
 pathchsageattentionkj_96_high = pathchsageattentionkj.patch(sage_attention="auto", model=get_value_at_index(modelsamplingsd3_79_high, 0))
-# Pre-load models to GPU
 model_loaders = [cliploader_38, unetloader_37_low_noise, unetloader_91_high_noise, vaeloader_39, clipvisionloader_49, loraloadermodelonly_94_high, loraloadermodelonly_95_low]
 valid_models = [getattr(loader[0], 'patcher', loader[0]) for loader in model_loaders if not isinstance(loader[0], dict) and not isinstance(getattr(loader[0], 'patcher', None), dict)]
 model_management.load_models_gpu(valid_models)
-# --- Custom Logic for this App ---
 def calculate_dimensions(image_path):
-    with Image.open(image_path) as img:
-        width, height = img.size
-    if width == height:
-        return 480, 480
-    if width > height:
-        new_width = 832
-        new_height = int(height * (832 / width))
-    else:
-        new_height = 832
-        new_width = int(width * (832 / height))
-    # Ensure dimensions are multiples of 16
-    new_width = (new_width // 16) * 16
-    new_height = (new_height // 16) * 16
-    return new_width, new_height
-# --- Main Generation Function ---
 @spaces.GPU(duration=120)
-def generate_video(prompt, first_image_path, last_image_path):
-    # This function now only handles per-request logic
     with torch.inference_mode():
-        # Calculate target dimensions based on the first image
         target_width, target_height = calculate_dimensions(first_image_path)
-        # 1. Load and resize images
-        # Since LoadImage returns a tensor, we pass it to the resize node
         loaded_first_image = loadimage.load_image(image=first_image_path)
-        resized_first_image = imageresize.execute(
-            width=target_width, height=target_height, interpolation="bicubic",
-            method="stretch", condition="always", multiple_of=1,
-            image=get_value_at_index(loaded_first_image, 0)
-        )
         loaded_last_image = loadimage.load_image(image=last_image_path)
-        resized_last_image = imageresize.execute(
-            width=target_width, height=target_height, interpolation="bicubic",
-            method="stretch", condition="always", multiple_of=1,
-            image=get_value_at_index(loaded_last_image, 0)
-        )
-        # 2. Encode text and images
         cliptextencode_6 = cliptextencode.encode(text=prompt, clip=get_value_at_index(cliploader_38, 0))
-        cliptextencode_7_negative = cliptextencode.encode(
-            text="low quality, worst quality, jpeg artifacts, ugly, deformed, blurry",
-            clip=get_value_at_index(cliploader_38, 0),
-        )
         clipvisionencode_51 = clipvisionencode.encode(crop="none", clip_vision=get_value_at_index(clipvisionloader_49, 0), image=get_value_at_index(resized_first_image, 0))
         clipvisionencode_87 = clipvisionencode.encode(crop="none", clip_vision=get_value_at_index(clipvisionloader_49, 0), image=get_value_at_index(resized_last_image, 0))
-        # 3. Prepare latents for video generation
-        wanfirstlastframetovideo_83 = wanfirstlastframetovideo.EXECUTE_NORMALIZED(
-            width=target_width, height=target_height, length=33, batch_size=1,
-            positive=get_value_at_index(cliptextencode_6, 0),
-            negative=get_value_at_index(cliptextencode_7_negative, 0),
-            vae=get_value_at_index(vaeloader_39, 0),
-            clip_vision_start_image=get_value_at_index(clipvisionencode_51, 0),
-            clip_vision_end_image=get_value_at_index(clipvisionencode_87, 0),
-            start_image=get_value_at_index(resized_first_image, 0),
-            end_image=get_value_at_index(resized_last_image, 0),
-        )
-        # 4. KSampler pipeline
-        ksampleradvanced_101 = ksampleradvanced.sample(
-            add_noise="enable", noise_seed=random.randint(1, 2**64), steps=8, cfg=1,
-            sampler_name="euler", scheduler="simple", start_at_step=0, end_at_step=4,
-            return_with_leftover_noise="enable", model=get_value_at_index(pathchsageattentionkj_96_high, 0),
-            positive=get_value_at_index(wanfirstlastframetovideo_83, 0),
-            negative=get_value_at_index(wanfirstlastframetovideo_83, 1),
-            latent_image=get_value_at_index(wanfirstlastframetovideo_83, 2),
-        )
-        ksampleradvanced_102 = ksampleradvanced.sample(
-            add_noise="disable", noise_seed=random.randint(1, 2**64), steps=8, cfg=1,
-            sampler_name="euler", scheduler="simple", start_at_step=4, end_at_step=10000,
-            return_with_leftover_noise="disable", model=get_value_at_index(pathchsageattentionkj_98_low, 0),
-            positive=get_value_at_index(wanfirstlastframetovideo_83, 0),
-            negative=get_value_at_index(wanfirstlastframetovideo_83, 1),
-            latent_image=get_value_at_index(ksampleradvanced_101, 0),
-        )
-        # 5. Decode and save video
         vaedecode_8 = vaedecode.decode(samples=get_value_at_index(ksampleradvanced_102, 0), vae=get_value_at_index(vaeloader_39, 0))
         createvideo_104 = createvideo.create_video(fps=16, images=get_value_at_index(vaedecode_8, 0))
         savevideo_103 = savevideo.save_video(filename_prefix="ComfyUI_Video", format="mp4", codec="libx264", video=get_value_at_index(createvideo_104, 0))
-        # Return the path to the saved video
-        video_filename = savevideo_103['ui']['videos'][0]['filename']
-        return f"output/{video_filename}"
-# --- Gradio Interface ---
 with gr.Blocks() as app:
     gr.Markdown("# Wan 2.2 First/Last Frame to Video")
-    gr.Markdown("Provide a starting image, an ending image, and a text prompt to generate a video transitioning between them.")
     with gr.Row():
         with gr.Column(scale=1):
-            prompt_input = gr.Textbox(label="Prompt", value="the guy turns")
-            first_image = gr.Image(label="First Frame", type="filepath")
-            last_image = gr.Image(label="Last Frame", type="filepath")
             generate_btn = gr.Button("Generate Video")
         with gr.Column(scale=2):
             output_video = gr.Video(label="Generated Video")
-    generate_btn.click(
-        fn=generate_video,
-        inputs=[prompt_input, first_image, last_image],
-        outputs=[output_video]
-    )
-    gr.Examples(
-        examples=[
-            ["a beautiful woman, cinematic", "examples/start.png", "examples/end.png"]
-        ],
-        inputs=[prompt_input, first_image, last_image]
-    )
 if __name__ == "__main__":
-    # Create example images if they don't exist
-    if not os.path.exists("examples"):
-        os.makedirs("examples")
-    if not os.path.exists("examples/start.png"):
-        Image.new('RGB', (512, 512), color = 'red').save('examples/start.png')
-    if not os.path.exists("examples/end.png"):
-        Image.new('RGB', (512, 512), color = 'blue').save('examples/end.png')
     app.launch()

 import os
+if os.getcwd() != '/home/user/app':
+    os.chdir('/home/user/app')
 import sys
+import subprocess
+import asyncio
 from typing import Sequence, Mapping, Any, Union
+print("Importing ComfyUI's main.py for setup...")
+import main
+print("ComfyUI main imported.")
 import torch
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from comfy import model_management
+import spaces
 from PIL import Image
+import random
+import nodes # Import nodes after main has set everything up
+# --- Manually trigger the node initialization ---
+# This step is normally done inside main.start_comfyui(), but we do it here.
+# It loads all built-in, extra, and custom nodes into the NODE_CLASS_MAPPINGS.
+print("Initializing ComfyUI nodes...")
+loop = asyncio.new_event_loop()
+asyncio.set_event_loop(loop)
+loop.run_until_complete(nodes.init_extra_nodes())
+print("Nodes initialized.")
+# --- Helper function from the original script ---
 def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
     try:
         return obj[index]
     except KeyError:
         return obj["result"][index]
+# --- Model Downloads (filenames must use the .safetensors extension the loaders request) ---
+print("Downloading models from Hugging Face Hub...")
+hf_hub_download(repo_id="Comfy-Org/Wan_2.1_ComfyUI_repackaged", filename="split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors", local_dir="models/text_encoders")
+hf_hub_download(repo_id="Comfy-Org/Wan_2.2_ComfyUI_Repackaged", filename="split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", local_dir="models/diffusion_models")
+hf_hub_download(repo_id="Comfy-Org/Wan_2.2_ComfyUI_Repackaged", filename="split_files/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", local_dir="models/diffusion_models")
 hf_hub_download(repo_id="Comfy-Org/Wan_2.1_ComfyUI_repackaged", filename="split_files/vae/wan_2.1_vae.safetensors", local_dir="models/vae")
 hf_hub_download(repo_id="Comfy-Org/Wan_2.1_ComfyUI_repackaged", filename="split_files/clip_vision/clip_vision_h.safetensors", local_dir="models/clip_vision")
 hf_hub_download(repo_id="Kijai/WanVideo_comfy", filename="Wan22-Lightning/Wan2.2-Lightning_I2V-A14B-4steps-lora_HIGH_fp16.safetensors", local_dir="models/loras")
 hf_hub_download(repo_id="Kijai/WanVideo_comfy", filename="Wan22-Lightning/Wan2.2-Lightning_I2V-A14B-4steps-lora_LOW_fp16.safetensors", local_dir="models/loras")
 print("Downloads complete.")
+# --- ZeroGPU: Pre-load models and instantiate nodes globally ---
+# This part will now work because NODE_CLASS_MAPPINGS is correctly populated.
+cliploader = nodes.NODE_CLASS_MAPPINGS["CLIPLoader"]()
+cliptextencode = nodes.NODE_CLASS_MAPPINGS["CLIPTextEncode"]()
+unetloader = nodes.NODE_CLASS_MAPPINGS["UNETLoader"]()
+vaeloader = nodes.NODE_CLASS_MAPPINGS["VAELoader"]()
+clipvisionloader = nodes.NODE_CLASS_MAPPINGS["CLIPVisionLoader"]()
+loadimage = nodes.NODE_CLASS_MAPPINGS["LoadImage"]()
+clipvisionencode = nodes.NODE_CLASS_MAPPINGS["CLIPVisionEncode"]()
+loraloadermodelonly = nodes.NODE_CLASS_MAPPINGS["LoraLoaderModelOnly"]()
+modelsamplingsd3 = nodes.NODE_CLASS_MAPPINGS["ModelSamplingSD3"]()
+pathchsageattentionkj = nodes.NODE_CLASS_MAPPINGS["PathchSageAttentionKJ"]()
+wanfirstlastframetovideo = nodes.NODE_CLASS_MAPPINGS["WanFirstLastFrameToVideo"]()
+ksampleradvanced = nodes.NODE_CLASS_MAPPINGS["KSamplerAdvanced"]()
+vaedecode = nodes.NODE_CLASS_MAPPINGS["VAEDecode"]()
+createvideo = nodes.NODE_CLASS_MAPPINGS["CreateVideo"]()
+savevideo = nodes.NODE_CLASS_MAPPINGS["SaveVideo"]()
+imageresize = nodes.NODE_CLASS_MAPPINGS["ImageResize+"]()
 cliploader_38 = cliploader.load_clip(clip_name="umt5_xxl_fp8_e4m3fn_scaled.safetensors", type="wan", device="cpu")
 unetloader_37_low_noise = unetloader.load_unet(unet_name="wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", weight_dtype="default")
 unetloader_91_high_noise = unetloader.load_unet(unet_name="wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", weight_dtype="default")
 vaeloader_39 = vaeloader.load_vae(vae_name="wan_2.1_vae.safetensors")
 clipvisionloader_49 = clipvisionloader.load_clip(clip_name="clip_vision_h.safetensors")
 loraloadermodelonly_94_high = loraloadermodelonly.load_lora_model_only(lora_name="Wan2.2-Lightning_I2V-A14B-4steps-lora_HIGH_fp16.safetensors", strength_model=0.8, model=get_value_at_index(unetloader_91_high_noise, 0))
 loraloadermodelonly_95_low = loraloadermodelonly.load_lora_model_only(lora_name="Wan2.2-Lightning_I2V-A14B-4steps-lora_LOW_fp16.safetensors", strength_model=0.8, model=get_value_at_index(unetloader_37_low_noise, 0))
 modelsamplingsd3_93_low = modelsamplingsd3.patch(shift=8, model=get_value_at_index(loraloadermodelonly_95_low, 0))
 modelsamplingsd3_79_high = modelsamplingsd3.patch(shift=8, model=get_value_at_index(loraloadermodelonly_94_high, 0))
 pathchsageattentionkj_96_high = pathchsageattentionkj.patch(sage_attention="auto", model=get_value_at_index(modelsamplingsd3_79_high, 0))
 model_loaders = [cliploader_38, unetloader_37_low_noise, unetloader_91_high_noise, vaeloader_39, clipvisionloader_49, loraloadermodelonly_94_high, loraloadermodelonly_95_low]
 valid_models = [getattr(loader[0], 'patcher', loader[0]) for loader in model_loaders if not isinstance(loader[0], dict) and not isinstance(getattr(loader[0], 'patcher', None), dict)]
 model_management.load_models_gpu(valid_models)
+# --- App Logic ---
 def calculate_dimensions(image_path):
+    with Image.open(image_path) as img: width, height = img.size
+    if width == height: return 480, 480
+    if width > height: new_width, new_height = 832, int(height * (832 / width))
+    else: new_height, new_width = 832, int(width * (832 / height))
+    return (new_width // 16) * 16, (new_height // 16) * 16
 @spaces.GPU(duration=120)
+def generate_video(prompt, first_image_path, last_image_path, duration_seconds):
     with torch.inference_mode():
+        FPS, MAX_FRAMES = 16, 81
+        length_in_frames = max(1, min(int(duration_seconds * FPS), MAX_FRAMES))
+        print(f"Requested duration: {duration_seconds}s. Calculated frames: {length_in_frames}")
         target_width, target_height = calculate_dimensions(first_image_path)
         loaded_first_image = loadimage.load_image(image=first_image_path)
+        resized_first_image = imageresize.execute(width=target_width, height=target_height, interpolation="bicubic", method="stretch", image=get_value_at_index(loaded_first_image, 0))
         loaded_last_image = loadimage.load_image(image=last_image_path)
+        resized_last_image = imageresize.execute(width=target_width, height=target_height, interpolation="bicubic", method="stretch", image=get_value_at_index(loaded_last_image, 0))
         cliptextencode_6 = cliptextencode.encode(text=prompt, clip=get_value_at_index(cliploader_38, 0))
+        cliptextencode_7_negative = cliptextencode.encode(text="low quality, worst quality, jpeg artifacts, ugly, deformed, blurry", clip=get_value_at_index(cliploader_38, 0))
         clipvisionencode_51 = clipvisionencode.encode(crop="none", clip_vision=get_value_at_index(clipvisionloader_49, 0), image=get_value_at_index(resized_first_image, 0))
         clipvisionencode_87 = clipvisionencode.encode(crop="none", clip_vision=get_value_at_index(clipvisionloader_49, 0), image=get_value_at_index(resized_last_image, 0))
+        wanfirstlastframetovideo_83 = wanfirstlastframetovideo.EXECUTE_NORMALIZED(width=target_width, height=target_height, length=length_in_frames, batch_size=1, positive=get_value_at_index(cliptextencode_6, 0), negative=get_value_at_index(cliptextencode_7_negative, 0), vae=get_value_at_index(vaeloader_39, 0), clip_vision_start_image=get_value_at_index(clipvisionencode_51, 0), clip_vision_end_image=get_value_at_index(clipvisionencode_87, 0), start_image=get_value_at_index(resized_first_image, 0), end_image=get_value_at_index(resized_last_image, 0))
+        ksampler_positive = get_value_at_index(wanfirstlastframetovideo_83, 0)
+        ksampler_negative = get_value_at_index(wanfirstlastframetovideo_83, 1)
+        ksampler_latent = get_value_at_index(wanfirstlastframetovideo_83, 2)
+        ksampleradvanced_101 = ksampleradvanced.sample(add_noise="enable", noise_seed=random.randint(1, 2**64), steps=8, cfg=1, sampler_name="euler", scheduler="simple", start_at_step=0, end_at_step=4, return_with_leftover_noise="enable", model=get_value_at_index(pathchsageattentionkj_96_high, 0), positive=ksampler_positive, negative=ksampler_negative, latent_image=ksampler_latent)
+        ksampleradvanced_102 = ksampleradvanced.sample(add_noise="disable", noise_seed=random.randint(1, 2**64), steps=8, cfg=1, sampler_name="euler", scheduler="simple", start_at_step=4, end_at_step=10000, return_with_leftover_noise="disable", model=get_value_at_index(pathchsageattentionkj_98_low, 0), positive=ksampler_positive, negative=ksampler_negative, latent_image=get_value_at_index(ksampleradvanced_101, 0))
         vaedecode_8 = vaedecode.decode(samples=get_value_at_index(ksampleradvanced_102, 0), vae=get_value_at_index(vaeloader_39, 0))
         createvideo_104 = createvideo.create_video(fps=16, images=get_value_at_index(vaedecode_8, 0))
         savevideo_103 = savevideo.save_video(filename_prefix="ComfyUI_Video", format="mp4", codec="libx264", video=get_value_at_index(createvideo_104, 0))
+        return f"output/{savevideo_103['ui']['videos'][0]['filename']}"
+# --- Gradio Interface (no changes needed) ---
 with gr.Blocks() as app:
     gr.Markdown("# Wan 2.2 First/Last Frame to Video")
+    gr.Markdown("Provide a starting image, an ending image, a text prompt, and a desired duration to generate a video transitioning between them.")
     with gr.Row():
         with gr.Column(scale=1):
+            prompt_input = gr.Textbox(label="Prompt", value="a man dancing in the street, cinematic")
+            duration_slider = gr.Slider(minimum=1.0, maximum=5.0, value=2.0, step=0.1, label="Video Duration (seconds)")
+            with gr.Row():
+                first_image = gr.Image(label="First Frame", type="filepath")
+                last_image = gr.Image(label="Last Frame", type="filepath")
             generate_btn = gr.Button("Generate Video")
         with gr.Column(scale=2):
             output_video = gr.Video(label="Generated Video")
+    generate_btn.click(fn=generate_video, inputs=[prompt_input, first_image, last_image, duration_slider], outputs=[output_video])
+    gr.Examples(examples=[["a beautiful woman, cinematic", "examples/start.png", "examples/end.png", 2.5]], inputs=[prompt_input, first_image, last_image, duration_slider])
 if __name__ == "__main__":
+    if not os.path.exists("examples"): os.makedirs("examples")
+    if not os.path.exists("examples/start.png"): Image.new('RGB', (512, 512), color='red').save('examples/start.png')
+    if not os.path.exists("examples/end.png"): Image.new('RGB', (512, 512), color='blue').save('examples/end.png')
     app.launch()