Update app.py
app.py CHANGED
@@ -32,14 +32,12 @@ LTX_REPO = "Lightricks/LTX-Video"
 MAX_IMAGE_SIZE = 1440
 MAX_NUM_FRAMES = 257
 FPS = 24.0
-
 # Default values
 DEFAULT_NEGATIVE_PROMPT = "worst quality, inconsistent motion, blurry, jittery, distorted"
 DEFAULT_GUIDANCE_SCALE = PIPELINE_CONFIG_YAML.get("first_pass", {}).get("guidance_scale", 1.0)
 DEFAULT_SEED = 42
 DEFAULT_IMPROVE_TEXTURE = True
 TARGET_FIXED_SIDE = 768
-
 # Global variables for loaded models
 pipeline_instance = None
 latent_upsampler_instance = None
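A quick note on the `DEFAULT_GUIDANCE_SCALE` context line above: the chained `.get(...)` lookups mean a config file with no `first_pass` section degrades to the `1.0` fallback instead of raising. A minimal sketch, with `PIPELINE_CONFIG_YAML` stubbed as a plain dict (not part of the commit):

```python
# Illustrative stand-in for the parsed YAML config.
PIPELINE_CONFIG_YAML = {}  # e.g. the config lacks a "first_pass" section

# Chained .get() never raises KeyError: the first .get returns {} on a miss,
# and the second falls back to 1.0.
DEFAULT_GUIDANCE_SCALE = PIPELINE_CONFIG_YAML.get("first_pass", {}).get("guidance_scale", 1.0)
print(DEFAULT_GUIDANCE_SCALE)  # 1.0
```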
@@ -72,7 +70,6 @@ def calculate_new_dimensions(orig_w, orig_h):
     """
     if orig_w == 0 or orig_h == 0:
         return TARGET_FIXED_SIDE, TARGET_FIXED_SIDE
-
     # Step 1: Handle dimensions > 1024
     new_w, new_h = orig_w, orig_h
     if max(orig_w, orig_h) > MAX_IMAGE_SIZE:
@@ -80,18 +77,14 @@ def calculate_new_dimensions(orig_w, orig_h):
         scale = MAX_IMAGE_SIZE / max_dim
         new_w = int(orig_w * scale)
         new_h = int(orig_h * scale)
-
     # Step 2: Round to nearest multiples of 32
     def round_to_multiple(x, multiple=32):
         return round(x / multiple) * multiple
-
     new_w = round_to_multiple(new_w)
     new_h = round_to_multiple(new_h)
-
     # Step 3: Ensure within bounds
     new_w = max(256, min(new_w, MAX_IMAGE_SIZE))
     new_h = max(256, min(new_h, MAX_IMAGE_SIZE))
-
     return new_h, new_w

 def resize_and_squash_image(image_path, target_width, target_height):
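For context on the hunk above: the function scales oversized inputs down to `MAX_IMAGE_SIZE`, snaps both sides to multiples of 32, clamps to [256, 1440], and returns `(height, width)` in that order. A self-contained sketch of the same rule (the `max_dim` assignment sits just outside the hunk, so the sketch inlines it; this is illustrative, not part of the commit):

```python
MAX_IMAGE_SIZE = 1440  # mirrors the constant defined earlier in app.py

def preview_dimensions(orig_w: int, orig_h: int) -> tuple[int, int]:
    """Illustrative mirror of calculate_new_dimensions."""
    new_w, new_h = orig_w, orig_h
    if max(orig_w, orig_h) > MAX_IMAGE_SIZE:
        # Scale the longer side down to MAX_IMAGE_SIZE, preserving aspect ratio
        scale = MAX_IMAGE_SIZE / max(orig_w, orig_h)
        new_w, new_h = int(orig_w * scale), int(orig_h * scale)
    # Snap both sides to the nearest multiple of 32
    new_w = round(new_w / 32) * 32
    new_h = round(new_h / 32) * 32
    # Clamp to [256, MAX_IMAGE_SIZE]
    new_w = max(256, min(new_w, MAX_IMAGE_SIZE))
    new_h = max(256, min(new_h, MAX_IMAGE_SIZE))
    return new_h, new_w  # note: (height, width), matching the callers

print(preview_dimensions(1920, 1080))  # (800, 1440): 1080p scales to 1440x810, 810 snaps to 800
print(preview_dimensions(512, 512))    # (512, 512): already aligned, unchanged
```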
@@ -102,7 +95,6 @@ def resize_and_squash_image(image_path, target_width, target_height):
     img = Image.open(image_path)
     # Resize to exact dimensions, possibly distorting aspect ratio
    img = img.resize((target_width, target_height), Image.LANCZOS)
-
     # Save to temporary file
     temp_path = os.path.join(tempfile.gettempdir(), f"resized_{os.path.basename(image_path)}")
     img.save(temp_path)
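Worth noting about `resize_and_squash_image`: it stretches to the exact target size rather than cropping, deliberately sacrificing aspect ratio so every conditioning frame matches the generation resolution. A tiny illustration (assumes Pillow; not part of the commit):

```python
from PIL import Image

# A 4:3 input forced to a wide target is stretched, not cropped.
img = Image.new("RGB", (1024, 768))
squashed = img.resize((1440, 800), Image.LANCZOS)  # args are (width, height)
print(squashed.size)  # (1440, 800)
```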
@@ -150,13 +142,14 @@ def initialize_models():
     latent_upsampler_instance.to(target_inference_device)

 @spaces.GPU(duration=60)
-def generate(prompt, input_image_url=None, final_image_url=None, duration_ui=2, progress=gr.Progress(track_tqdm=True)):
+def generate(prompt, input_image_url=None, middle_image_url=None, final_image_url=None, duration_ui=2, progress=gr.Progress(track_tqdm=True)):
     """Generate video from image(s) and prompt"""
     # Validate input - at least one image must be provided
-    if input_image_url is None and final_image_url is None:
-        raise gr.Error("Please provide at least one input image (first frame or last frame)")
+    if input_image_url is None and final_image_url is None and middle_image_url is None:
+        raise gr.Error("Please provide at least one input image (first frame, middle frame, or last frame)")

     input_image_filepath = input_image_url
+    middle_image_filepath = middle_image_url
     final_image_filepath = final_image_url

     # Set default values
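The updated guard accepts any one of the three images, or any combination. A hypothetical helper mirroring the new check (the app itself raises `gr.Error` instead of returning a boolean):

```python
def has_any_image(first, middle, last):
    # True when at least one of the three optional inputs was supplied.
    return any(p is not None for p in (first, middle, last))

assert has_any_image(None, "middle.png", None)
assert not has_any_image(None, None, None)  # this case raises gr.Error in the app
```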
@@ -168,7 +161,6 @@ def generate(prompt, input_image_url=None, final_image_url=None, duration_ui=2,

     if randomize_seed:
         seed_ui = random.randint(0, 2**32 - 1)
-
     seed_everething(int(seed_ui))

     # Calculate target frames
@@ -178,30 +170,22 @@ def generate(prompt, input_image_url=None, final_image_url=None, duration_ui=2,
     actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))

     # Calculate dimensions based on the provided image(s)
+    dimension_sources = []
     if input_image_filepath:
+        dimension_sources.append(input_image_filepath)
+    if middle_image_filepath:
+        dimension_sources.append(middle_image_filepath)
+    if final_image_filepath:
+        dimension_sources.append(final_image_filepath)
+
+    if dimension_sources:
         try:
-            img = Image.open(input_image_filepath)
-            orig_w, orig_h = img.size
-            actual_height, actual_width = calculate_new_dimensions(orig_w, orig_h)
-        except Exception as e:
-            print(f"Error processing input image: {e}")
-            if final_image_filepath:
-                try:
-                    img = Image.open(final_image_filepath)
-                    orig_w, orig_h = img.size
-                    actual_height, actual_width = calculate_new_dimensions(orig_w, orig_h)
-                except Exception as e:
-                    print(f"Error processing final image: {e}")
-                    actual_height, actual_width = TARGET_FIXED_SIDE, TARGET_FIXED_SIDE
-            else:
-                actual_height, actual_width = TARGET_FIXED_SIDE, TARGET_FIXED_SIDE
-    elif final_image_filepath:
-        try:
-            img = Image.open(final_image_filepath)
+            # Use the first available image to determine dimensions
+            img = Image.open(dimension_sources[0])
             orig_w, orig_h = img.size
             actual_height, actual_width = calculate_new_dimensions(orig_w, orig_h)
         except Exception as e:
-            print(f"Error processing final image: {e}")
+            print(f"Error processing dimension source image: {e}")
             actual_height, actual_width = TARGET_FIXED_SIDE, TARGET_FIXED_SIDE
     else:
         actual_height, actual_width = TARGET_FIXED_SIDE, TARGET_FIXED_SIDE
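The rewritten block above replaces the old nested try/except fallback chain with a flat priority list: the first image supplied, in first/middle/last order, decides the output resolution, and any failure falls back to 768x768. A sketch of the selection order (illustrative names, not from the commit):

```python
def pick_dimension_source(first, middle, last):
    # First non-None path in first/middle/last order wins.
    sources = [p for p in (first, middle, last) if p]
    return sources[0] if sources else None

assert pick_dimension_source(None, "mid.png", "end.png") == "mid.png"
assert pick_dimension_source("start.png", None, "end.png") == "start.png"
assert pick_dimension_source(None, None, None) is None  # -> TARGET_FIXED_SIDE fallback
```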
@@ -252,43 +236,47 @@ def generate(prompt, input_image_url=None, final_image_url=None, duration_ui=2,
     # Add initial frame conditioning if provided
     if input_image_filepath:
         try:
-            # First resize and squash the image to the exact dimensions we want
             resized_image_path = resize_and_squash_image(input_image_filepath, actual_width, actual_height)
-
-            # Now load this pre-resized image with load_image_to_tensor_with_resize_and_crop
-            # Since it's already the correct size, the "crop" part will be a no-op
             media_tensor = load_image_to_tensor_with_resize_and_crop(
                 resized_image_path, actual_height, actual_width
             )
-
-            # Clean up temporary file
             if os.path.exists(resized_image_path):
                 os.remove(resized_image_path)
-
             media_tensor = torch.nn.functional.pad(media_tensor, padding_values)
             conditioning_items.append(ConditioningItem(media_tensor.to("cuda"), 0, 1.0))
         except Exception as e:
             print(f"Error loading initial image: {e}")
             raise gr.Error(f"Could not load initial image: {e}")

+    # Add middle frame conditioning if provided
+    if middle_image_filepath:
+        try:
+            middle_frame_position = num_frames_padded // 2
+            resized_middle_path = resize_and_squash_image(
+                middle_image_filepath, actual_width, actual_height
+            )
+            middle_media_tensor = load_image_to_tensor_with_resize_and_crop(
+                resized_middle_path, actual_height, actual_width
+            )
+            if os.path.exists(resized_middle_path):
+                os.remove(resized_middle_path)
+            middle_media_tensor = torch.nn.functional.pad(middle_media_tensor, padding_values)
+            conditioning_items.append(ConditioningItem(middle_media_tensor.to("cuda"), middle_frame_position, 1.0))
+        except Exception as e:
+            print(f"Error loading middle image: {e}")
+            raise gr.Error(f"Could not load middle image: {e}")
+
     # Add final frame conditioning if provided
     if final_image_filepath:
         try:
-            # First resize and squash the final image to match the initial image dimensions
             resized_final_path = resize_and_squash_image(
                 final_image_filepath, actual_width, actual_height
             )
-
-            # Now load this pre-resized image with load_image_to_tensor_with_resize_and_crop
-            # Since it's already the correct size, the "crop" part will be a no-op
             final_media_tensor = load_image_to_tensor_with_resize_and_crop(
                 resized_final_path, actual_height, actual_width
             )
-
-            # Clean up temporary file
             if os.path.exists(resized_final_path):
                 os.remove(resized_final_path)
-
             final_media_tensor = torch.nn.functional.pad(final_media_tensor, padding_values)
             conditioning_items.append(ConditioningItem(final_media_tensor.to("cuda"), num_frames_padded - 1, 1.0))
         except Exception as e:
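The new middle-frame block anchors its `ConditioningItem` at `num_frames_padded // 2`, so the three optional images condition frame 0, the midpoint, and the final frame. Worked example, assuming `num_frames_padded = 49` (the real value is computed outside this hunk; 49 is only illustrative):

```python
num_frames_padded = 49  # illustrative; computed earlier in generate()

anchors = {
    "first": 0,                        # ConditioningItem(..., 0, 1.0)
    "middle": num_frames_padded // 2,  # ConditioningItem(..., 24, 1.0)
    "last": num_frames_padded - 1,     # ConditioningItem(..., 48, 1.0)
}
print(anchors)  # {'first': 0, 'middle': 24, 'last': 48}
```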
@@ -380,14 +368,14 @@ css = """
     background-color: #f5f5f5;
 }
 """
-
 with gr.Blocks(css=css) as demo:
     gr.Markdown("# LTX Video Generator")
-    gr.Markdown("Generate videos from images using AI. Provide at least one input image (first frame or last frame) and a prompt.")
+    gr.Markdown("Generate videos from images using AI. Provide at least one input image (first frame, middle frame, or last frame) and a prompt.")
     with gr.Row():
         with gr.Column():
             gr.Markdown("### Input Options")
             input_image_input = gr.Image(label="First Frame Image (Optional)", type="filepath", sources=["upload", "webcam", "clipboard"])
+            middle_image_input = gr.Image(label="Middle Frame Image (Optional)", type="filepath", sources=["upload", "webcam", "clipboard"])
             final_image_input = gr.Image(label="Last Frame Image (Optional)", type="filepath", sources=["upload", "webcam", "clipboard"])
             prompt_input = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
             duration_input = gr.Slider(
@@ -403,11 +391,11 @@ with gr.Blocks(css=css) as demo:
             gr.Markdown("### Output")
             video_output = gr.Textbox(label="Generated Video URL", interactive=False)
             video_preview = gr.Video(label="Video Preview", interactive=False, visible=False)
+            gr.Markdown("**Note:** You must provide at least one input image (first frame, middle frame, or last frame).")

-    gr.Markdown("**Note:** You must provide at least one input image (either first frame or last frame).")
     generate_button.click(
         fn=generate,
-        inputs=[prompt_input, input_image_input, final_image_input, duration_input],
+        inputs=[prompt_input, input_image_input, middle_image_input, final_image_input, duration_input],
         outputs=[video_output],
         api_name="generate_video"
     )
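Since the click handler is exposed with `api_name="generate_video"`, the updated endpoint now takes the middle image as its third input. A hypothetical call via `gradio_client` (the Space id below is a placeholder, and exact argument handling may differ):

```python
from gradio_client import Client, handle_file

client = Client("user/ltx-video-space")  # placeholder Space id
video_url = client.predict(
    "The creature from the image starts to move",  # prompt_input
    handle_file("first.png"),                      # input_image_input
    handle_file("middle.png"),                     # middle_image_input (new in this commit)
    None,                                          # final_image_input (optional)
    2,                                             # duration_input (seconds)
    api_name="/generate_video",
)
print(video_url)  # the app returns the generated video URL as text
```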