ltx-video-distilled

Running on Zero

App Files Community

multimodalart HF Staff committed on May 16

Commit

190cbef

verified ·

1 Parent(s): 7d1232d

update-inference (#4)

Browse files

- Update inference.py (7b9f053b980d37b4af43b808e6cfada81caee869)
- Update app.py (9742f923e6f1247a0c029af3dce3613733f5530c)

Files changed (2) hide show

app.py +1 -1
inference.py +11 -15

app.py CHANGED Viewed

@@ -346,7 +346,7 @@ with gr.Blocks(css=css) as demo:
         with gr.Column():
             with gr.Tab("image-to-video") as image_tab:
                 video_i_hidden = gr.Textbox(label="video_i", visible=False, value=None)
-                image_i2v = gr.Image(label="Input Image", type="filepath", sources=["upload", "webcam"])
                 i2v_prompt = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
                 i2v_button = gr.Button("Generate Image-to-Video", variant="primary")
             with gr.Tab("text-to-video") as text_tab:

         with gr.Column():
             with gr.Tab("image-to-video") as image_tab:
                 video_i_hidden = gr.Textbox(label="video_i", visible=False, value=None)
+                image_i2v = gr.Image(label="Input Image", type="filepath", sources=["upload", "webcam", "clipboard"])
                 i2v_prompt = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
                 i2v_button = gr.Button("Generate Image-to-Video", variant="primary")
             with gr.Tab("text-to-video") as text_tab:

inference.py CHANGED Viewed

@@ -11,6 +11,7 @@ import imageio
 import json
 import numpy as np
 import torch
 from safetensors import safe_open
 from PIL import Image
 from transformers import (
@@ -35,6 +36,7 @@ from ltx_video.pipelines.pipeline_ltx_video import (
 from ltx_video.schedulers.rf import RectifiedFlowScheduler
 from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
 from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
 MAX_HEIGHT = 720
 MAX_WIDTH = 1280
@@ -96,7 +98,12 @@ def load_image_to_tensor_with_resize_and_crop(
     image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
     if not just_crop:
         image = image.resize((target_width, target_height))
-    frame_tensor = torch.tensor(np.array(image)).permute(2, 0, 1).float()
     frame_tensor = (frame_tensor / 127.5) - 1.0
     # Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
     return frame_tensor.unsqueeze(0).unsqueeze(2)
@@ -266,13 +273,6 @@ def main():
         help="Path to the input video (or imaage) to be modified using the video-to-video pipeline",
     )
-    parser.add_argument(
-        "--strength",
-        type=float,
-        default=1.0,
-        help="Editing strength (noising level) for video-to-video pipeline.",
-    )
     # Conditioning arguments
     parser.add_argument(
         "--conditioning_media_paths",
@@ -407,7 +407,6 @@ def infer(
     negative_prompt: str,
     offload_to_cpu: bool,
     input_media_path: Optional[str] = None,
-    strength: Optional[float] = 1.0,
     conditioning_media_paths: Optional[List[str]] = None,
     conditioning_strengths: Optional[List[float]] = None,
     conditioning_start_frames: Optional[List[int]] = None,
@@ -422,12 +421,10 @@ def infer(
     models_dir = "MODEL_DIR"
-    #ltxv_model_name_or_path = pipeline_config["checkpoint_path"]
-    ltxv_model_name_or_path = "ltxv-13b-0.9.7-distilled-rc3.safetensors"
     if not os.path.isfile(ltxv_model_name_or_path):
         ltxv_model_path = hf_hub_download(
-            repo_id="LTX-Colab/LTX-Video-Preview",
-            #repo_id="Lightricks/LTX-Video",
             filename=ltxv_model_name_or_path,
             local_dir=models_dir,
             repo_type="model",
@@ -616,7 +613,6 @@ def infer(
         frame_rate=frame_rate,
         **sample,
         media_items=media_item,
-        strength=strength,
         conditioning_items=conditioning_items,
         is_video=True,
         vae_per_channel_normalize=True,
@@ -775,4 +771,4 @@ def load_media_file(
 if __name__ == "__main__":
-    main()

 import json
 import numpy as np
 import torch
+import cv2
 from safetensors import safe_open
 from PIL import Image
 from transformers import (
 from ltx_video.schedulers.rf import RectifiedFlowScheduler
 from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
 from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
+import ltx_video.pipelines.crf_compressor as crf_compressor
 MAX_HEIGHT = 720
 MAX_WIDTH = 1280
     image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
     if not just_crop:
         image = image.resize((target_width, target_height))
+    image = np.array(image)
+    image = cv2.GaussianBlur(image, (3, 3), 0)
+    frame_tensor = torch.from_numpy(image).float()
+    frame_tensor = crf_compressor.compress(frame_tensor / 255.0) * 255.0
+    frame_tensor = frame_tensor.permute(2, 0, 1)
     frame_tensor = (frame_tensor / 127.5) - 1.0
     # Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
     return frame_tensor.unsqueeze(0).unsqueeze(2)
         help="Path to the input video (or imaage) to be modified using the video-to-video pipeline",
     )
     # Conditioning arguments
     parser.add_argument(
         "--conditioning_media_paths",
     negative_prompt: str,
     offload_to_cpu: bool,
     input_media_path: Optional[str] = None,
     conditioning_media_paths: Optional[List[str]] = None,
     conditioning_strengths: Optional[List[float]] = None,
     conditioning_start_frames: Optional[List[int]] = None,
     models_dir = "MODEL_DIR"
+    ltxv_model_name_or_path = pipeline_config["checkpoint_path"]
     if not os.path.isfile(ltxv_model_name_or_path):
         ltxv_model_path = hf_hub_download(
+            repo_id="Lightricks/LTX-Video",
             filename=ltxv_model_name_or_path,
             local_dir=models_dir,
             repo_type="model",
         frame_rate=frame_rate,
         **sample,
         media_items=media_item,
         conditioning_items=conditioning_items,
         is_video=True,
         vae_per_channel_normalize=True,
 if __name__ == "__main__":
+    main()