Update app.py
app.py CHANGED
@@ -15,6 +15,27 @@ MODEL_REPO = "rain1011/pyramid-flow-sd3"
 MODEL_VARIANT = "diffusion_transformer_768p"
 MODEL_DTYPE = "bf16"
 
+def center_crop(image, target_width, target_height):
+    width, height = image.size
+    aspect_ratio_target = target_width / target_height
+    aspect_ratio_image = width / height
+
+    if aspect_ratio_image > aspect_ratio_target:
+        # Crop the width (left and right)
+        new_width = int(height * aspect_ratio_target)
+        left = (width - new_width) // 2
+        right = left + new_width
+        top, bottom = 0, height
+    else:
+        # Crop the height (top and bottom)
+        new_height = int(width / aspect_ratio_target)
+        top = (height - new_height) // 2
+        bottom = top + new_height
+        left, right = 0, width
+
+    image = image.crop((left, top, right, bottom))
+    return image
+
 # Download and load the model
 def load_model():
     if not os.path.exists(MODEL_PATH):
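For reference, a quick sanity check of the new center_crop helper (assuming the function from the hunk above is in scope; the image sizes below are hypothetical examples, not taken from the Space):

from PIL import Image

# Hypothetical inputs: a 16:10 landscape image and a 9:16 portrait image.
landscape = Image.new("RGB", (1920, 1200))
portrait = Image.new("RGB", (1080, 1920))

# 1920/1200 = 1.60 < 1280/720 ~= 1.78, so the height is cropped:
# new_height = int(1920 / (1280/720)) = 1080, top = 60, bottom = 1140.
print(center_crop(landscape, 1280, 720).size)  # (1920, 1080)

# 1080/1920 ~= 0.56 < 1.78, so the height is cropped here as well:
# new_height = int(1080 / (1280/720)) = 607, top = 656, bottom = 1263.
print(center_crop(portrait, 1280, 720).size)   # (1080, 607)

Note that center_crop only fixes the aspect ratio; the result keeps the source scale, which is why the second hunk follows it with a resize to the exact 1280x720 model resolution.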
@@ -67,12 +88,13 @@ def generate_video_from_image(image, prompt, duration, video_guidance_scale):
     torch_dtype = torch.bfloat16 if MODEL_DTYPE == "bf16" else torch.float32
 
     target_size = (1280, 720)
-
+    cropped_image = center_crop(image, 1280, 720)
+    resized_image = cropped_image.resize((1280, 720))
 
     with torch.no_grad(), torch.cuda.amp.autocast(enabled=True, dtype=torch_dtype):
         frames = model.generate_i2v(
             prompt=prompt,
-            input_image=image,
+            input_image=resized_image,
             num_inference_steps=[10, 10, 10],
             temp=temp,
             guidance_scale=7.0,
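Taken together, the new preprocessing maps an arbitrary input image onto the model's 1280x720 frame in two steps: crop to the 16:9 target aspect ratio first, so the subsequent resize does not distort, then resize to the exact resolution. A minimal standalone sketch of that pipeline, again assuming center_crop is in scope; preprocess_for_i2v and the "input.jpg" path are illustrative names, not part of the Space's code:

from PIL import Image

def preprocess_for_i2v(image, width=1280, height=720):
    # Crop to the target aspect ratio, then resize to the target resolution.
    # Resizing alone would stretch any image that is not already 16:9.
    cropped = center_crop(image, width, height)
    return cropped.resize((width, height))

# Hypothetical usage before calling model.generate_i2v(..., input_image=...):
image = Image.open("input.jpg").convert("RGB")
resized_image = preprocess_for_i2v(image)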