Update app.py
app.py CHANGED
@@ -32,14 +32,12 @@ LTX_REPO = "Lightricks/LTX-Video"
 MAX_IMAGE_SIZE = 1440
 MAX_NUM_FRAMES = 257
 FPS = 24.0
-
 # Default values
 DEFAULT_NEGATIVE_PROMPT = "worst quality, inconsistent motion, blurry, jittery, distorted"
 DEFAULT_GUIDANCE_SCALE = PIPELINE_CONFIG_YAML.get("first_pass", {}).get("guidance_scale", 1.0)
 DEFAULT_SEED = 42
 DEFAULT_IMPROVE_TEXTURE = True
 TARGET_FIXED_SIDE = 768
-
 # Global variables for loaded models
 pipeline_instance = None
 latent_upsampler_instance = None
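A quick note on the `DEFAULT_GUIDANCE_SCALE` context line above: the chained `.get(...)` lookups mean a config file with no `first_pass` section degrades to the `1.0` fallback instead of raising. A minimal sketch, with `PIPELINE_CONFIG_YAML` stubbed as a plain dict (not part of the commit):

```python
# Illustrative stand-in for the parsed YAML config.
PIPELINE_CONFIG_YAML = {}  # e.g. the config lacks a "first_pass" section

# Chained .get() never raises KeyError: the first .get returns {} on a miss,
# and the second falls back to 1.0.
DEFAULT_GUIDANCE_SCALE = PIPELINE_CONFIG_YAML.get("first_pass", {}).get("guidance_scale", 1.0)
print(DEFAULT_GUIDANCE_SCALE)  # 1.0
```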
@@ -72,7 +70,6 @@ def calculate_new_dimensions(orig_w, orig_h):
     """
     if orig_w == 0 or orig_h == 0:
         return TARGET_FIXED_SIDE, TARGET_FIXED_SIDE
-
     # Step 1: Handle dimensions > 1024
     new_w, new_h = orig_w, orig_h
     if max(orig_w, orig_h) > MAX_IMAGE_SIZE:
@@ -80,18 +77,14 @@ def calculate_new_dimensions(orig_w, orig_h):
         scale = MAX_IMAGE_SIZE / max_dim
         new_w = int(orig_w * scale)
         new_h = int(orig_h * scale)
-
     # Step 2: Round to nearest multiples of 32
     def round_to_multiple(x, multiple=32):
         return round(x / multiple) * multiple
-
     new_w = round_to_multiple(new_w)
     new_h = round_to_multiple(new_h)
-
     # Step 3: Ensure within bounds
     new_w = max(256, min(new_w, MAX_IMAGE_SIZE))
     new_h = max(256, min(new_h, MAX_IMAGE_SIZE))
-
     return new_h, new_w

 def resize_and_squash_image(image_path, target_width, target_height):
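For context on the hunk above: the function scales oversized inputs down to `MAX_IMAGE_SIZE`, snaps both sides to multiples of 32, clamps to [256, 1440], and returns `(height, width)` in that order. A self-contained sketch of the same rule (the `max_dim` assignment sits just outside the hunk, so the sketch inlines it; this is illustrative, not part of the commit):

```python
MAX_IMAGE_SIZE = 1440  # mirrors the constant defined earlier in app.py

def preview_dimensions(orig_w: int, orig_h: int) -> tuple[int, int]:
    """Illustrative mirror of calculate_new_dimensions."""
    new_w, new_h = orig_w, orig_h
    if max(orig_w, orig_h) > MAX_IMAGE_SIZE:
        # Scale the longer side down to MAX_IMAGE_SIZE, preserving aspect ratio
        scale = MAX_IMAGE_SIZE / max(orig_w, orig_h)
        new_w, new_h = int(orig_w * scale), int(orig_h * scale)
    # Snap both sides to the nearest multiple of 32
    new_w = round(new_w / 32) * 32
    new_h = round(new_h / 32) * 32
    # Clamp to [256, MAX_IMAGE_SIZE]
    new_w = max(256, min(new_w, MAX_IMAGE_SIZE))
    new_h = max(256, min(new_h, MAX_IMAGE_SIZE))
    return new_h, new_w  # note: (height, width), matching the callers

print(preview_dimensions(1920, 1080))  # (800, 1440): 1080p scales to 1440x810, 810 snaps to 800
print(preview_dimensions(512, 512))    # (512, 512): already aligned, unchanged
```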
@@ -102,7 +95,6 @@ def resize_and_squash_image(image_path, target_width, target_height):
     img = Image.open(image_path)
     # Resize to exact dimensions, possibly distorting aspect ratio
    img = img.resize((target_width, target_height), Image.LANCZOS)
-
     # Save to temporary file
     temp_path = os.path.join(tempfile.gettempdir(), f"resized_{os.path.basename(image_path)}")
     img.save(temp_path)
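Worth noting about `resize_and_squash_image`: it stretches to the exact target size rather than cropping, deliberately sacrificing aspect ratio so every conditioning frame matches the generation resolution. A tiny illustration (assumes Pillow; not part of the commit):

```python
from PIL import Image

# A 4:3 input forced to a wide target is stretched, not cropped.
img = Image.new("RGB", (1024, 768))
squashed = img.resize((1440, 800), Image.LANCZOS)  # args are (width, height)
print(squashed.size)  # (1440, 800)
```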
@@ -150,13 +142,14 @@ def initialize_models():
     latent_upsampler_instance.to(target_inference_device)

 @spaces.GPU(duration=60)
-def generate(prompt, input_image_url=None, final_image_url=None, duration_ui=2, progress=gr.Progress(track_tqdm=True)):
+def generate(prompt, input_image_url=None, middle_image_url=None, final_image_url=None, duration_ui=2, progress=gr.Progress(track_tqdm=True)):
     """Generate video from image(s) and prompt"""
     # Validate input - at least one image must be provided
-    if input_image_url is None and final_image_url is None:
-        raise gr.Error("Please provide at least one input image (first frame or last frame)")
+    if input_image_url is None and final_image_url is None and middle_image_url is None:
+        raise gr.Error("Please provide at least one input image (first frame, middle frame, or last frame)")

     input_image_filepath = input_image_url
+    middle_image_filepath = middle_image_url
     final_image_filepath = final_image_url

     # Set default values
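The updated guard accepts any one of the three images, or any combination. A hypothetical helper mirroring the new check (the app itself raises `gr.Error` instead of returning a boolean):

```python
def has_any_image(first, middle, last):
    # True when at least one of the three optional inputs was supplied.
    return any(p is not None for p in (first, middle, last))

assert has_any_image(None, "middle.png", None)
assert not has_any_image(None, None, None)  # this case raises gr.Error in the app
```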
@@ -168,7 +161,6 @@ def generate(prompt, input_image_url=None, final_image_url=None, duration_ui=2,

     if randomize_seed:
         seed_ui = random.randint(0, 2**32 - 1)
-
     seed_everething(int(seed_ui))

     # Calculate target frames
@@ -178,30 +170,22 @@ def generate(prompt, input_image_url=None, final_image_url=None, duration_ui=2,
     actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))

     # Calculate dimensions based on the provided image(s)
+    dimension_sources = []
     if input_image_filepath:
+        dimension_sources.append(input_image_filepath)
+    if middle_image_filepath:
+        dimension_sources.append(middle_image_filepath)
+    if final_image_filepath:
+        dimension_sources.append(final_image_filepath)
+
+    if dimension_sources:
         try:
-            img = Image.open(input_image_filepath)
-            orig_w, orig_h = img.size
-            actual_height, actual_width = calculate_new_dimensions(orig_w, orig_h)
-        except Exception as e:
-            print(f"Error processing input image: {e}")
-            if final_image_filepath:
-                try:
-                    img = Image.open(final_image_filepath)
-                    orig_w, orig_h = img.size
-                    actual_height, actual_width = calculate_new_dimensions(orig_w, orig_h)
-                except Exception as e:
-                    print(f"Error processing final image: {e}")
-                    actual_height, actual_width = TARGET_FIXED_SIDE, TARGET_FIXED_SIDE
-            else:
-                actual_height, actual_width = TARGET_FIXED_SIDE, TARGET_FIXED_SIDE
-    elif final_image_filepath:
-        try:
-            img = Image.open(final_image_filepath)
+            # Use the first available image to determine dimensions
+            img = Image.open(dimension_sources[0])
             orig_w, orig_h = img.size
             actual_height, actual_width = calculate_new_dimensions(orig_w, orig_h)
         except Exception as e:
-            print(f"Error processing final image: {e}")
+            print(f"Error processing dimension source image: {e}")
             actual_height, actual_width = TARGET_FIXED_SIDE, TARGET_FIXED_SIDE
     else:
         actual_height, actual_width = TARGET_FIXED_SIDE, TARGET_FIXED_SIDE
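The rewritten block above replaces the old nested try/except fallback chain with a flat priority list: the first image supplied, in first/middle/last order, decides the output resolution, and any failure falls back to 768x768. A sketch of the selection order (illustrative names, not from the commit):

```python
def pick_dimension_source(first, middle, last):
    # First non-None path in first/middle/last order wins.
    sources = [p for p in (first, middle, last) if p]
    return sources[0] if sources else None

assert pick_dimension_source(None, "mid.png", "end.png") == "mid.png"
assert pick_dimension_source("start.png", None, "end.png") == "start.png"
assert pick_dimension_source(None, None, None) is None  # -> TARGET_FIXED_SIDE fallback
```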
@@ -252,43 +236,47 @@ def generate(prompt, input_image_url=None, final_image_url=None, duration_ui=2,
     # Add initial frame conditioning if provided
     if input_image_filepath:
         try:
-            # First resize and squash the image to the exact dimensions we want
             resized_image_path = resize_and_squash_image(input_image_filepath, actual_width, actual_height)
-
-            # Now load this pre-resized image with load_image_to_tensor_with_resize_and_crop
-            # Since it's already the correct size, the "crop" part will be a no-op
             media_tensor = load_image_to_tensor_with_resize_and_crop(
                 resized_image_path, actual_height, actual_width
             )
-
-            # Clean up temporary file
             if os.path.exists(resized_image_path):
                 os.remove(resized_image_path)
-
             media_tensor = torch.nn.functional.pad(media_tensor, padding_values)
             conditioning_items.append(ConditioningItem(media_tensor.to("cuda"), 0, 1.0))
         except Exception as e:
             print(f"Error loading initial image: {e}")
             raise gr.Error(f"Could not load initial image: {e}")

+    # Add middle frame conditioning if provided
+    if middle_image_filepath:
+        try:
+            middle_frame_position = num_frames_padded // 2
+            resized_middle_path = resize_and_squash_image(
+                middle_image_filepath, actual_width, actual_height
+            )
+            middle_media_tensor = load_image_to_tensor_with_resize_and_crop(
+                resized_middle_path, actual_height, actual_width
+            )
+            if os.path.exists(resized_middle_path):
+                os.remove(resized_middle_path)
+            middle_media_tensor = torch.nn.functional.pad(middle_media_tensor, padding_values)
+            conditioning_items.append(ConditioningItem(middle_media_tensor.to("cuda"), middle_frame_position, 1.0))
+        except Exception as e:
+            print(f"Error loading middle image: {e}")
+            raise gr.Error(f"Could not load middle image: {e}")
+
     # Add final frame conditioning if provided
     if final_image_filepath:
         try:
-            # First resize and squash the final image to match the initial image dimensions
             resized_final_path = resize_and_squash_image(
                 final_image_filepath, actual_width, actual_height
             )
-
-            # Now load this pre-resized image with load_image_to_tensor_with_resize_and_crop
-            # Since it's already the correct size, the "crop" part will be a no-op
             final_media_tensor = load_image_to_tensor_with_resize_and_crop(
                 resized_final_path, actual_height, actual_width
             )
-
-            # Clean up temporary file
             if os.path.exists(resized_final_path):
                 os.remove(resized_final_path)
-
             final_media_tensor = torch.nn.functional.pad(final_media_tensor, padding_values)
             conditioning_items.append(ConditioningItem(final_media_tensor.to("cuda"), num_frames_padded - 1, 1.0))
         except Exception as e:
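The new middle-frame block anchors its `ConditioningItem` at `num_frames_padded // 2`, so the three optional images condition frame 0, the midpoint, and the final frame. Worked example, assuming `num_frames_padded = 49` (the real value is computed outside this hunk; 49 is only illustrative):

```python
num_frames_padded = 49  # illustrative; computed earlier in generate()

anchors = {
    "first": 0,                        # ConditioningItem(..., 0, 1.0)
    "middle": num_frames_padded // 2,  # ConditioningItem(..., 24, 1.0)
    "last": num_frames_padded - 1,     # ConditioningItem(..., 48, 1.0)
}
print(anchors)  # {'first': 0, 'middle': 24, 'last': 48}
```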
@@ -380,14 +368,14 @@ css = """
     background-color: #f5f5f5;
 }
 """
-
 with gr.Blocks(css=css) as demo:
     gr.Markdown("# LTX Video Generator")
-    gr.Markdown("Generate videos from images using AI. Provide at least one input image (first frame or last frame) and a prompt.")
+    gr.Markdown("Generate videos from images using AI. Provide at least one input image (first frame, middle frame, or last frame) and a prompt.")
     with gr.Row():
         with gr.Column():
             gr.Markdown("### Input Options")
             input_image_input = gr.Image(label="First Frame Image (Optional)", type="filepath", sources=["upload", "webcam", "clipboard"])
+            middle_image_input = gr.Image(label="Middle Frame Image (Optional)", type="filepath", sources=["upload", "webcam", "clipboard"])
             final_image_input = gr.Image(label="Last Frame Image (Optional)", type="filepath", sources=["upload", "webcam", "clipboard"])
             prompt_input = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
             duration_input = gr.Slider(
@@ -403,11 +391,11 @@ with gr.Blocks(css=css) as demo:
             gr.Markdown("### Output")
             video_output = gr.Textbox(label="Generated Video URL", interactive=False)
             video_preview = gr.Video(label="Video Preview", interactive=False, visible=False)
+            gr.Markdown("**Note:** You must provide at least one input image (first frame, middle frame, or last frame).")

-    gr.Markdown("**Note:** You must provide at least one input image (either first frame or last frame).")
     generate_button.click(
         fn=generate,
-        inputs=[prompt_input, input_image_input, final_image_input, duration_input],
+        inputs=[prompt_input, input_image_input, middle_image_input, final_image_input, duration_input],
         outputs=[video_output],
         api_name="generate_video"
     )
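Since the click handler is exposed with `api_name="generate_video"`, the updated endpoint now takes the middle image as its third input. A hypothetical call via `gradio_client` (the Space id below is a placeholder, and exact argument handling may differ):

```python
from gradio_client import Client, handle_file

client = Client("user/ltx-video-space")  # placeholder Space id
video_url = client.predict(
    "The creature from the image starts to move",  # prompt_input
    handle_file("first.png"),                      # input_image_input
    handle_file("middle.png"),                     # middle_image_input (new in this commit)
    None,                                          # final_image_input (optional)
    2,                                             # duration_input (seconds)
    api_name="/generate_video",
)
print(video_url)  # the app returns the generated video URL as text
```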