KingNish committed
Commit 2942983 · verified · 1 Parent(s): f1f0f08

Update app.py

Files changed (1): app.py (+37 -49)
app.py CHANGED
@@ -32,14 +32,12 @@ LTX_REPO = "Lightricks/LTX-Video"
 MAX_IMAGE_SIZE = 1440
 MAX_NUM_FRAMES = 257
 FPS = 24.0
-
 # Default values
 DEFAULT_NEGATIVE_PROMPT = "worst quality, inconsistent motion, blurry, jittery, distorted"
 DEFAULT_GUIDANCE_SCALE = PIPELINE_CONFIG_YAML.get("first_pass", {}).get("guidance_scale", 1.0)
 DEFAULT_SEED = 42
 DEFAULT_IMPROVE_TEXTURE = True
 TARGET_FIXED_SIDE = 768
-
 # Global variables for loaded models
 pipeline_instance = None
 latent_upsampler_instance = None
@@ -72,7 +70,6 @@ def calculate_new_dimensions(orig_w, orig_h):
     """
     if orig_w == 0 or orig_h == 0:
         return TARGET_FIXED_SIDE, TARGET_FIXED_SIDE
-
     # Step 1: Handle dimensions > 1024
     new_w, new_h = orig_w, orig_h
     if max(orig_w, orig_h) > MAX_IMAGE_SIZE:
@@ -80,18 +77,14 @@ def calculate_new_dimensions(orig_w, orig_h):
         scale = MAX_IMAGE_SIZE / max_dim
         new_w = int(orig_w * scale)
         new_h = int(orig_h * scale)
-
     # Step 2: Round to nearest multiples of 32
     def round_to_multiple(x, multiple=32):
         return round(x / multiple) * multiple
-
     new_w = round_to_multiple(new_w)
     new_h = round_to_multiple(new_h)
-
     # Step 3: Ensure within bounds
     new_w = max(256, min(new_w, MAX_IMAGE_SIZE))
     new_h = max(256, min(new_h, MAX_IMAGE_SIZE))
-
     return new_h, new_w
 
 def resize_and_squash_image(image_path, target_width, target_height):
@@ -102,7 +95,6 @@ def resize_and_squash_image(image_path, target_width, target_height):
     img = Image.open(image_path)
     # Resize to exact dimensions, possibly distorting aspect ratio
     img = img.resize((target_width, target_height), Image.LANCZOS)
-
     # Save to temporary file
     temp_path = os.path.join(tempfile.gettempdir(), f"resized_{os.path.basename(image_path)}")
     img.save(temp_path)
@@ -150,13 +142,14 @@ def initialize_models():
     latent_upsampler_instance.to(target_inference_device)
 
 @spaces.GPU(duration=60)
-def generate(prompt, input_image_url=None, final_image_url=None, duration_ui=2, progress=gr.Progress(track_tqdm=True)):
+def generate(prompt, input_image_url=None, middle_image_url=None, final_image_url=None, duration_ui=2, progress=gr.Progress(track_tqdm=True)):
     """Generate video from image(s) and prompt"""
     # Validate input - at least one image must be provided
-    if input_image_url is None and final_image_url is None:
-        raise gr.Error("Please provide at least one input image (either first frame or last frame)")
+    if input_image_url is None and final_image_url is None and middle_image_url is None:
+        raise gr.Error("Please provide at least one input image (first frame, middle frame, or last frame)")
 
     input_image_filepath = input_image_url
+    middle_image_filepath = middle_image_url
     final_image_filepath = final_image_url
 
     # Set default values
@@ -168,7 +161,6 @@ def generate(prompt, input_image_url=None, final_image_url=None, duration_ui=2,
 
     if randomize_seed:
         seed_ui = random.randint(0, 2**32 - 1)
-
     seed_everething(int(seed_ui))
 
     # Calculate target frames
@@ -178,30 +170,22 @@ def generate(prompt, input_image_url=None, final_image_url=None, duration_ui=2,
     actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
 
     # Calculate dimensions based on the provided image(s)
+    dimension_sources = []
     if input_image_filepath:
+        dimension_sources.append(input_image_filepath)
+    if middle_image_filepath:
+        dimension_sources.append(middle_image_filepath)
+    if final_image_filepath:
+        dimension_sources.append(final_image_filepath)
+
+    if dimension_sources:
         try:
-            img = Image.open(input_image_filepath)
-            orig_w, orig_h = img.size
-            actual_height, actual_width = calculate_new_dimensions(orig_w, orig_h)
-        except Exception as e:
-            print(f"Error processing input image: {e}")
-            if final_image_filepath:
-                try:
-                    img = Image.open(final_image_filepath)
-                    orig_w, orig_h = img.size
-                    actual_height, actual_width = calculate_new_dimensions(orig_w, orig_h)
-                except Exception as e:
-                    print(f"Error processing final image: {e}")
-                    actual_height, actual_width = TARGET_FIXED_SIDE, TARGET_FIXED_SIDE
-            else:
-                actual_height, actual_width = TARGET_FIXED_SIDE, TARGET_FIXED_SIDE
-    elif final_image_filepath:
-        try:
-            img = Image.open(final_image_filepath)
+            # Use the first available image to determine dimensions
+            img = Image.open(dimension_sources[0])
             orig_w, orig_h = img.size
             actual_height, actual_width = calculate_new_dimensions(orig_w, orig_h)
         except Exception as e:
-            print(f"Error processing final image: {e}")
+            print(f"Error processing dimension source image: {e}")
             actual_height, actual_width = TARGET_FIXED_SIDE, TARGET_FIXED_SIDE
     else:
         actual_height, actual_width = TARGET_FIXED_SIDE, TARGET_FIXED_SIDE
@@ -252,43 +236,47 @@ def generate(prompt, input_image_url=None, final_image_url=None, duration_ui=2,
     # Add initial frame conditioning if provided
     if input_image_filepath:
        try:
-            # First resize and squash the image to the exact dimensions we want
            resized_image_path = resize_and_squash_image(input_image_filepath, actual_width, actual_height)
-
-            # Now load this pre-resized image with load_image_to_tensor_with_resize_and_crop
-            # Since it's already the correct size, the "crop" part will be a no-op
            media_tensor = load_image_to_tensor_with_resize_and_crop(
                resized_image_path, actual_height, actual_width
            )
-
-            # Clean up temporary file
            if os.path.exists(resized_image_path):
                os.remove(resized_image_path)
-
            media_tensor = torch.nn.functional.pad(media_tensor, padding_values)
            conditioning_items.append(ConditioningItem(media_tensor.to("cuda"), 0, 1.0))
        except Exception as e:
            print(f"Error loading initial image: {e}")
            raise gr.Error(f"Could not load initial image: {e}")
 
+    # Add middle frame conditioning if provided
+    if middle_image_filepath:
+        try:
+            middle_frame_position = num_frames_padded // 2
+            resized_middle_path = resize_and_squash_image(
+                middle_image_filepath, actual_width, actual_height
+            )
+            middle_media_tensor = load_image_to_tensor_with_resize_and_crop(
+                resized_middle_path, actual_height, actual_width
+            )
+            if os.path.exists(resized_middle_path):
+                os.remove(resized_middle_path)
+            middle_media_tensor = torch.nn.functional.pad(middle_media_tensor, padding_values)
+            conditioning_items.append(ConditioningItem(middle_media_tensor.to("cuda"), middle_frame_position, 1.0))
+        except Exception as e:
+            print(f"Error loading middle image: {e}")
+            raise gr.Error(f"Could not load middle image: {e}")
+
     # Add final frame conditioning if provided
     if final_image_filepath:
        try:
-            # First resize and squash the final image to match the initial image dimensions
            resized_final_path = resize_and_squash_image(
                final_image_filepath, actual_width, actual_height
            )
-
-            # Now load this pre-resized image with load_image_to_tensor_with_resize_and_crop
-            # Since it's already the correct size, the "crop" part will be a no-op
            final_media_tensor = load_image_to_tensor_with_resize_and_crop(
                resized_final_path, actual_height, actual_width
            )
-
-            # Clean up temporary file
            if os.path.exists(resized_final_path):
                os.remove(resized_final_path)
-
            final_media_tensor = torch.nn.functional.pad(final_media_tensor, padding_values)
            conditioning_items.append(ConditioningItem(final_media_tensor.to("cuda"), num_frames_padded - 1, 1.0))
        except Exception as e:
@@ -380,14 +368,14 @@ css = """
     background-color: #f5f5f5;
 }
 """
-
 with gr.Blocks(css=css) as demo:
     gr.Markdown("# LTX Video Generator")
-    gr.Markdown("Generate videos from images using AI. Provide at least one input image (either first frame or last frame) and a prompt.")
+    gr.Markdown("Generate videos from images using AI. Provide at least one input image (first frame, middle frame, or last frame) and a prompt.")
     with gr.Row():
         with gr.Column():
             gr.Markdown("### Input Options")
             input_image_input = gr.Image(label="First Frame Image (Optional)", type="filepath", sources=["upload", "webcam", "clipboard"])
+            middle_image_input = gr.Image(label="Middle Frame Image (Optional)", type="filepath", sources=["upload", "webcam", "clipboard"])
             final_image_input = gr.Image(label="Last Frame Image (Optional)", type="filepath", sources=["upload", "webcam", "clipboard"])
             prompt_input = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
             duration_input = gr.Slider(
@@ -403,11 +391,11 @@ with gr.Blocks(css=css) as demo:
             gr.Markdown("### Output")
             video_output = gr.Textbox(label="Generated Video URL", interactive=False)
             video_preview = gr.Video(label="Video Preview", interactive=False, visible=False)
+            gr.Markdown("**Note:** You must provide at least one input image (first frame, middle frame, or last frame).")
 
-            gr.Markdown("**Note:** You must provide at least one input image (either first frame or last frame).")
     generate_button.click(
         fn=generate,
-        inputs=[prompt_input, input_image_input, final_image_input, duration_input],
+        inputs=[prompt_input, input_image_input, middle_image_input, final_image_input, duration_input],
        outputs=[video_output],
        api_name="generate_video"
    )
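
For reference, here is a minimal sketch of how the updated endpoint could be called once this change is deployed. It is not part of the commit: the Space ID is a placeholder, and it assumes a recent gradio_client release that provides handle_file. The argument order mirrors inputs=[prompt_input, input_image_input, middle_image_input, final_image_input, duration_input] above; any of the three image inputs may be None as long as at least one is set.

    # Hypothetical client-side call; "user/ltx-video-space" is a placeholder Space ID.
    from gradio_client import Client, handle_file

    client = Client("user/ltx-video-space")

    video_url = client.predict(
        "The creature from the image starts to move",  # prompt
        handle_file("first_frame.png"),                # first frame image (optional)
        None,                                          # middle frame image (optional)
        handle_file("last_frame.png"),                 # last frame image (optional)
        2,                                             # duration_ui in seconds
        api_name="/generate_video",
    )
    print(video_url)  # the app returns the generated video URL as text

With this commit, conditioning items land at frame 0 for the first image, num_frames_padded // 2 for the middle image, and num_frames_padded - 1 for the last image, so a single request can pin down the start, midpoint, and end of the generated clip.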