Vijish committed on
Commit
105147f
·
verified ·
1 Parent(s): 859bed9

Update utils/i2i.py

Browse files
Files changed (1) hide show
  1. utils/i2i.py +11 -54
utils/i2i.py CHANGED
@@ -36,7 +36,7 @@ trans_vae.load_state_dict(torch.load("./models/TransparentVAE.pth"), strict=Fals
36
  trans_vae.to(device)
37
 
38
  # Custom function to safely decode latents
39
- def safe_decode(trans_vae, latents, original_alpha=None):
40
  try:
41
  # Standard decoding approach
42
  original_x, x = trans_vae.decode(latents)
@@ -73,12 +73,6 @@ def i2i_gen(
73
  # Process the input image
74
  original_image = (transforms.ToTensor()(input_image)).unsqueeze(0)
75
 
76
- # Store the original alpha channel for later use
77
- original_alpha = None
78
- if original_image.shape[1] == 4:
79
- original_alpha = original_image[:, 3:4, :, :].clone()
80
- print("Original alpha channel detected and saved")
81
-
82
  # Print original image shape for debugging
83
  print(f"Original image shape: {original_image.shape}")
84
 
@@ -114,15 +108,11 @@ def i2i_gen(
114
  if height != original_image.shape[2] or width != original_image.shape[3]:
115
  print(f"Resizing image from {original_image.shape[2]}x{original_image.shape[3]} to {height}x{width}")
116
  original_image = transforms.functional.resize(original_image, (height, width))
117
-
118
- # Also resize the alpha channel if it exists
119
- if original_alpha is not None:
120
- original_alpha = transforms.functional.resize(original_alpha, (height, width))
121
 
122
  # Print resized image shape for debugging
123
  print(f"Resized image shape: {original_image.shape}")
124
 
125
- # Prepare the image for processing
126
  padding_feed = [x for x in original_image.movedim(1, -1).float().cpu().numpy()]
127
  list_of_np_rgb_padded = [pad_rgb(x) for x in padding_feed]
128
  rgb_padded_bchw_01 = torch.from_numpy(np.stack(list_of_np_rgb_padded, axis=0)).float().movedim(-1, 1).to(device)
@@ -139,22 +129,18 @@ def i2i_gen(
139
  alpha = torch.ones((original_image_feed.shape[0], 1, height, width), device=original_image_feed.device)
140
  original_image_feed = torch.cat([original_image_feed, alpha], dim=1)
141
 
142
- # Print alpha channel shape for debugging
143
- print(f"Alpha channel shape: {original_image_feed[:, 3:4, :, :].shape}")
144
-
145
- # Make sure alpha channel is correctly extracted
146
- alpha_channel = original_image_feed[:, 3:4, :, :].clone()
147
-
148
- # Apply alpha to RGB channels
149
- original_image_rgb = original_image_feed[:, :3, :, :].clone() * alpha_channel
150
 
151
  # Print shape information for debugging
152
  print(f"RGB tensor shape: {original_image_feed[:, :3, :, :].shape}")
 
153
  print(f"RGB*alpha tensor shape: {original_image_rgb.shape}")
154
 
155
  # Move tensors to device
156
  original_image_feed = original_image_feed.to(device)
157
  original_image_rgb = original_image_rgb.to(device)
 
158
 
159
  # Verify tensor shapes before encoding
160
  print(f"Before encoding - original_image_feed: {original_image_feed.shape}")
@@ -194,7 +180,7 @@ def i2i_gen(
194
  raise
195
 
196
  # Free up memory
197
- del initial_latent
198
  torch.cuda.empty_cache()
199
 
200
  # Process the latents
@@ -233,42 +219,13 @@ def i2i_gen(
233
  del latents
234
  torch.cuda.empty_cache()
235
 
236
- # Convert to image with transparency
237
  x = x.clamp(0, 1)
238
-
239
- # Create a new tensor with 4 channels (RGBA)
240
- rgba_tensor = torch.zeros((x.shape[0], 4, x.shape[2], x.shape[3]), device=x.device)
241
-
242
- # Copy the RGB channels
243
- rgba_tensor[:, :3, :, :] = x
244
-
245
- # Use the original alpha channel if available, otherwise use the alpha from the model
246
- if original_alpha is not None:
247
- print("Using original alpha channel for output")
248
- # Resize alpha to match output dimensions if needed
249
- if original_alpha.shape[2:] != x.shape[2:]:
250
- original_alpha = torch.nn.functional.interpolate(
251
- original_alpha,
252
- size=(x.shape[2], x.shape[3]),
253
- mode='bilinear',
254
- align_corners=False
255
- )
256
- rgba_tensor[:, 3:4, :, :] = original_alpha.to(x.device)
257
- else:
258
- # If no original alpha, create a binary mask based on pixel intensity
259
- # Pixels that are nearly black (all channels < 0.05) will be transparent
260
- print("Creating alpha mask based on pixel intensity")
261
- rgb_sum = x.sum(dim=1, keepdim=True) / 3.0
262
- mask = (rgb_sum > 0.05).float()
263
- rgba_tensor[:, 3:4, :, :] = mask
264
-
265
- # Convert to PIL image with transparency
266
- rgba_tensor = rgba_tensor.permute(0, 2, 3, 1)
267
- rgba_array = (rgba_tensor[0] * 255).float().cpu().numpy().astype(np.uint8)
268
- img = Image.fromarray(rgba_array, mode='RGBA')
269
 
270
  # Clean up
271
- del original_x, x, rgba_tensor, original_image
272
  torch.cuda.empty_cache()
273
  gc.collect()
274
 
 
36
  trans_vae.to(device)
37
 
38
  # Custom function to safely decode latents
39
+ def safe_decode(trans_vae, latents):
40
  try:
41
  # Standard decoding approach
42
  original_x, x = trans_vae.decode(latents)
 
73
  # Process the input image
74
  original_image = (transforms.ToTensor()(input_image)).unsqueeze(0)
75
 
 
 
 
 
 
 
76
  # Print original image shape for debugging
77
  print(f"Original image shape: {original_image.shape}")
78
 
 
108
  if height != original_image.shape[2] or width != original_image.shape[3]:
109
  print(f"Resizing image from {original_image.shape[2]}x{original_image.shape[3]} to {height}x{width}")
110
  original_image = transforms.functional.resize(original_image, (height, width))
 
 
 
 
111
 
112
  # Print resized image shape for debugging
113
  print(f"Resized image shape: {original_image.shape}")
114
 
115
+ # Prepare the image for processing - EXACTLY as in demo_i2i.py
116
  padding_feed = [x for x in original_image.movedim(1, -1).float().cpu().numpy()]
117
  list_of_np_rgb_padded = [pad_rgb(x) for x in padding_feed]
118
  rgb_padded_bchw_01 = torch.from_numpy(np.stack(list_of_np_rgb_padded, axis=0)).float().movedim(-1, 1).to(device)
 
129
  alpha = torch.ones((original_image_feed.shape[0], 1, height, width), device=original_image_feed.device)
130
  original_image_feed = torch.cat([original_image_feed, alpha], dim=1)
131
 
132
+ # Apply alpha to RGB channels - EXACTLY as in demo_i2i.py
133
+ original_image_rgb = original_image_feed[:, :3, :, :] * original_image_feed[:, 3:4, :, :]
 
 
 
 
 
 
134
 
135
  # Print shape information for debugging
136
  print(f"RGB tensor shape: {original_image_feed[:, :3, :, :].shape}")
137
+ print(f"Alpha channel shape: {original_image_feed[:, 3:4, :, :].shape}")
138
  print(f"RGB*alpha tensor shape: {original_image_rgb.shape}")
139
 
140
  # Move tensors to device
141
  original_image_feed = original_image_feed.to(device)
142
  original_image_rgb = original_image_rgb.to(device)
143
+ rgb_padded_bchw_01 = rgb_padded_bchw_01.to(device)
144
 
145
  # Verify tensor shapes before encoding
146
  print(f"Before encoding - original_image_feed: {original_image_feed.shape}")
 
180
  raise
181
 
182
  # Free up memory
183
+ del initial_latent, original_image
184
  torch.cuda.empty_cache()
185
 
186
  # Process the latents
 
219
  del latents
220
  torch.cuda.empty_cache()
221
 
222
+ # Convert to image - EXACTLY as in demo_i2i.py
223
  x = x.clamp(0, 1)
224
+ x = x.permute(0, 2, 3, 1)
225
+ img = Image.fromarray((x*255).float().cpu().numpy().astype(np.uint8)[0])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
 
227
  # Clean up
228
+ del original_x, x
229
  torch.cuda.empty_cache()
230
  gc.collect()
231