image-ip-composer

Running on Zero

App Files Files Community

linoyts HF Staff committed on Mar 28

Commit

62151c8

verified ·

1 Parent(s): 29b1411

Update app.py

Browse files

initial functionality

Files changed (1) hide show

app.py +41 -39

app.py CHANGED Viewed

@@ -30,7 +30,7 @@ ip_model = IPAdapterXL(pipe, image_encoder_repo, image_encoder_subfolder, ip_ckp
 # Initialize CLIP model
 clip_model, _, preprocess = open_clip.create_model_and_transforms('hf-hub:laion/CLIP-ViT-H-14-laion2B-s32B-b79K')
 clip_model.to(device)
-print("Models initialized successfully!")
 def get_image_embeds(pil_image, model=clip_model, preproc=preprocess, dev=device):
     """Get CLIP image embeddings for a given PIL image"""
@@ -41,9 +41,9 @@ def get_image_embeds(pil_image, model=clip_model, preproc=preprocess, dev=device
 def process_images(
     base_image,
-    concept_image1, concept_desc1,
-    concept_image2=None, concept_desc2=None,
-    concept_image3=None, concept_desc3=None,
     rank1=10, rank2=10, rank3=10,
     prompt=None,
     scale=1.0,
@@ -52,28 +52,28 @@ def process_images(
     """Process the base image and concept images to generate modified images"""
     # Process base image
     base_image_pil = Image.fromarray(base_image).convert("RGB")
-    base_embed = get_image_embeds(base_image_pil)
     # Process concept images
     concept_images = []
     concept_descriptions = []
-    # Add first concept (required)
     if concept_image1 is not None:
         concept_images.append(concept_image1)
-        concept_descriptions.append(concept_desc1 if concept_desc1 else "Concept 1")
     else:
         return None, "Please upload at least one concept image"
     # Add second concept (optional)
     if concept_image2 is not None:
         concept_images.append(concept_image2)
-        concept_descriptions.append(concept_desc2 if concept_desc2 else "Concept 2")
     # Add third concept (optional)
     if concept_image3 is not None:
         concept_images.append(concept_image3)
-        concept_descriptions.append(concept_desc3 if concept_desc3 else "Concept 3")
     # Get all ranks
     ranks = [rank1]
@@ -81,21 +81,23 @@ def process_images(
         ranks.append(rank2)
     if concept_image3 is not None:
         ranks.append(rank3)
     concept_embeds = []
-    for img in concept_images:
-        if img is not None:
-            img_pil = Image.fromarray(img).convert("RGB")
-            concept_embeds.append(get_image_embeds(img_pil))
-    # Compute projection matrices
     projection_matrices = []
-    for i, embed in enumerate(concept_embeds):
-        # For a single image, we need to reshape to have the same format as a collection
-        single_embed = embed.reshape(1, *embed.shape)
-        projection_matrix = compute_dataset_embeds_svd(single_embed, ranks[i])
         projection_matrices.append(projection_matrix)
     # Create projection data structure for the composition
     projections_data = [
         {
@@ -116,14 +118,14 @@ def process_images(
         seed=seed
     )
-    return modified_images
 def process_and_display(
     base_image,
-    concept_image1, concept_desc1,
-    concept_image2=None, concept_desc2=None,
-    concept_image3=None, concept_desc3=None,
-    rank1=10, rank2=10, rank3=10,
     prompt=None, scale=1.0, seed=420
 ):
     """Wrapper for process_images that handles UI updates"""
@@ -135,9 +137,9 @@ def process_and_display(
     modified_images = process_images(
         base_image,
-        concept_image1, concept_desc1,
-        concept_image2, concept_desc2,
-        concept_image3, concept_desc3,
         rank1, rank2, rank3,
         prompt, scale, seed
     )
@@ -159,23 +161,23 @@ with gr.Blocks(title="Image Concept Composition") as demo:
                 with gr.Row():
                     with gr.Column(scale=2):
                         concept_image1 = gr.Image(label="Concept Image 1 (Required)", type="numpy")
-                    with gr.Column(scale=1):
-                        concept_desc1 = gr.Textbox(label="Concept 1 Description", placeholder="Describe this concept")
-                        rank1 = gr.Slider(minimum=1, maximum=50, value=10, step=1, label="Rank 1")
                 with gr.Row():
                     with gr.Column(scale=2):
                         concept_image2 = gr.Image(label="Concept Image 2 (Optional)", type="numpy")
-                    with gr.Column(scale=1):
-                        concept_desc2 = gr.Textbox(label="Concept 2 Description", placeholder="Describe this concept")
-                        rank2 = gr.Slider(minimum=1, maximum=50, value=10, step=1, label="Rank 2")
                 with gr.Row():
                     with gr.Column(scale=2):
                         concept_image3 = gr.Image(label="Concept Image 3 (Optional)", type="numpy")
-                    with gr.Column(scale=1):
-                        concept_desc3 = gr.Textbox(label="Concept 3 Description", placeholder="Describe this concept")
-                        rank3 = gr.Slider(minimum=1, maximum=50, value=10, step=1, label="Rank 3")
                 prompt = gr.Textbox(label="Guidance Prompt (Optional)", placeholder="Optional text prompt to guide generation")
@@ -192,9 +194,9 @@ with gr.Blocks(title="Image Concept Composition") as demo:
             fn=process_and_display,
             inputs=[
                 base_image,
-                concept_image1, concept_desc1,
-                concept_image2, concept_desc2,
-                concept_image3, concept_desc3,
                 rank1, rank2, rank3,
                 prompt, scale, seed
             ],

 # Initialize CLIP model
 clip_model, _, preprocess = open_clip.create_model_and_transforms('hf-hub:laion/CLIP-ViT-H-14-laion2B-s32B-b79K')
 clip_model.to(device)
+# print("Models initialized successfully!")
 def get_image_embeds(pil_image, model=clip_model, preproc=preprocess, dev=device):
     """Get CLIP image embeddings for a given PIL image"""
 def process_images(
     base_image,
+    concept_image1, concept_name1,
+    concept_image2=None, concept_name2=None,
+    concept_image3=None, concept_name3=None,
     rank1=10, rank2=10, rank3=10,
     prompt=None,
     scale=1.0,
     """Process the base image and concept images to generate modified images"""
     # Process base image
     base_image_pil = Image.fromarray(base_image).convert("RGB")
+    base_embed = get_image_embeds(base_image_pil, clip_model, preprocess, device)
     # Process concept images
     concept_images = []
     concept_descriptions = []
+    # for demo purposes we allow for up to 3 different concepts and corresponding concept images
     if concept_image1 is not None:
         concept_images.append(concept_image1)
+        concept_descriptions.append(concept_name1)
     else:
         return None, "Please upload at least one concept image"
     # Add second concept (optional)
     if concept_image2 is not None:
         concept_images.append(concept_image2)
+        concept_descriptions.append(concept_name2)
     # Add third concept (optional)
     if concept_image3 is not None:
         concept_images.append(concept_image3)
+        concept_descriptions.append(concept_name3)
     # Get all ranks
     ranks = [rank1]
         ranks.append(rank2)
     if concept_image3 is not None:
         ranks.append(rank3)
     concept_embeds = []
     projection_matrices = []
+    # for the demo, we assume 1 concept image per concept
+    # for each concept image, we calculate its image embeddings and load the concept's textual embeddings to compute the projection matrix over them
+    for i, concept_name in enumerate(concept_descriptions):
+        img_pil = Image.fromarray(concept_images[i]).convert("RGB")
+        concept_embeds.append(get_image_embeds(img_pil, clip_model, preprocess, device))
+        embeds_path = f"./IP_Composer/text_embeddings/{concept_name}_descriptions.npy"
+        with open(embeds_path, "rb") as f:
+            all_embeds_in = np.load(f)
+        projection_matrix = compute_dataset_embeds_svd(all_embeds_in, ranks[i])
         projection_matrices.append(projection_matrix)
     # Create projection data structure for the composition
     projections_data = [
         {
         seed=seed
     )
+    return modified_images[0]
 def process_and_display(
     base_image,
+    concept_image1, concept_name1="age",
+    concept_image2=None, concept_name2=None,
+    concept_image3=None, concept_name3=None,
+    rank1=30, rank2=30, rank3=30,
     prompt=None, scale=1.0, seed=420
 ):
     """Wrapper for process_images that handles UI updates"""
     modified_images = process_images(
         base_image,
+        concept_image1, concept_name1,
+        concept_image2, concept_name2,
+        concept_image3, concept_name3,
         rank1, rank2, rank3,
         prompt, scale, seed
     )
                 with gr.Row():
                     with gr.Column(scale=2):
                         concept_image1 = gr.Image(label="Concept Image 1 (Required)", type="numpy")
+                    with gr.Row():
+                        concept_name1 = gr.Textbox(label="Concept 1 Description", placeholder="Describe this concept")
+                        rank1 = gr.Slider(minimum=1, maximum=50, value=30, step=1, label="Rank 1")
                 with gr.Row():
                     with gr.Column(scale=2):
                         concept_image2 = gr.Image(label="Concept Image 2 (Optional)", type="numpy")
+                    with gr.Row():
+                        concept_name2 = gr.Textbox(label="Concept 2 Description", placeholder="Describe this concept")
+                        rank2 = gr.Slider(minimum=1, maximum=50, value=30, step=1, label="Rank 2")
                 with gr.Row():
                     with gr.Column(scale=2):
                         concept_image3 = gr.Image(label="Concept Image 3 (Optional)", type="numpy")
+                    with gr.Row():
+                        concept_name3 = gr.Textbox(label="Concept 3 Description", placeholder="Describe this concept")
+                        rank3 = gr.Slider(minimum=1, maximum=50, value=30, step=1, label="Rank 3")
                 prompt = gr.Textbox(label="Guidance Prompt (Optional)", placeholder="Optional text prompt to guide generation")
             fn=process_and_display,
             inputs=[
                 base_image,
+                concept_image1, concept_name1,
+                concept_image2, concept_name2,
+                concept_image3, concept_name3,
                 rank1, rank2, rank3,
                 prompt, scale, seed
             ],