Ashoka74 committed on
Commit d068918 · verified · 1 Parent(s): a0b057f

Update gradio_demo.py

Files changed (1)
  1. gradio_demo.py +82 -40
gradio_demo.py CHANGED
@@ -953,56 +953,92 @@ def process_image(input_image, input_text):
     task = DinoxTask(
         image_url=image_url,
-        prompts=[TextPrompt(text=input_text)]
+        prompts=[TextPrompt(text=input_text)],
+        targets=[DetectionTarget.BBox, DetectionTarget.Mask]
     )
+
     client.run_task(task)
     result = task.result
     objects = result.objects
 
 
-    for obj in objects:
-        input_boxes.append(obj.bbox)
+    # for obj in objects:
+    #     input_boxes.append(obj.bbox)
+    #     confidences.append(obj.score)
+    #     cls_name = obj.category.lower().strip()
+    #     class_names.append(cls_name)
+    #     class_ids.append(class_name_to_id[cls_name])
+
+    # input_boxes = np.array(input_boxes)
+    # class_ids = np.array(class_ids)
+
+    classes = [x.strip().lower() for x in input_text.split('.') if x]
+    class_name_to_id = {name: id for id, name in enumerate(classes)}
+    class_id_to_name = {id: name for name, id in class_name_to_id.items()}
+
+    boxes = []
+    masks = []
+    confidences = []
+    class_names = []
+    class_ids = []
+
+    for obj in objects:
+        boxes.append(obj.bbox)
+        masks.append(DetectionTask.rle2mask(DetectionTask.string2rle(obj.mask.counts), obj.mask.size))  # convert RLE mask to np.array using the DDS API
         confidences.append(obj.score)
         cls_name = obj.category.lower().strip()
         class_names.append(cls_name)
         class_ids.append(class_name_to_id[cls_name])
-
-    input_boxes = np.array(input_boxes)
+
+    boxes = np.array(boxes)
+    masks = np.array(masks)
     class_ids = np.array(class_ids)
-
+    labels = [
+        f"{class_name} {confidence:.2f}"
+        for class_name, confidence
+        in zip(class_names, confidences)
+    ]
+
     # Initialize SAM2
-    torch.autocast(device_type=DEVICE, dtype=torch.bfloat16).__enter__()
-    if torch.cuda.get_device_properties(0).major >= 8:
-        torch.backends.cuda.matmul.allow_tf32 = True
-        torch.backends.cudnn.allow_tf32 = True
+    # torch.autocast(device_type=DEVICE, dtype=torch.bfloat16).__enter__()
+    # if torch.cuda.get_device_properties(0).major >= 8:
+    #     torch.backends.cuda.matmul.allow_tf32 = True
+    #     torch.backends.cudnn.allow_tf32 = True
 
-    sam2_model = build_sam2(SAM2_MODEL_CONFIG, SAM2_CHECKPOINT, device=DEVICE)
-    sam2_predictor = SAM2ImagePredictor(sam2_model)
-    sam2_predictor.set_image(input_image)
+    # sam2_model = build_sam2(SAM2_MODEL_CONFIG, SAM2_CHECKPOINT, device=DEVICE)
+    # sam2_predictor = SAM2ImagePredictor(sam2_model)
+    # sam2_predictor.set_image(input_image)
 
     # sam2_predictor = run_sam_inference(SAM_IMAGE_MODEL, input_image, detections)
 
 
     # Get masks from SAM2
-    masks, scores, logits = sam2_predictor.predict(
-        point_coords=None,
-        point_labels=None,
-        box=input_boxes,
-        multimask_output=False,
-    )
+    # masks, scores, logits = sam2_predictor.predict(
+    #     point_coords=None,
+    #     point_labels=None,
+    #     box=input_boxes,
+    #     multimask_output=False,
+    # )
+
     if masks.ndim == 4:
         masks = masks.squeeze(1)
 
     # Create visualization
-    labels = [f"{class_name} {confidence:.2f}"
-              for class_name, confidence in zip(class_names, confidences)]
+    # labels = [f"{class_name} {confidence:.2f}"
+    #           for class_name, confidence in zip(class_names, confidences)]
 
+    # detections = sv.Detections(
+    #     xyxy=input_boxes,
+    #     mask=masks.astype(bool),
+    #     class_id=class_ids
+    # )
+
     detections = sv.Detections(
-        xyxy=input_boxes,
-        mask=masks.astype(bool),
-        class_id=class_ids
-    )
-
+        xyxy=boxes,
+        mask=masks.astype(bool),
+        class_id=class_ids,
+    )
+
     box_annotator = sv.BoxAnnotator()
     label_annotator = sv.LabelAnnotator()
     mask_annotator = sv.MaskAnnotator()
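
The core change in this hunk: instead of running SAM2 on DINO-X boxes, the task now requests `DetectionTarget.Mask` directly and decodes each object's run-length-encoded mask with the DDS helpers (`DetectionTask.string2rle` / `DetectionTask.rle2mask`). As a rough illustration of what that decoding step produces, here is a minimal standalone decoder for uncompressed COCO-style RLE; the column-major convention and the `counts`/`size` layout are assumptions based on the diff, not the DDS implementation:

```python
import numpy as np

def rle_to_mask(counts, size):
    """Decode uncompressed COCO-style RLE into a binary mask.

    `counts` alternates run lengths of 0s and 1s, starting with 0s;
    `size` is (height, width). COCO stores pixels column-major.
    """
    h, w = size
    flat = np.zeros(h * w, dtype=np.uint8)
    pos, value = 0, 0
    for run in counts:
        flat[pos:pos + run] = value  # write the current run
        pos += run
        value = 1 - value            # runs alternate 0 / 1
    return flat.reshape((h, w), order="F")  # column-major -> (h, w)

# Example: a 2x2 mask with only the bottom-left pixel set.
# Column-major pixel order is (0,0), (1,0), (0,1), (1,1).
mask = rle_to_mask([1, 1, 2], (2, 2))
assert mask[1, 0] == 1 and mask.sum() == 1
```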
@@ -1157,6 +1193,8 @@ with block:
         with gr.Group():
             gr.Markdown("Extract Foreground")
             input_image = gr.Image(type="numpy", label="Input Image", height=480)
-            find_objects_button = gr.Button(value="(Option 1) Segment Object from text")
-            text_prompt = gr.Textbox(
-                label="Text Prompt",
+            with gr.Row():
+                with gr.Group():
+                    find_objects_button = gr.Button(value="(Option 1) Segment Object from text")
+                    text_prompt = gr.Textbox(
+                        label="Text Prompt",
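
Note that the two added `with` statements only take effect if the widgets that follow are re-indented into their scope, as the hunk above reflects. A minimal sketch of the resulting nesting, limited to the widgets visible in this hunk (the enclosing layout is an assumption):

```python
import gradio as gr

with gr.Blocks() as block:
    with gr.Group():
        gr.Markdown("Extract Foreground")
        input_image = gr.Image(type="numpy", label="Input Image", height=480)
        with gr.Row():        # added: row to hold the segmentation options
            with gr.Group():  # added: groups the text-prompt option
                find_objects_button = gr.Button(value="(Option 1) Segment Object from text")
                text_prompt = gr.Textbox(label="Text Prompt")

# block.launch()
```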
@@ -1311,25 +1349,29 @@ with block:
     # # return mask_mover.create_composite(self.original_bg, x_pos, y_pos, scale)
 
-    def update_position(background, x_pos, y_pos, scale):
-        if background is None:
+    def update_position(self, background, x_pos, y_pos, scale, *args):
+        if self.original_bg is None:
+            print("No original background set.")
             return None
-        # Restore a fresh copy of the original background
-        fresh_bg = bg_manager.original_bg.copy()
-        # Composite the foreground once
+        fresh_bg = self.original_bg.copy()  # Start from a clean original background
         return mask_mover.create_composite(fresh_bg, float(x_pos), float(y_pos), float(scale))
 
-
+
     # Create an instance of BackgroundManager
     bg_manager = BackgroundManager()
 
-    def update_position(background, x_pos, y_pos, scale):
-        if background is None:
-            return None
-        fresh_bg = bg_manager.original_bg.copy()  # Start from a clean original background
-        # Composite the extracted foreground onto fresh_bg
-        return mask_mover.create_composite(fresh_bg, float(x_pos), float(y_pos), float(scale))
+    def update_position_wrapper(background, x_pos, y_pos, scale):
+        return bg_manager.update_position(background, x_pos, y_pos, scale)
+
+
+
+    # def update_position(background, x_pos, y_pos, scale):
+    #     if background is None:
+    #         return None
+    #     fresh_bg = bg_manager.original_bg.copy()  # Start from a clean original background
+    #     # Composite the extracted foreground onto fresh_bg
+    #     return mask_mover.create_composite(fresh_bg, float(x_pos), float(y_pos), float(scale))
 
 
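
The rewritten `update_position` moves onto the manager and recomposites from a pristine copy of `self.original_bg`, so repeated slider events never stack composites on top of each other. A self-contained sketch of that copy-then-composite pattern, with a naive paste standing in for `mask_mover.create_composite` (a hypothetical stand-in, not the app's compositor):

```python
import numpy as np

class BackgroundManager:
    """Holds a pristine background so each update composites onto the
    original image rather than onto the previous composite."""

    def __init__(self):
        self.original_bg = None

    def set_background(self, background):
        self.original_bg = background.copy()

    def update_position(self, background, x_pos, y_pos, scale=1.0, *args):
        if self.original_bg is None:
            print("No original background set.")
            return None
        fresh_bg = self.original_bg.copy()  # never draw onto an already-composited frame
        fg = np.full((8, 8, 3), 255, dtype=np.uint8)  # hypothetical foreground patch
        fresh_bg[int(y_pos):int(y_pos) + 8, int(x_pos):int(x_pos) + 8] = fg
        return fresh_bg

bg_manager = BackgroundManager()
bg_manager.set_background(np.zeros((64, 64, 3), dtype=np.uint8))
frame = bg_manager.update_position(None, x_pos=10, y_pos=20)
assert frame is not None and frame[20, 10, 0] == 255
```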
@@ -1341,19 +1383,19 @@ with block:
     )
 
     x_slider.change(
-        fn=lambda bg, x, y, scale: bg_manager.update_position(bg, x, y, scale),
+        fn=update_position_wrapper,
         inputs=[input_bg, x_slider, y_slider, fg_scale_slider],
         outputs=[input_bg]
     )
 
     y_slider.change(
-        fn=lambda bg, x, y, scale: bg_manager.update_position(bg, x, y, scale),
+        fn=update_position_wrapper,
         inputs=[input_bg, x_slider, y_slider, fg_scale_slider],
         outputs=[input_bg]
     )
 
     fg_scale_slider.change(
-        fn=lambda bg, x, y, scale: bg_manager.update_position(bg, x, y, scale),
+        fn=update_position_wrapper,
         inputs=[input_bg, x_slider, y_slider, fg_scale_slider],
         outputs=[input_bg]
     )
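
With the logic on `BackgroundManager`, the three identical lambdas collapse into the single named `update_position_wrapper`, which also resolves `bg_manager` when the event fires. A runnable sketch of the wiring; the slider ranges and the stub manager are assumptions, while the widget and callback names match the diff:

```python
import gradio as gr

class BackgroundManager:  # stub standing in for the app's manager
    def update_position(self, background, x_pos, y_pos, scale):
        return background  # identity here; the real method recomposites

bg_manager = BackgroundManager()

def update_position_wrapper(background, x_pos, y_pos, scale):
    # One named callback instead of three identical lambdas: easier to
    # test, and it looks up bg_manager at call time.
    return bg_manager.update_position(background, x_pos, y_pos, scale)

with gr.Blocks() as block:
    input_bg = gr.Image(type="numpy", label="Background")
    x_slider = gr.Slider(0, 1024, value=0, label="X Position")
    y_slider = gr.Slider(0, 1024, value=0, label="Y Position")
    fg_scale_slider = gr.Slider(0.1, 3.0, value=1.0, label="Foreground Scale")

    for control in (x_slider, y_slider, fg_scale_slider):
        control.change(
            fn=update_position_wrapper,
            inputs=[input_bg, x_slider, y_slider, fg_scale_slider],
            outputs=[input_bg],
        )
```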
 