Commit 7877974 · Parent: bd1381f
Debug mask images

app.py CHANGED
@@ -225,100 +225,99 @@ def draw_box(box: torch.Tensor, draw: ImageDraw.Draw, label: Optional[str]) -> None
 
 def run_grounded_sam(input_image):
     """Main function to run GroundingDINO and SAM-HQ"""
-    …
+    # Create output directory
+    os.makedirs(OUTPUT_DIR, exist_ok=True)
+    text_prompt = 'car'
+    task_type = 'text'
+    box_threshold = 0.3
+    text_threshold = 0.25
+    iou_threshold = 0.8
+    hq_token_only = True
+
+    # Process input image
+    if isinstance(input_image, dict):
+        # Input from gradio sketch component
+        scribble = np.array(input_image["mask"])
+        image_pil = input_image["image"].convert("RGB")
+    else:
+        # Direct image input
+        image_pil = input_image.convert("RGB") if input_image else None
+        scribble = None
 
-    …
-            scribble = np.array(input_image["mask"])
-            image_pil = input_image["image"].convert("RGB")
-        else:
-            # Direct image input
-            image_pil = input_image.convert("RGB") if input_image else None
-            scribble = None
-
-        if image_pil is None:
-            logger.error("No input image provided")
-            return [Image.new('RGB', (400, 300), color='gray')]
+    if image_pil is None:
+        logger.error("No input image provided")
+        return [Image.new('RGB', (400, 300), color='gray')]
 
-    …
+    # Transform image for GroundingDINO
+    transformed_image = transform_image(image_pil)
+
+    # Load models as needed
+    ModelManager.load_model('groundingdino')
+    size = image_pil.size
+    H, W = size[1], size[0]
+
+    # Run GroundingDINO with provided text
+    boxes_filt, scores, pred_phrases = get_grounding_output(
+        transformed_image, text_prompt, box_threshold, text_threshold
+    )
 
-    …
-        # Apply non-maximum suppression if we have multiple boxes
-        if boxes_filt.size(0) > 1:
-            logger.info(f"Before NMS: {boxes_filt.shape[0]} boxes")
-            nms_idx = torchvision.ops.nms(boxes_filt, scores, iou_threshold).numpy().tolist()
-            boxes_filt = boxes_filt[nms_idx]
-            pred_phrases = [pred_phrases[idx] for idx in nms_idx]
-            logger.info(f"After NMS: {boxes_filt.shape[0]} boxes")
-
-        # Load SAM model
-        ModelManager.load_model('sam')
-        sam_predictor = ModelManager.get_model('sam_predictor')
-
-        # Set image for SAM
-        image = np.array(image_pil)
-        sam_predictor.set_image(image)
-
-        # Run SAM
-        # Use boxes for these task types
-        if boxes_filt.size(0) == 0:
-            logger.warning("No boxes detected")
-            return [image_pil, Image.new('RGBA', size, color=(0, 0, 0, 0))]
-
-        transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes_filt, image.shape[:2]).to(device)
+    if boxes_filt is not None:
+        # Scale boxes to image dimensions
+        for i in range(boxes_filt.size(0)):
+            boxes_filt[i] = boxes_filt[i] * torch.Tensor([W, H, W, H])
+            boxes_filt[i][:2] -= boxes_filt[i][2:] / 2
+            boxes_filt[i][2:] += boxes_filt[i][:2]
 
-    …
+    # Apply non-maximum suppression if we have multiple boxes
+    if boxes_filt.size(0) > 1:
+        logger.info(f"Before NMS: {boxes_filt.shape[0]} boxes")
+        nms_idx = torchvision.ops.nms(boxes_filt, scores, iou_threshold).numpy().tolist()
+        boxes_filt = boxes_filt[nms_idx]
+        pred_phrases = [pred_phrases[idx] for idx in nms_idx]
+        logger.info(f"After NMS: {boxes_filt.shape[0]} boxes")
+
+    # Load SAM model
+    ModelManager.load_model('sam')
+    sam_predictor = ModelManager.get_model('sam_predictor')
+
+    # Set image for SAM
+    image = np.array(image_pil)
+    sam_predictor.set_image(image)
+
+    # Run SAM
+    # Use boxes for these task types
+    if boxes_filt.size(0) == 0:
+        logger.warning("No boxes detected")
+        return [image_pil, Image.new('RGBA', size, color=(0, 0, 0, 0))]
+
+    transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes_filt, image.shape[:2]).to(device)
+
+    masks, _, _ = sam_predictor.predict_torch(
+        point_coords=None,
+        point_labels=None,
+        boxes=transformed_boxes,
+        multimask_output=False,
+        hq_token_only=hq_token_only,
+    )
+
+    # Create mask image
+    mask_image = Image.new('RGBA', size, color=(0, 0, 0, 0))
+    mask_draw = ImageDraw.Draw(mask_image)
+
+    # Draw masks
+    for mask in masks:
+        draw_mask(mask[0].cpu().numpy(), mask_draw)
+
+    # Draw boxes and points on original image
+    image_draw = ImageDraw.Draw(image_pil)
 
-    …
+    for box, label in zip(boxes_filt, pred_phrases):
+        draw_box(box, image_draw, label)
 
-    …
+    return mask_image
 
-    except Exception as e:
-        …
+    # except Exception as e:
+    #     logger.error(f"Error in run_grounded_sam: {e}")
     # # Return original image on error
     # if isinstance(input_image, dict) and "image" in input_image:
     #     return [input_image["image"], Image.new('RGBA', input_image["image"].size, color=(0, 0, 0, 0))]
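The box-scaling loop in the new version converts GroundingDINO's normalized (cx, cy, w, h) outputs into pixel-space (x0, y0, x1, y1) corners, the layout that torchvision.ops.nms and SAM's box prompts expect. A minimal standalone sketch of the same arithmetic, vectorized; the function name and sample values are illustrative, not from the Space:

import torch

def cxcywh_norm_to_xyxy(boxes: torch.Tensor, W: int, H: int) -> torch.Tensor:
    # Scale normalized centers/sizes to pixels, as the diff's per-box loop does
    boxes = boxes * torch.tensor([W, H, W, H], dtype=boxes.dtype)
    xyxy = boxes.clone()
    xyxy[:, :2] -= boxes[:, 2:] / 2           # top-left = center - half size
    xyxy[:, 2:] = xyxy[:, :2] + boxes[:, 2:]  # bottom-right = top-left + size
    return xyxy

# A box centered in a 640x480 image, covering half of each dimension:
print(cxcywh_norm_to_xyxy(torch.tensor([[0.5, 0.5, 0.5, 0.5]]), W=640, H=480))
# tensor([[160., 120., 480., 360.]])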
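Note the changed contract: the function now returns the bare RGBA mask_image rather than a list of images for the Gradio gallery, which matches the "Debug mask images" intent of the commit. A hedged sketch of driving the debug version directly; the file names are hypothetical, and a plain PIL image takes the non-dict branch above:

from PIL import Image

img = Image.open("test_car.jpg")   # hypothetical input; 'car' is the hardcoded prompt
mask = run_grounded_sam(img)       # debug version: returns only the RGBA mask
mask.save("debug_mask.png")        # transparent wherever SAM-HQ predicted nothing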