Ashoka74 committed
Commit 01f030e · verified · 1 Parent(s): 9615931

Update gradio_demo.py

Files changed (1):
  gradio_demo.py (+104 -2)
gradio_demo.py CHANGED
@@ -9,6 +9,8 @@ import db_examples
 import datetime
 from pathlib import Path
 from io import BytesIO
+from hydra import initialize, compose
+

 from PIL import Image
 from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
@@ -23,6 +25,7 @@ from enum import Enum
 from torch.hub import download_url_to_file
 import tempfile

+
 from sam2.build_sam import build_sam2

 from sam2.sam2_image_predictor import SAM2ImagePredictor
@@ -39,7 +42,6 @@ client = httpx.Client(timeout=httpx.Timeout(10.0)) # Set timeout to 10 seconds



-
 # from FLORENCE
 import spaces
 import supervision as sv
@@ -49,6 +51,7 @@ from PIL import Image
 from utils.sam import load_sam_image_model, run_sam_inference


+
 try:
     import xformers
     import xformers.ops
@@ -83,6 +86,9 @@ model.load_state_dict(torch.load('checkpoints/depth_anything_v2_vits.pth', map_l
 model = model.to(device)
 model.eval()

+SAM_IMAGE_MODEL = load_sam_image_model(device=device)
+
+
 # Change UNet

 with torch.no_grad():
@@ -826,8 +832,10 @@ def compress_image(image):
     compressed_img = np.array(Image.open("compressed_image.jpg"))
     return compressed_img

+
 @spaces.GPU(duration=60)
-@torch.inference_mode()
+@torch.inference_mode
+@hydra.main(config_path="/home/user/app/configs", config_name="sam2_hiera_l")
 def process_image(input_image, input_text):
     """Main processing function for the Gradio interface"""

@@ -839,6 +847,8 @@ def process_image(input_image, input_text):
     OUTPUT_DIR = Path("outputs/grounded_sam2_dinox_demo")
     OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

+
+
     # Initialize DDS client
     config = Config(API_TOKEN)
     client = Client(config)
@@ -933,6 +943,98 @@ def process_image(input_image, input_text):

         return annotated_frame, cropped_rgba, gr.update(visible=True), gr.update(visible=True)

+
+    else:
+        # Run DINO-X detection
+        task = DinoxTask(
+            image_url=image_url,
+            prompts=[TextPrompt(text=input_text)]
+        )
+        client.run_task(task)
+        result = task.result
+        objects = result.objects
+
+
+
+        for obj in objects:
+            input_boxes.append(obj.bbox)
+            confidences.append(obj.score)
+            cls_name = obj.category.lower().strip()
+            class_names.append(cls_name)
+            class_ids.append(class_name_to_id[cls_name])
+
+        input_boxes = np.array(input_boxes)
+        class_ids = np.array(class_ids)
+
+        # Initialize SAM2
+        torch.autocast(device_type=DEVICE, dtype=torch.bfloat16).__enter__()
+        if torch.cuda.get_device_properties(0).major >= 8:
+            torch.backends.cuda.matmul.allow_tf32 = True
+            torch.backends.cudnn.allow_tf32 = True
+
+        sam2_model = build_sam2(SAM2_MODEL_CONFIG, SAM2_CHECKPOINT, device=DEVICE)
+        sam2_predictor = SAM2ImagePredictor(sam2_model)
+        sam2_predictor.set_image(input_image)
+
+        # sam2_predictor = run_sam_inference(SAM_IMAGE_MODEL, input_image, detections)
+
+
+        # Get masks from SAM2
+        masks, scores, logits = sam2_predictor.predict(
+            point_coords=None,
+            point_labels=None,
+            box=input_boxes,
+            multimask_output=False,
+        )
+        if masks.ndim == 4:
+            masks = masks.squeeze(1)
+
+        # Create visualization
+        labels = [f"{class_name} {confidence:.2f}"
+                  for class_name, confidence in zip(class_names, confidences)]
+
+        detections = sv.Detections(
+            xyxy=input_boxes,
+            mask=masks.astype(bool),
+            class_id=class_ids
+        )
+
+        box_annotator = sv.BoxAnnotator()
+        label_annotator = sv.LabelAnnotator()
+        mask_annotator = sv.MaskAnnotator()
+
+        annotated_frame = input_image.copy()
+        annotated_frame = box_annotator.annotate(scene=annotated_frame, detections=detections)
+        annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)
+        annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections)
+
+        # Create transparent mask for first detected object
+        if len(detections) > 0:
+            # Get first mask
+            first_mask = detections.mask[0]
+
+            # Get original RGB image
+            img = input_image.copy()
+            H, W, C = img.shape
+
+            # Create RGBA image
+            alpha = np.zeros((H, W, 1), dtype=np.uint8)
+            alpha[first_mask] = 255
+            rgba = np.dstack((img, alpha)).astype(np.uint8)
+
+            # Crop to mask bounds to minimize image size
+            y_indices, x_indices = np.where(first_mask)
+            y_min, y_max = y_indices.min(), y_indices.max()
+            x_min, x_max = x_indices.min(), x_indices.max()
+
+            # Crop the RGBA image
+            cropped_rgba = rgba[y_min:y_max+1, x_min:x_max+1]
+
+            # Set extracted foreground for mask mover
+            mask_mover.set_extracted_fg(cropped_rgba)
+
+            return annotated_frame, cropped_rgba, gr.update(visible=True), gr.update(visible=True)
+    return annotated_frame, None, gr.update(visible=False), gr.update(visible=False)


 block = gr.Blocks().queue()
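
For reference on the initialize/compose import added above: these two calls are the programmatic counterpart to the @hydra.main decorator, which is normally reserved for a single CLI entry point rather than a per-request Gradio callback. A minimal sketch of composing the same config by hand, assuming the configs/sam2_hiera_l.yaml layout implied by the decorator arguments:

# Minimal sketch: composing the SAM2 Hydra config without @hydra.main.
# The config_path and config_name mirror the decorator arguments in this
# commit; treat the relative "configs" directory as an assumption.
from hydra import initialize, compose

with initialize(version_base=None, config_path="configs"):
    cfg = compose(config_name="sam2_hiera_l")

print(cfg)  # inspect the composed config before building the SAM2 model

Composing the config once at module import, like the SAM_IMAGE_MODEL initialization above, keeps Hydra out of the per-request path.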
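
The SAM2 block in this commit enters torch.autocast(...).__enter__() without a visible matching exit; the scoped form below is the more common pattern. This is only a sketch: the config and checkpoint values stand in for the SAM2_MODEL_CONFIG and SAM2_CHECKPOINT constants the diff assumes are defined elsewhere in gradio_demo.py.

# Minimal sketch: scoping autocast around box-prompted SAM2 prediction
# instead of calling __enter__() by hand. Paths below are assumptions
# standing in for SAM2_MODEL_CONFIG / SAM2_CHECKPOINT in the real file.
import numpy as np
import torch
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

SAM2_MODEL_CONFIG = "sam2_hiera_l.yaml"              # assumed config name
SAM2_CHECKPOINT = "checkpoints/sam2_hiera_large.pt"  # assumed checkpoint path

def predict_masks(image_rgb: np.ndarray, boxes_xyxy: np.ndarray, device: str = "cuda") -> np.ndarray:
    sam2_model = build_sam2(SAM2_MODEL_CONFIG, SAM2_CHECKPOINT, device=device)
    predictor = SAM2ImagePredictor(sam2_model)
    with torch.autocast(device_type=device, dtype=torch.bfloat16):
        predictor.set_image(image_rgb)
        masks, scores, logits = predictor.predict(
            point_coords=None,
            point_labels=None,
            box=boxes_xyxy,
            multimask_output=False,
        )
    # mirror the diff above: collapse the extra mask channel when present
    return masks.squeeze(1) if masks.ndim == 4 else masks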