import gradio as gr
from PIL import Image, ImageDraw, ImageFont
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("object-detection", model="facebook/detr-resnet-101")

# Default corner distance below which two boxes are merged into one.
_DEFAULT_MERGE_THRESHOLD = 50


def _merge_bbox_groups(bboxes, threshold=_DEFAULT_MERGE_THRESHOLD):
    """
    Greedily merges nearby boxes and records which inputs formed each result.

    A box joins the first already-merged box whose top-left corner or
    bottom-right corner lies within ``threshold`` of its own on both axes;
    otherwise it starts a new group.

    :param bboxes: Iterable of bounding boxes (xmin, ymin, xmax, ymax)
    :param threshold: Maximum per-axis corner distance for two boxes to merge
    :return: Tuple ``(merged_bboxes, groups)`` where ``groups[i]`` lists the
             indices into ``bboxes`` that were folded into ``merged_bboxes[i]``
    """
    merged_bboxes = []
    groups = []
    for index, bbox in enumerate(bboxes):
        for slot, current in enumerate(merged_bboxes):
            top_left_close = (
                abs(bbox[0] - current[0]) < threshold
                and abs(bbox[1] - current[1]) < threshold
            )
            bottom_right_close = (
                abs(bbox[2] - current[2]) < threshold
                and abs(bbox[3] - current[3]) < threshold
            )
            if top_left_close or bottom_right_close:
                # Merge the boxes by extending the boundaries
                merged_bboxes[slot] = (
                    min(bbox[0], current[0]),
                    min(bbox[1], current[1]),
                    max(bbox[2], current[2]),
                    max(bbox[3], current[3]),
                )
                groups[slot].append(index)
                break
        else:
            # No existing group was close enough: start a new one.
            merged_bboxes.append(bbox)
            groups.append([index])
    return merged_bboxes, groups


def merge_bboxes(bboxes, threshold=_DEFAULT_MERGE_THRESHOLD):
    """
    Merges bounding boxes that are close to each other into one.

    :param bboxes: List of bounding boxes (xmin, ymin, xmax, ymax)
    :param threshold: Maximum distance between boxes to be considered as part of the same object
    :return: List of merged bounding boxes
    """
    merged_bboxes, _ = _merge_bbox_groups(bboxes, threshold)
    return merged_bboxes


def draw_bounding_boxes(image, detections):
    """
    Draws bounding boxes on the given PIL image based on the object detection results.
    Merges close bounding boxes to avoid duplicate detections.

    The image is drawn on in place and the same object is returned. Each merged
    box is labelled with the label and score of the highest-scoring detection
    that was folded into it, so labels stay attached to the right box even
    after merging.

    :param image: PIL Image object
    :param detections: List of dictionaries containing object detection results
                       (each with 'score', 'label' and a 'box' dict holding
                       'xmin', 'ymin', 'xmax', 'ymax')
    :return: PIL Image with bounding boxes
    """
    draw = ImageDraw.Draw(image)
    try:
        font = ImageFont.truetype("arial.ttf", 40)  # Load font (adjust size as needed)
    except (OSError, ImportError):
        # Font file missing/unreadable or FreeType support unavailable.
        font = ImageFont.load_default()  # Use default font if Arial is not available

    detections = list(detections)

    # Collect all bounding boxes
    bboxes = [
        (obj["box"]["xmin"], obj["box"]["ymin"], obj["box"]["xmax"], obj["box"]["ymax"])
        for obj in detections
    ]

    # Merge close bounding boxes, remembering which detections formed each one
    merged_bboxes, groups = _merge_bbox_groups(bboxes)

    # Draw bounding boxes and labels for merged boxes
    for bbox, members in zip(merged_bboxes, groups):
        xmin, ymin, xmax, ymax = bbox
        best = max((detections[i] for i in members), key=lambda det: det["score"])
        label = f"{best['label']} ({best['score']:.2f})"

        # Draw bounding box
        draw.rectangle([xmin, ymin, xmax, ymax], outline="red", width=5)

        # Adjust label placement to avoid overlap
        label_bbox = draw.textbbox((xmin, ymin), label, font=font)
        label_width = label_bbox[2] - label_bbox[0]
        label_height = label_bbox[3] - label_bbox[1]
        label_x = xmin
        # Place the label above the box when it fits, otherwise just inside it
        label_y = ymin - label_height if ymin - label_height > 0 else ymin + 5

        # Draw label background to make text more visible
        draw.rectangle(
            [label_x, label_y, label_x + label_width, label_y + label_height],
            fill="red",
        )
        draw.text((label_x, label_y), label, fill="white", font=font)

    return image


# Example usage:
# image = Image.open("your_image.jpg")
# detections = [{'score': 0.92, 'label': 'dog', 'box': {'xmin': 2929, 'ymin': 1297, 'xmax': 5067, 'ymax': 3693}}, ...]
# image_with_boxes = draw_bounding_boxes(image, detections)
# image_with_boxes.show()


def detect_object(image):
    """
    Runs object detection on an uploaded image and draws the results on it.

    :param image: PIL Image supplied by the Gradio input component, or None
                  when the user submits without uploading anything
    :return: The image with bounding boxes and labels drawn on it
    :raises gr.Error: If no image was provided
    """
    if image is None:
        # Fail with a readable message instead of an opaque pipeline error.
        raise gr.Error("Please upload an image before running detection.")

    detections = pipe(image)
    return draw_bounding_boxes(image, detections)


demo = gr.Interface(
    fn=detect_object,
    inputs=[gr.Image(label='Please upload the file to detect the data', type='pil')],
    outputs=[gr.Image(label='Detected object', type='pil')],
    title='Object Detector',
    description='This application will be used to detect image',
)
demo.launch(share=True)