# import gradio as gr # import io # from ultralytics import YOLO # import cv2 # import numpy as np # from PIL import Image # import json # # Load your custom YOLO model # model = YOLO("fentanyl_oft.pt") # # model = YOLO("avatar_ckpt.pt") # def detect_keypoints(image): # """ # Run YOLO inference and return keypoints data # """ # try: # # Convert PIL Image to numpy array # if isinstance(image, Image.Image): # image_np = np.array(image) # image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR) # else: # image_cv2 = image # # Run inference # results = model.predict( # source=image_cv2, # conf=0.05, # iou=0.7, # max_det=1, # imgsz=1440, # device='cpu', # verbose=False # ) # keypoints_data = [] # if results and len(results) > 0: # result = results[0] # if result.keypoints is not None: # kpts = result.keypoints.xy.cpu().numpy() # conf = result.keypoints.conf.cpu().numpy() # for i in range(kpts.shape[1]): # if i < len(kpts[0]): # x, y = kpts[0][i] # confidence = conf[0][i] if i < len(conf[0]) else 0.0 # keypoints_data.append({ # "id": i, # "x": float(x), # "y": float(y), # "confidence": float(confidence) # }) # return { # "success": True, # "keypoints": keypoints_data, # "image_width": image_cv2.shape[1], # "image_height": image_cv2.shape[0], # "num_keypoints": len(keypoints_data) # } # except Exception as e: # return {"success": False, "error": str(e)} # # Create Gradio interface with API access enabled # iface = gr.Interface( # fn=detect_keypoints, # inputs=gr.Image(type="pil"), # outputs=gr.JSON(), # title="YOLO Keypoint Detection", # description="Upload an image to detect keypoints using custom YOLO model", # api_name="predict" # This enables API access at /api/predict # ) # # Launch with API enabled # if __name__ == "__main__": # iface.launch(share=False) # import gradio as gr # import io # from ultralytics import YOLO # import cv2 # import numpy as np # from PIL import Image # import json # # Load both models # single_animal_model = YOLO("fentanyl_oft.pt") # Single animal model # multi_animal_model = YOLO("avatar_ckpt.pt") # Multi-animal model # def detect_keypoints(image, mode="single"): # """ # Run YOLO inference and return keypoints data # Args: # image: PIL Image # mode: "single" or "multi" to determine which model to use # """ # try: # # Select model and parameters based on mode # if mode == "multi": # model = multi_animal_model # imgsz = 1504 # max_det = 5 # else: # default to single # model = single_animal_model # imgsz = 1440 # max_det = 1 # # Convert PIL Image to numpy array # if isinstance(image, Image.Image): # image_np = np.array(image) # image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR) # else: # image_cv2 = image # # Run inference with mode-specific parameters # results = model.predict( # source=image_cv2, # conf=0.05, # iou=0.7, # max_det=max_det, # imgsz=imgsz, # device='cpu', # verbose=False # ) # keypoints_data = [] # if results and len(results) > 0: # result = results[0] # if result.keypoints is not None: # kpts = result.keypoints.xy.cpu().numpy() # conf = result.keypoints.conf.cpu().numpy() # # Handle multiple detections (for multi-animal mode) # for detection_idx in range(kpts.shape[0]): # detection_keypoints = [] # for i in range(kpts.shape[1]): # if i < len(kpts[detection_idx]): # x, y = kpts[detection_idx][i] # confidence = conf[detection_idx][i] if i < len(conf[detection_idx]) else 0.0 # detection_keypoints.append({ # "id": i, # "x": float(x), # "y": float(y), # "confidence": float(confidence) # }) # # For single animal mode, flatten the structure # if mode == "single": # keypoints_data = detection_keypoints # break # Only take first detection # else: # # For multi-animal mode, keep detection structure # keypoints_data.append({ # "detection_id": detection_idx, # "keypoints": detection_keypoints # }) # return { # "success": True, # "mode": mode, # "keypoints": keypoints_data, # "image_width": image_cv2.shape[1], # "image_height": image_cv2.shape[0], # "num_detections": len(keypoints_data) if mode == "multi" else (1 if keypoints_data else 0), # "num_keypoints": len(keypoints_data) if mode == "single" else sum(len(det["keypoints"]) for det in keypoints_data) if mode == "multi" else 0 # } # except Exception as e: # return {"success": False, "error": str(e), "mode": mode} # # Create Gradio interface with mode parameter # iface = gr.Interface( # fn=detect_keypoints, # inputs=[ # gr.Image(type="pil"), # gr.Dropdown(choices=["single", "multi"], value="single", label="Detection Mode") # ], # outputs=gr.JSON(), # title="YOLO Keypoint Detection", # description="Upload an image to detect keypoints using custom YOLO model. Choose single or multi-animal mode.", # api_name="predict" # This enables API access at /api/predict # ) # # Launch with API enabled # if __name__ == "__main__": # iface.launch(share=False) import gradio as gr import io from ultralytics import YOLO import cv2 import numpy as np from PIL import Image import json # Load both models single_animal_model = YOLO("fentanyl_oft.pt") # Single animal model multi_animal_model = YOLO("avatar_ckpt.pt") # Multi-animal model def detect_keypoints(image, mode="single"): """ Run YOLO inference and return keypoints and bounding boxes data Args: image: PIL Image mode: "single" or "multi" to determine which model to use """ try: # Select model and parameters based on mode if mode == "multi": model = multi_animal_model imgsz = 1504 max_det = 5 else: # default to single model = single_animal_model imgsz = 1440 max_det = 1 # Convert PIL Image to numpy array if isinstance(image, Image.Image): image_np = np.array(image) image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR) else: image_cv2 = image # Run inference with mode-specific parameters results = model.predict( source=image_cv2, conf=0.05, iou=0.7, max_det=max_det, imgsz=imgsz, device='cpu', verbose=False ) detections_data = [] if results and len(results) > 0: result = results[0] # Extract bounding boxes bboxes = [] if result.boxes is not None: boxes_xyxy = result.boxes.xyxy.cpu().numpy() # [x1, y1, x2, y2] boxes_conf = result.boxes.conf.cpu().numpy() for i in range(len(boxes_xyxy)): x1, y1, x2, y2 = boxes_xyxy[i] confidence = float(boxes_conf[i]) bboxes.append({ "x1": float(x1), "y1": float(y1), "x2": float(x2), "y2": float(y2), "confidence": confidence }) # Extract keypoints keypoints_per_detection = [] if result.keypoints is not None: kpts = result.keypoints.xy.cpu().numpy() conf = result.keypoints.conf.cpu().numpy() # Handle multiple detections (for multi-animal mode) for detection_idx in range(kpts.shape[0]): detection_keypoints = [] for i in range(kpts.shape[1]): if i < len(kpts[detection_idx]): x, y = kpts[detection_idx][i] confidence = conf[detection_idx][i] if i < len(conf[detection_idx]) else 0.0 detection_keypoints.append({ "id": i, "x": float(x), "y": float(y), "confidence": float(confidence) }) keypoints_per_detection.append(detection_keypoints) # Combine keypoints and bboxes for each detection max_detections = max(len(bboxes), len(keypoints_per_detection)) for detection_idx in range(max_detections): detection_data = { "detection_id": detection_idx } # Add keypoints if available for this detection if detection_idx < len(keypoints_per_detection): detection_data["keypoints"] = keypoints_per_detection[detection_idx] else: detection_data["keypoints"] = [] # Add bbox if available for this detection if detection_idx < len(bboxes): detection_data["bbox"] = bboxes[detection_idx] else: detection_data["bbox"] = None detections_data.append(detection_data) # Format response based on mode if mode == "single": # For single animal mode, flatten the structure (legacy compatibility) if detections_data: single_detection = detections_data[0] return { "success": True, "mode": mode, "keypoints": single_detection["keypoints"], "bbox": single_detection["bbox"], "image_width": image_cv2.shape[1], "image_height": image_cv2.shape[0], "num_detections": 1 if single_detection["keypoints"] or single_detection["bbox"] else 0, "num_keypoints": len(single_detection["keypoints"]) } else: return { "success": True, "mode": mode, "keypoints": [], "bbox": None, "image_width": image_cv2.shape[1], "image_height": image_cv2.shape[0], "num_detections": 0, "num_keypoints": 0 } else: # For multi-animal mode, return all detections return { "success": True, "mode": mode, "detections": detections_data, "image_width": image_cv2.shape[1], "image_height": image_cv2.shape[0], "num_detections": len(detections_data), "num_keypoints": sum(len(det["keypoints"]) for det in detections_data) } except Exception as e: return {"success": False, "error": str(e), "mode": mode} # Create Gradio interface with mode parameter iface = gr.Interface( fn=detect_keypoints, inputs=[ gr.Image(type="pil"), gr.Dropdown(choices=["single", "multi"], value="single", label="Detection Mode") ], outputs=gr.JSON(), title="YOLO Keypoint Detection", description="Upload an image to detect keypoints and bounding boxes using custom YOLO model. Choose single or multi-animal mode.", api_name="predict" # This enables API access at /api/predict ) # Launch with API enabled if __name__ == "__main__": iface.launch(share=False)