import gradio as gr
from ultralytics import YOLO
import cv2
import numpy as np
from PIL import Image

# Load both models
single_animal_model = YOLO("fentanyl_oft.pt")  # Single-animal model
multi_animal_model = YOLO("avatar_ckpt.pt")    # Multi-animal model
def detect_keypoints(image, mode="single"):
    """
    Run YOLO inference and return keypoint and bounding-box data.

    Args:
        image: PIL Image (or a BGR numpy array).
        mode: "single" or "multi"; selects which model to use.
    """
    try:
        # Select model and parameters based on mode
        if mode == "multi":
            model = multi_animal_model
            imgsz = 1504
            max_det = 5
        else:  # default to single
            model = single_animal_model
            imgsz = 1440
            max_det = 1

        # Convert PIL image (RGB) to an OpenCV-style BGR array
        if isinstance(image, Image.Image):
            image_np = np.array(image)
            image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
        else:
            image_cv2 = image

        # Run inference with mode-specific parameters
        results = model.predict(
            source=image_cv2,
            conf=0.05,
            iou=0.7,
            max_det=max_det,
            imgsz=imgsz,
            device='cpu',
            verbose=False
        )
        detections_data = []
        if results and len(results) > 0:
            result = results[0]

            # Extract bounding boxes
            bboxes = []
            if result.boxes is not None:
                boxes_xyxy = result.boxes.xyxy.cpu().numpy()  # [x1, y1, x2, y2]
                boxes_conf = result.boxes.conf.cpu().numpy()
                for i in range(len(boxes_xyxy)):
                    x1, y1, x2, y2 = boxes_xyxy[i]
                    bboxes.append({
                        "x1": float(x1),
                        "y1": float(y1),
                        "x2": float(x2),
                        "y2": float(y2),
                        "confidence": float(boxes_conf[i])
                    })

            # Extract keypoints
            keypoints_per_detection = []
            if result.keypoints is not None:
                kpts = result.keypoints.xy.cpu().numpy()
                # keypoints.conf can be None for models without visibility
                # scores; fall back to zeros so the response stays well-formed
                kpt_conf = result.keypoints.conf
                conf = kpt_conf.cpu().numpy() if kpt_conf is not None else np.zeros(kpts.shape[:2])
                # One row per detection (multi-animal mode can return several)
                for detection_idx in range(kpts.shape[0]):
                    detection_keypoints = []
                    for i in range(kpts.shape[1]):
                        x, y = kpts[detection_idx][i]
                        detection_keypoints.append({
                            "id": i,
                            "x": float(x),
                            "y": float(y),
                            "confidence": float(conf[detection_idx][i])
                        })
                    keypoints_per_detection.append(detection_keypoints)
            # Combine keypoints and bboxes for each detection
            max_detections = max(len(bboxes), len(keypoints_per_detection))
            for detection_idx in range(max_detections):
                detection_data = {
                    "detection_id": detection_idx
                }
                # Add keypoints if available for this detection
                if detection_idx < len(keypoints_per_detection):
                    detection_data["keypoints"] = keypoints_per_detection[detection_idx]
                else:
                    detection_data["keypoints"] = []
                # Add bbox if available for this detection
                if detection_idx < len(bboxes):
                    detection_data["bbox"] = bboxes[detection_idx]
                else:
                    detection_data["bbox"] = None
                detections_data.append(detection_data)
        # Format response based on mode
        if mode == "single":
            # For single-animal mode, flatten the structure (legacy compatibility)
            if detections_data:
                single_detection = detections_data[0]
                return {
                    "success": True,
                    "mode": mode,
                    "keypoints": single_detection["keypoints"],
                    "bbox": single_detection["bbox"],
                    "image_width": image_cv2.shape[1],
                    "image_height": image_cv2.shape[0],
                    "num_detections": 1 if single_detection["keypoints"] or single_detection["bbox"] else 0,
                    "num_keypoints": len(single_detection["keypoints"])
                }
            else:
                return {
                    "success": True,
                    "mode": mode,
                    "keypoints": [],
                    "bbox": None,
                    "image_width": image_cv2.shape[1],
                    "image_height": image_cv2.shape[0],
                    "num_detections": 0,
                    "num_keypoints": 0
                }
        else:
            # For multi-animal mode, return all detections
            return {
                "success": True,
                "mode": mode,
                "detections": detections_data,
                "image_width": image_cv2.shape[1],
                "image_height": image_cv2.shape[0],
                "num_detections": len(detections_data),
                "num_keypoints": sum(len(det["keypoints"]) for det in detections_data)
            }
    except Exception as e:
        return {"success": False, "error": str(e), "mode": mode}
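# Illustrative response shapes (a sketch only; the coordinate and confidence
# values below are hypothetical, not real model output):
#
# single mode:
#   {"success": true, "mode": "single",
#    "keypoints": [{"id": 0, "x": 412.3, "y": 208.7, "confidence": 0.91}, ...],
#    "bbox": {"x1": 350.0, "y1": 150.0, "x2": 480.0, "y2": 300.0, "confidence": 0.88},
#    "image_width": 1440, "image_height": 1080,
#    "num_detections": 1, "num_keypoints": 8}
#
# multi mode:
#   {"success": true, "mode": "multi",
#    "detections": [{"detection_id": 0, "keypoints": [...], "bbox": {...}}, ...],
#    "image_width": 1504, "image_height": 1080,
#    "num_detections": 2, "num_keypoints": 16}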
# Create Gradio interface with a mode parameter
iface = gr.Interface(
    fn=detect_keypoints,
    inputs=[
        gr.Image(type="pil"),
        gr.Dropdown(choices=["single", "multi"], value="single", label="Detection Mode")
    ],
    outputs=gr.JSON(),
    title="YOLO Keypoint Detection",
    description="Upload an image to detect keypoints and bounding boxes with a custom YOLO model. Choose single- or multi-animal mode.",
    api_name="predict"  # Enables API access at /api/predict
)

# Launch with API enabled
if __name__ == "__main__":
    iface.launch(share=False)
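
# A minimal client-side sketch for calling this Space's API, assuming a recent
# gradio_client is installed; the Space ID and image path below are
# hypothetical placeholders, not values from this repo:
#
#   from gradio_client import Client, handle_file
#
#   client = Client("username/space-name")      # hypothetical Space ID
#   result = client.predict(
#       handle_file("mouse.png"),               # hypothetical local image path
#       "single",                               # detection mode: "single" or "multi"
#       api_name="/predict"
#   )
#   print(result)                               # dict matching the shapes above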