"""detect_kpts/app.py: Gradio Space serving YOLO keypoint and bounding-box detection."""
import gradio as gr
import cv2
import numpy as np
from ultralytics import YOLO
from PIL import Image
# Load both models
single_animal_model = YOLO("fentanyl_oft.pt") # Single animal model
multi_animal_model = YOLO("avatar_ckpt.pt") # Multi-animal model
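# Both checkpoint files are assumed to sit next to app.py in the Space repo.
# An optional startup sanity check (a sketch, using only the public predict
# API) would be to run one dummy inference and inspect the keypoint output:
#
#   _probe = single_animal_model.predict(
#       np.zeros((64, 64, 3), dtype=np.uint8), verbose=False
#   )
#   print(_probe[0].keypoints)  # empty Keypoints when nothing is detected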
def detect_keypoints(image, mode="single"):
    """
    Run YOLO inference and return keypoint and bounding-box data.

    Args:
        image: PIL Image (or an already-converted BGR numpy array).
        mode: "single" or "multi"; selects which model and inference settings to use.

    Returns:
        dict: JSON-serializable results. In "single" mode the first detection
        is flattened into top-level "keypoints"/"bbox" keys; in "multi" mode
        every detection is returned under "detections".
    """
try:
# Select model and parameters based on mode
if mode == "multi":
model = multi_animal_model
imgsz = 1504
max_det = 5
else: # default to single
model = single_animal_model
imgsz = 1440
max_det = 1
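        # Note: the differing imgsz/max_det values are carried over from the
        # original configuration and presumably match each checkpoint's
        # training setup (an assumption, not verified here).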
        # Convert the PIL Image to an OpenCV-style BGR array; forcing RGB first
        # keeps RGBA or grayscale uploads from breaking cv2.cvtColor
        if isinstance(image, Image.Image):
            image_np = np.array(image.convert("RGB"))
            image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
        else:
            image_cv2 = image
# Run inference with mode-specific parameters
results = model.predict(
source=image_cv2,
conf=0.05,
iou=0.7,
max_det=max_det,
imgsz=imgsz,
device='cpu',
verbose=False
)
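        # `results` is a list of ultralytics Results objects (one per input
        # image); with a single source image only results[0] matters. Detection
        # boxes live in result.boxes (xyxy plus confidence) and pose keypoints
        # in result.keypoints (xy plus optional per-point confidence).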
detections_data = []
if results and len(results) > 0:
result = results[0]
# Extract bounding boxes
bboxes = []
if result.boxes is not None:
boxes_xyxy = result.boxes.xyxy.cpu().numpy() # [x1, y1, x2, y2]
boxes_conf = result.boxes.conf.cpu().numpy()
for i in range(len(boxes_xyxy)):
x1, y1, x2, y2 = boxes_xyxy[i]
confidence = float(boxes_conf[i])
bboxes.append({
"x1": float(x1),
"y1": float(y1),
"x2": float(x2),
"y2": float(y2),
"confidence": confidence
})
# Extract keypoints
keypoints_per_detection = []
            if result.keypoints is not None:
                kpts = result.keypoints.xy.cpu().numpy()
                # keypoints.conf is None for checkpoints that predict no
                # per-point visibility scores; fall back to zeros so the
                # indexing below stays safe
                kpt_conf = result.keypoints.conf
                conf = kpt_conf.cpu().numpy() if kpt_conf is not None else np.zeros(kpts.shape[:2], dtype=np.float32)
# Handle multiple detections (for multi-animal mode)
for detection_idx in range(kpts.shape[0]):
detection_keypoints = []
                    # kpts has shape (num_detections, num_keypoints, 2), so the
                    # range bound already matches the row length
                    for i in range(kpts.shape[1]):
                        x, y = kpts[detection_idx][i]
                        detection_keypoints.append({
                            "id": i,
                            "x": float(x),
                            "y": float(y),
                            "confidence": float(conf[detection_idx][i])
                        })
keypoints_per_detection.append(detection_keypoints)
# Combine keypoints and bboxes for each detection
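            # Boxes and keypoints come from the same Results object in the same
            # detection order, so pairing them by index is valid; max() below
            # only guards the degenerate case where one list is shorter.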
max_detections = max(len(bboxes), len(keypoints_per_detection))
for detection_idx in range(max_detections):
detection_data = {
"detection_id": detection_idx
}
# Add keypoints if available for this detection
if detection_idx < len(keypoints_per_detection):
detection_data["keypoints"] = keypoints_per_detection[detection_idx]
else:
detection_data["keypoints"] = []
# Add bbox if available for this detection
if detection_idx < len(bboxes):
detection_data["bbox"] = bboxes[detection_idx]
else:
detection_data["bbox"] = None
detections_data.append(detection_data)
# Format response based on mode
if mode == "single":
# For single animal mode, flatten the structure (legacy compatibility)
if detections_data:
single_detection = detections_data[0]
return {
"success": True,
"mode": mode,
"keypoints": single_detection["keypoints"],
"bbox": single_detection["bbox"],
"image_width": image_cv2.shape[1],
"image_height": image_cv2.shape[0],
"num_detections": 1 if single_detection["keypoints"] or single_detection["bbox"] else 0,
"num_keypoints": len(single_detection["keypoints"])
}
else:
return {
"success": True,
"mode": mode,
"keypoints": [],
"bbox": None,
"image_width": image_cv2.shape[1],
"image_height": image_cv2.shape[0],
"num_detections": 0,
"num_keypoints": 0
}
else:
# For multi-animal mode, return all detections
return {
"success": True,
"mode": mode,
"detections": detections_data,
"image_width": image_cv2.shape[1],
"image_height": image_cv2.shape[0],
"num_detections": len(detections_data),
"num_keypoints": sum(len(det["keypoints"]) for det in detections_data)
}
except Exception as e:
return {"success": False, "error": str(e), "mode": mode}
# Create Gradio interface with mode parameter
iface = gr.Interface(
fn=detect_keypoints,
inputs=[
gr.Image(type="pil"),
gr.Dropdown(choices=["single", "multi"], value="single", label="Detection Mode")
],
outputs=gr.JSON(),
title="YOLO Keypoint Detection",
description="Upload an image to detect keypoints and bounding boxes using custom YOLO model. Choose single or multi-animal mode.",
api_name="predict" # This enables API access at /api/predict
)
# Launch with API enabled
if __name__ == "__main__":
iface.launch(share=False)
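# Example remote call with gradio_client (a sketch; the Space id
# "ShinSeungJ/detect_kpts" and the image path are assumptions):
#
#   from gradio_client import Client, handle_file
#
#   client = Client("ShinSeungJ/detect_kpts")
#   result = client.predict(
#       handle_file("mouse.png"),  # input image
#       "multi",                   # detection mode
#       api_name="/predict",
#   )
#   print(result["num_detections"])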