"""detect_kpts/app.py: Gradio Space serving YOLO keypoint and bounding-box detection."""
import gradio as gr
import cv2
import numpy as np
from ultralytics import YOLO
from PIL import Image
# Load both models
single_animal_model = YOLO("fentanyl_oft.pt") # Single animal model
multi_animal_model = YOLO("avatar_ckpt.pt") # Multi-animal model
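# Both checkpoint files are assumed to sit next to app.py in the Space repo.
# An optional startup sanity check (a sketch, using only the public predict
# API) would be to run one dummy inference and inspect the keypoint output:
#
#   _probe = single_animal_model.predict(
#       np.zeros((64, 64, 3), dtype=np.uint8), verbose=False
#   )
#   print(_probe[0].keypoints)  # empty Keypoints when nothing is detected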
def detect_keypoints(image, mode="single"):
    """
    Run YOLO inference and return keypoint and bounding-box data.

    Args:
        image: PIL Image (or an already-converted BGR numpy array).
        mode: "single" or "multi"; selects which model and inference settings to use.

    Returns:
        dict: JSON-serializable results. In "single" mode the first detection
        is flattened into top-level "keypoints"/"bbox" keys; in "multi" mode
        every detection is returned under "detections".
    """
try:
# Select model and parameters based on mode
if mode == "multi":
model = multi_animal_model
imgsz = 1504
max_det = 5
else: # default to single
model = single_animal_model
imgsz = 1440
max_det = 1
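        # Note: the differing imgsz/max_det values are carried over from the
        # original configuration and presumably match each checkpoint's
        # training setup (an assumption, not verified here).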
        # Convert the PIL Image to an OpenCV-style BGR array; forcing RGB first
        # keeps RGBA or grayscale uploads from breaking cv2.cvtColor
        if isinstance(image, Image.Image):
            image_np = np.array(image.convert("RGB"))
            image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
        else:
            image_cv2 = image
# Run inference with mode-specific parameters
results = model.predict(
source=image_cv2,
conf=0.05,
iou=0.7,
max_det=max_det,
imgsz=imgsz,
device='cpu',
verbose=False
)
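        # `results` is a list of ultralytics Results objects (one per input
        # image); with a single source image only results[0] matters. Detection
        # boxes live in result.boxes (xyxy plus confidence) and pose keypoints
        # in result.keypoints (xy plus optional per-point confidence).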
detections_data = []
if results and len(results) > 0:
result = results[0]
# Extract bounding boxes
bboxes = []
if result.boxes is not None:
boxes_xyxy = result.boxes.xyxy.cpu().numpy() # [x1, y1, x2, y2]
boxes_conf = result.boxes.conf.cpu().numpy()
for i in range(len(boxes_xyxy)):
x1, y1, x2, y2 = boxes_xyxy[i]
confidence = float(boxes_conf[i])
bboxes.append({
"x1": float(x1),
"y1": float(y1),
"x2": float(x2),
"y2": float(y2),
"confidence": confidence
})
# Extract keypoints
keypoints_per_detection = []
            if result.keypoints is not None:
                kpts = result.keypoints.xy.cpu().numpy()
                # keypoints.conf is None for checkpoints that predict no
                # per-point visibility scores; fall back to zeros so the
                # indexing below stays safe
                kpt_conf = result.keypoints.conf
                conf = kpt_conf.cpu().numpy() if kpt_conf is not None else np.zeros(kpts.shape[:2], dtype=np.float32)
# Handle multiple detections (for multi-animal mode)
for detection_idx in range(kpts.shape[0]):
detection_keypoints = []
                    # kpts has shape (num_detections, num_keypoints, 2), so the
                    # range bound already matches the row length
                    for i in range(kpts.shape[1]):
                        x, y = kpts[detection_idx][i]
                        detection_keypoints.append({
                            "id": i,
                            "x": float(x),
                            "y": float(y),
                            "confidence": float(conf[detection_idx][i])
                        })
keypoints_per_detection.append(detection_keypoints)
# Combine keypoints and bboxes for each detection
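            # Boxes and keypoints come from the same Results object in the same
            # detection order, so pairing them by index is valid; max() below
            # only guards the degenerate case where one list is shorter.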
max_detections = max(len(bboxes), len(keypoints_per_detection))
for detection_idx in range(max_detections):
detection_data = {
"detection_id": detection_idx
}
# Add keypoints if available for this detection
if detection_idx < len(keypoints_per_detection):
detection_data["keypoints"] = keypoints_per_detection[detection_idx]
else:
detection_data["keypoints"] = []
# Add bbox if available for this detection
if detection_idx < len(bboxes):
detection_data["bbox"] = bboxes[detection_idx]
else:
detection_data["bbox"] = None
detections_data.append(detection_data)
# Format response based on mode
if mode == "single":
# For single animal mode, flatten the structure (legacy compatibility)
if detections_data:
single_detection = detections_data[0]
return {
"success": True,
"mode": mode,
"keypoints": single_detection["keypoints"],
"bbox": single_detection["bbox"],
"image_width": image_cv2.shape[1],
"image_height": image_cv2.shape[0],
"num_detections": 1 if single_detection["keypoints"] or single_detection["bbox"] else 0,
"num_keypoints": len(single_detection["keypoints"])
}
else:
return {
"success": True,
"mode": mode,
"keypoints": [],
"bbox": None,
"image_width": image_cv2.shape[1],
"image_height": image_cv2.shape[0],
"num_detections": 0,
"num_keypoints": 0
}
else:
# For multi-animal mode, return all detections
return {
"success": True,
"mode": mode,
"detections": detections_data,
"image_width": image_cv2.shape[1],
"image_height": image_cv2.shape[0],
"num_detections": len(detections_data),
"num_keypoints": sum(len(det["keypoints"]) for det in detections_data)
}
except Exception as e:
return {"success": False, "error": str(e), "mode": mode}
# Create Gradio interface with mode parameter
iface = gr.Interface(
fn=detect_keypoints,
inputs=[
gr.Image(type="pil"),
gr.Dropdown(choices=["single", "multi"], value="single", label="Detection Mode")
],
outputs=gr.JSON(),
title="YOLO Keypoint Detection",
description="Upload an image to detect keypoints and bounding boxes using custom YOLO model. Choose single or multi-animal mode.",
api_name="predict" # This enables API access at /api/predict
)
# Launch with API enabled
if __name__ == "__main__":
iface.launch(share=False)
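# Example remote call with gradio_client (a sketch; the Space id
# "ShinSeungJ/detect_kpts" and the image path are assumptions):
#
#   from gradio_client import Client, handle_file
#
#   client = Client("ShinSeungJ/detect_kpts")
#   result = client.predict(
#       handle_file("mouse.png"),  # input image
#       "multi",                   # detection mode
#       api_name="/predict",
#   )
#   print(result["num_detections"])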