Update app.py
app.py CHANGED
@@ -76,6 +76,119 @@
 # if __name__ == "__main__":
 #     iface.launch(share=False)
 
+
+
+
+# import gradio as gr
+# import io
+# from ultralytics import YOLO
+# import cv2
+# import numpy as np
+# from PIL import Image
+# import json
+
+# # Load both models
+# single_animal_model = YOLO("fentanyl_oft.pt")  # Single animal model
+# multi_animal_model = YOLO("avatar_ckpt.pt")  # Multi-animal model
+
+# def detect_keypoints(image, mode="single"):
+#     """
+#     Run YOLO inference and return keypoints data
+#     Args:
+#         image: PIL Image
+#         mode: "single" or "multi" to determine which model to use
+#     """
+#     try:
+#         # Select model and parameters based on mode
+#         if mode == "multi":
+#             model = multi_animal_model
+#             imgsz = 1504
+#             max_det = 5
+#         else:  # default to single
+#             model = single_animal_model
+#             imgsz = 1440
+#             max_det = 1
+
+#         # Convert PIL Image to numpy array
+#         if isinstance(image, Image.Image):
+#             image_np = np.array(image)
+#             image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
+#         else:
+#             image_cv2 = image
+
+#         # Run inference with mode-specific parameters
+#         results = model.predict(
+#             source=image_cv2,
+#             conf=0.05,
+#             iou=0.7,
+#             max_det=max_det,
+#             imgsz=imgsz,
+#             device='cpu',
+#             verbose=False
+#         )
+
+#         keypoints_data = []
+#         if results and len(results) > 0:
+#             result = results[0]
+#             if result.keypoints is not None:
+#                 kpts = result.keypoints.xy.cpu().numpy()
+#                 conf = result.keypoints.conf.cpu().numpy()
+
+#                 # Handle multiple detections (for multi-animal mode)
+#                 for detection_idx in range(kpts.shape[0]):
+#                     detection_keypoints = []
+#                     for i in range(kpts.shape[1]):
+#                         if i < len(kpts[detection_idx]):
+#                             x, y = kpts[detection_idx][i]
+#                             confidence = conf[detection_idx][i] if i < len(conf[detection_idx]) else 0.0
+#                             detection_keypoints.append({
+#                                 "id": i,
+#                                 "x": float(x),
+#                                 "y": float(y),
+#                                 "confidence": float(confidence)
+#                             })
+
+#                     # For single animal mode, flatten the structure
+#                     if mode == "single":
+#                         keypoints_data = detection_keypoints
+#                         break  # Only take first detection
+#                     else:
+#                         # For multi-animal mode, keep detection structure
+#                         keypoints_data.append({
+#                             "detection_id": detection_idx,
+#                             "keypoints": detection_keypoints
+#                         })
+
+#         return {
+#             "success": True,
+#             "mode": mode,
+#             "keypoints": keypoints_data,
+#             "image_width": image_cv2.shape[1],
+#             "image_height": image_cv2.shape[0],
+#             "num_detections": len(keypoints_data) if mode == "multi" else (1 if keypoints_data else 0),
+#             "num_keypoints": len(keypoints_data) if mode == "single" else sum(len(det["keypoints"]) for det in keypoints_data) if mode == "multi" else 0
+#         }
+
+#     except Exception as e:
+#         return {"success": False, "error": str(e), "mode": mode}
+
+# # Create Gradio interface with mode parameter
+# iface = gr.Interface(
+#     fn=detect_keypoints,
+#     inputs=[
+#         gr.Image(type="pil"),
+#         gr.Dropdown(choices=["single", "multi"], value="single", label="Detection Mode")
+#     ],
+#     outputs=gr.JSON(),
+#     title="YOLO Keypoint Detection",
+#     description="Upload an image to detect keypoints using custom YOLO model. Choose single or multi-animal mode.",
+#     api_name="predict"  # This enables API access at /api/predict
+# )
+
+# # Launch with API enabled
+# if __name__ == "__main__":
+#     iface.launch(share=False)
+
 import gradio as gr
 import io
 from ultralytics import YOLO
@@ -90,7 +203,7 @@ multi_animal_model = YOLO("avatar_ckpt.pt")  # Multi-animal model
 
 def detect_keypoints(image, mode="single"):
     """
-    Run YOLO inference and return keypoints data
+    Run YOLO inference and return keypoints and bounding boxes data
     Args:
         image: PIL Image
         mode: "single" or "multi" to determine which model to use
@@ -124,9 +237,30 @@ def detect_keypoints(image, mode="single"):
             verbose=False
         )
 
-        keypoints_data = []
+        detections_data = []
         if results and len(results) > 0:
             result = results[0]
+
+            # Extract bounding boxes
+            bboxes = []
+            if result.boxes is not None:
+                boxes_xyxy = result.boxes.xyxy.cpu().numpy()  # [x1, y1, x2, y2]
+                boxes_conf = result.boxes.conf.cpu().numpy()
+
+                for i in range(len(boxes_xyxy)):
+                    x1, y1, x2, y2 = boxes_xyxy[i]
+                    confidence = float(boxes_conf[i])
+
+                    bboxes.append({
+                        "x1": float(x1),
+                        "y1": float(y1),
+                        "x2": float(x2),
+                        "y2": float(y2),
+                        "confidence": confidence
+                    })
+
+            # Extract keypoints
+            keypoints_per_detection = []
             if result.keypoints is not None:
                 kpts = result.keypoints.xy.cpu().numpy()
                 conf = result.keypoints.conf.cpu().numpy()
@@ -144,27 +278,67 @@ def detect_keypoints(image, mode="single"):
                             "y": float(y),
                             "confidence": float(confidence)
                         })
-
-                    # For single animal mode, flatten the structure
-                    if mode == "single":
-                        keypoints_data = detection_keypoints
-                        break  # Only take first detection
-                    else:
-                        # For multi-animal mode, keep detection structure
-                        keypoints_data.append({
-                            "detection_id": detection_idx,
-                            "keypoints": detection_keypoints
-                        })
+                    keypoints_per_detection.append(detection_keypoints)
+
+            # Combine keypoints and bboxes for each detection
+            max_detections = max(len(bboxes), len(keypoints_per_detection))
+
+            for detection_idx in range(max_detections):
+                detection_data = {
+                    "detection_id": detection_idx
+                }
+
+                # Add keypoints if available for this detection
+                if detection_idx < len(keypoints_per_detection):
+                    detection_data["keypoints"] = keypoints_per_detection[detection_idx]
+                else:
+                    detection_data["keypoints"] = []
+
+                # Add bbox if available for this detection
+                if detection_idx < len(bboxes):
+                    detection_data["bbox"] = bboxes[detection_idx]
+                else:
+                    detection_data["bbox"] = None
+
+                detections_data.append(detection_data)
 
-        return {
-            "success": True,
-            "mode": mode,
-            "keypoints": keypoints_data,
-            "image_width": image_cv2.shape[1],
-            "image_height": image_cv2.shape[0],
-            "num_detections": len(keypoints_data) if mode == "multi" else (1 if keypoints_data else 0),
-            "num_keypoints": len(keypoints_data) if mode == "single" else sum(len(det["keypoints"]) for det in keypoints_data) if mode == "multi" else 0
-        }
+        # Format response based on mode
+        if mode == "single":
+            # For single animal mode, flatten the structure (legacy compatibility)
+            if detections_data:
+                single_detection = detections_data[0]
+                return {
+                    "success": True,
+                    "mode": mode,
+                    "keypoints": single_detection["keypoints"],
+                    "bbox": single_detection["bbox"],
+                    "image_width": image_cv2.shape[1],
+                    "image_height": image_cv2.shape[0],
+                    "num_detections": 1 if single_detection["keypoints"] or single_detection["bbox"] else 0,
+                    "num_keypoints": len(single_detection["keypoints"])
+                }
+            else:
+                return {
+                    "success": True,
+                    "mode": mode,
+                    "keypoints": [],
+                    "bbox": None,
+                    "image_width": image_cv2.shape[1],
+                    "image_height": image_cv2.shape[0],
+                    "num_detections": 0,
+                    "num_keypoints": 0
+                }
+        else:
+            # For multi-animal mode, return all detections
+            return {
+                "success": True,
+                "mode": mode,
+                "detections": detections_data,
+                "image_width": image_cv2.shape[1],
+                "image_height": image_cv2.shape[0],
+                "num_detections": len(detections_data),
+                "num_keypoints": sum(len(det["keypoints"]) for det in detections_data)
+            }
 
     except Exception as e:
         return {"success": False, "error": str(e), "mode": mode}
@@ -178,7 +352,7 @@ iface = gr.Interface(
     ],
     outputs=gr.JSON(),
    title="YOLO Keypoint Detection",
-    description="Upload an image to detect keypoints using custom YOLO model. Choose single or multi-animal mode.",
+    description="Upload an image to detect keypoints and bounding boxes using custom YOLO model. Choose single or multi-animal mode.",
     api_name="predict"  # This enables API access at /api/predict
 )
 
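For reference, a minimal client-side sketch of calling the updated endpoint, assuming the Space is public and a recent gradio_client release (where local files are wrapped with handle_file); the Space id and image path below are placeholders, not part of this commit:

from gradio_client import Client, handle_file

# Placeholder Space id; substitute the actual owner/space-name.
client = Client("owner/space-name")

# "multi" selects the multi-animal model (avatar_ckpt.pt, imgsz=1504, max_det=5);
# "single" selects the single-animal model (fentanyl_oft.pt, imgsz=1440, max_det=1).
result = client.predict(
    handle_file("frame.jpg"),  # placeholder test image
    "multi",                   # Detection Mode dropdown: "single" or "multi"
    api_name="/predict",
)
print(result)

# Response shape after this commit (values elided):
# multi mode:  {"success": True, "mode": "multi",
#               "detections": [{"detection_id": 0, "keypoints": [...], "bbox": {...}}, ...],
#               "image_width": ..., "image_height": ...,
#               "num_detections": ..., "num_keypoints": ...}
# single mode: {"success": True, "mode": "single",
#               "keypoints": [...], "bbox": {...}, ...}  # flattened for legacy clients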