# import gradio as gr
# import io
# from ultralytics import YOLO
# import cv2
# import numpy as np
# from PIL import Image
# import json

# # Load your custom YOLO model
# model = YOLO("fentanyl_oft.pt")
# # model = YOLO("avatar_ckpt.pt")

# def detect_keypoints(image):
#     """
#     Run YOLO inference and return keypoints data
#     """
#     try:
#         # Convert PIL Image to numpy array
#         if isinstance(image, Image.Image):
#             image_np = np.array(image)
#             image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
#         else:
#             image_cv2 = image
        
#         # Run inference
#         results = model.predict(
#             source=image_cv2,
#             conf=0.05,
#             iou=0.7,
#             max_det=1,
#             imgsz=1440,
#             device='cpu',
#             verbose=False
#         )
        
#         keypoints_data = []
#         if results and len(results) > 0:
#             result = results[0]
#             if result.keypoints is not None:
#                 kpts = result.keypoints.xy.cpu().numpy()
#                 conf = result.keypoints.conf.cpu().numpy()
                
#                 for i in range(kpts.shape[1]):
#                     if i < len(kpts[0]):
#                         x, y = kpts[0][i]
#                         confidence = conf[0][i] if i < len(conf[0]) else 0.0
#                         keypoints_data.append({
#                             "id": i,
#                             "x": float(x),
#                             "y": float(y),
#                             "confidence": float(confidence)
#                         })
        
#         return {
#             "success": True,
#             "keypoints": keypoints_data,
#             "image_width": image_cv2.shape[1],
#             "image_height": image_cv2.shape[0],
#             "num_keypoints": len(keypoints_data)
#         }
        
#     except Exception as e:
#         return {"success": False, "error": str(e)}

# # Create Gradio interface with API access enabled
# iface = gr.Interface(
#     fn=detect_keypoints,
#     inputs=gr.Image(type="pil"),
#     outputs=gr.JSON(),
#     title="YOLO Keypoint Detection",
#     description="Upload an image to detect keypoints using custom YOLO model",
#     api_name="predict"  # This enables API access at /api/predict
# )

# # Launch with API enabled
# if __name__ == "__main__":
#     iface.launch(share=False)


# import gradio as gr
# import io
# from ultralytics import YOLO
# import cv2
# import numpy as np
# from PIL import Image
# import json

# # Load both models
# single_animal_model = YOLO("fentanyl_oft.pt")  # Single animal model
# multi_animal_model = YOLO("avatar_ckpt.pt")   # Multi-animal model

# def detect_keypoints(image, mode="single"):
#     """
#     Run YOLO inference and return keypoints data
#     Args:
#         image: PIL Image
#         mode: "single" or "multi" to determine which model to use
#     """
#     try:
#         # Select model and parameters based on mode
#         if mode == "multi":
#             model = multi_animal_model
#             imgsz = 1504
#             max_det = 5
#         else:  # default to single
#             model = single_animal_model
#             imgsz = 1440
#             max_det = 1
        
#         # Convert PIL Image to numpy array
#         if isinstance(image, Image.Image):
#             image_np = np.array(image)
#             image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
#         else:
#             image_cv2 = image
        
#         # Run inference with mode-specific parameters
#         results = model.predict(
#             source=image_cv2,
#             conf=0.05,
#             iou=0.7,
#             max_det=max_det,
#             imgsz=imgsz,
#             device='cpu',
#             verbose=False
#         )
        
#         keypoints_data = []
#         if results and len(results) > 0:
#             result = results[0]
#             if result.keypoints is not None:
#                 kpts = result.keypoints.xy.cpu().numpy()
#                 conf = result.keypoints.conf.cpu().numpy()
                
#                 # Handle multiple detections (for multi-animal mode)
#                 for detection_idx in range(kpts.shape[0]):
#                     detection_keypoints = []
#                     for i in range(kpts.shape[1]):
#                         if i < len(kpts[detection_idx]):
#                             x, y = kpts[detection_idx][i]
#                             confidence = conf[detection_idx][i] if i < len(conf[detection_idx]) else 0.0
#                             detection_keypoints.append({
#                                 "id": i,
#                                 "x": float(x),
#                                 "y": float(y),
#                                 "confidence": float(confidence)
#                             })
                    
#                     # For single animal mode, flatten the structure
#                     if mode == "single":
#                         keypoints_data = detection_keypoints
#                         break  # Only take first detection
#                     else:
#                         # For multi-animal mode, keep detection structure
#                         keypoints_data.append({
#                             "detection_id": detection_idx,
#                             "keypoints": detection_keypoints
#                         })
        
#         return {
#             "success": True,
#             "mode": mode,
#             "keypoints": keypoints_data,
#             "image_width": image_cv2.shape[1],
#             "image_height": image_cv2.shape[0],
#             "num_detections": len(keypoints_data) if mode == "multi" else (1 if keypoints_data else 0),
#             "num_keypoints": len(keypoints_data) if mode == "single" else sum(len(det["keypoints"]) for det in keypoints_data) if mode == "multi" else 0
#         }
        
#     except Exception as e:
#         return {"success": False, "error": str(e), "mode": mode}

# # Create Gradio interface with mode parameter
# iface = gr.Interface(
#     fn=detect_keypoints,
#     inputs=[
#         gr.Image(type="pil"),
#         gr.Dropdown(choices=["single", "multi"], value="single", label="Detection Mode")
#     ],
#     outputs=gr.JSON(),
#     title="YOLO Keypoint Detection",
#     description="Upload an image to detect keypoints using custom YOLO model. Choose single or multi-animal mode.",
#     api_name="predict"  # This enables API access at /api/predict
# )

# # Launch with API enabled
# if __name__ == "__main__":
#     iface.launch(share=False)

import gradio as gr
import io
from ultralytics import YOLO
import cv2
import numpy as np
from PIL import Image
import json

# Load both models
single_animal_model = YOLO("fentanyl_oft.pt")  # Single animal model
multi_animal_model = YOLO("avatar_ckpt.pt")   # Multi-animal model

def detect_keypoints(image, mode="single"):
    """
    Run YOLO inference and return keypoints and bounding boxes data
    Args:
        image: PIL Image
        mode: "single" or "multi" to determine which model to use
    """
    try:
        # Select model and parameters based on mode
        if mode == "multi":
            model = multi_animal_model
            imgsz = 1504
            max_det = 5
        else:  # default to single
            model = single_animal_model
            imgsz = 1440
            max_det = 1
        
        # Convert PIL Image to numpy array
        if isinstance(image, Image.Image):
            image_np = np.array(image)
            image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
        else:
            image_cv2 = image
        
        # Run inference with mode-specific parameters
        results = model.predict(
            source=image_cv2,
            conf=0.05,
            iou=0.7,
            max_det=max_det,
            imgsz=imgsz,
            device='cpu',
            verbose=False
        )
        
        detections_data = []
        if results and len(results) > 0:
            result = results[0]
            
            # Extract bounding boxes
            bboxes = []
            if result.boxes is not None:
                boxes_xyxy = result.boxes.xyxy.cpu().numpy()  # [x1, y1, x2, y2]
                boxes_conf = result.boxes.conf.cpu().numpy()
                
                for i in range(len(boxes_xyxy)):
                    x1, y1, x2, y2 = boxes_xyxy[i]
                    confidence = float(boxes_conf[i])
                    
                    bboxes.append({
                        "x1": float(x1),
                        "y1": float(y1), 
                        "x2": float(x2),
                        "y2": float(y2),
                        "confidence": confidence
                    })
            
            # Extract keypoints
            keypoints_per_detection = []
            if result.keypoints is not None:
                kpts = result.keypoints.xy.cpu().numpy()
                conf = result.keypoints.conf.cpu().numpy()
                
                # Handle multiple detections (for multi-animal mode)
                for detection_idx in range(kpts.shape[0]):
                    detection_keypoints = []
                    for i in range(kpts.shape[1]):
                        if i < len(kpts[detection_idx]):
                            x, y = kpts[detection_idx][i]
                            confidence = conf[detection_idx][i] if i < len(conf[detection_idx]) else 0.0
                            detection_keypoints.append({
                                "id": i,
                                "x": float(x),
                                "y": float(y),
                                "confidence": float(confidence)
                            })
                    keypoints_per_detection.append(detection_keypoints)
            
            # Combine keypoints and bboxes for each detection
            max_detections = max(len(bboxes), len(keypoints_per_detection))
            
            for detection_idx in range(max_detections):
                detection_data = {
                    "detection_id": detection_idx
                }
                
                # Add keypoints if available for this detection
                if detection_idx < len(keypoints_per_detection):
                    detection_data["keypoints"] = keypoints_per_detection[detection_idx]
                else:
                    detection_data["keypoints"] = []
                
                # Add bbox if available for this detection
                if detection_idx < len(bboxes):
                    detection_data["bbox"] = bboxes[detection_idx]
                else:
                    detection_data["bbox"] = None
                
                detections_data.append(detection_data)
        
        # Format response based on mode
        if mode == "single":
            # For single animal mode, flatten the structure (legacy compatibility)
            if detections_data:
                single_detection = detections_data[0]
                return {
                    "success": True,
                    "mode": mode,
                    "keypoints": single_detection["keypoints"],
                    "bbox": single_detection["bbox"],
                    "image_width": image_cv2.shape[1],
                    "image_height": image_cv2.shape[0],
                    "num_detections": 1 if single_detection["keypoints"] or single_detection["bbox"] else 0,
                    "num_keypoints": len(single_detection["keypoints"])
                }
            else:
                return {
                    "success": True,
                    "mode": mode,
                    "keypoints": [],
                    "bbox": None,
                    "image_width": image_cv2.shape[1],
                    "image_height": image_cv2.shape[0],
                    "num_detections": 0,
                    "num_keypoints": 0
                }
        else:
            # For multi-animal mode, return all detections
            return {
                "success": True,
                "mode": mode,
                "detections": detections_data,
                "image_width": image_cv2.shape[1],
                "image_height": image_cv2.shape[0],
                "num_detections": len(detections_data),
                "num_keypoints": sum(len(det["keypoints"]) for det in detections_data)
            }
        
    except Exception as e:
        return {"success": False, "error": str(e), "mode": mode}

# Create Gradio interface with mode parameter
iface = gr.Interface(
    fn=detect_keypoints,
    inputs=[
        gr.Image(type="pil"),
        gr.Dropdown(choices=["single", "multi"], value="single", label="Detection Mode")
    ],
    outputs=gr.JSON(),
    title="YOLO Keypoint Detection",
    description="Upload an image to detect keypoints and bounding boxes using custom YOLO model. Choose single or multi-animal mode.",
    api_name="predict"  # This enables API access at /api/predict
)

# Launch with API enabled
if __name__ == "__main__":
    iface.launch(share=False)