import torch
import gradio as gr
import cv2
import numpy as np
from datetime import datetime

# Load the custom YOLOv5 model once at startup (weights: best.pt).
model = torch.hub.load('ultralytics/yolov5', 'custom', path='best.pt')


def detect_video(video):
    """Run YOLOv5 detection on every frame of an uploaded video.

    Parameters
    ----------
    video : str or file-like
        The uploaded video. Newer Gradio versions pass a filepath string;
        older versions pass a tempfile-like object with a ``.name`` attribute.

    Returns
    -------
    list[dict]
        One entry per detection, each with a wall-clock ``timestamp``,
        integer pixel ``coordinates`` (x1, y1, x2, y2), the detection
        ``confidence``, and the predicted ``class`` index.
    """
    # Accept both a plain filepath (newer Gradio) and a tempfile object.
    path = video if isinstance(video, str) else video.name
    cap = cv2.VideoCapture(path)

    detection_results = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Wall-clock time of processing (not the video's own timeline).
            timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

            # OpenCV decodes frames as BGR; YOLOv5 expects RGB ndarrays.
            results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            # BUG FIX: the original iterated results.xywh[0] (center-x,
            # center-y, width, height) but unpacked the values as corner
            # coordinates. results.xyxy[0] holds (x1, y1, x2, y2).
            for *xyxy, conf, cls in results.xyxy[0]:
                x1, y1, x2, y2 = map(int, xyxy)
                detection_results.append({
                    'timestamp': timestamp,
                    'coordinates': {'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2},
                    'confidence': float(conf),
                    # Class index was previously discarded; include it so
                    # consumers know what was detected (additive, backward-
                    # compatible key).
                    'class': int(cls),
                })
    finally:
        # Always release the capture, even if decoding/inference fails.
        cap.release()

    return detection_results


# Gradio UI: upload a video, receive the detections as JSON.
interface = gr.Interface(
    fn=detect_video,
    inputs=gr.Video(),
    outputs="json",
    live=True,
    title="YOLOv5 Video Object Detection",
    description="Upload a video to detect objects and get bounding boxes with timestamps.",
)

# Guard the launch so importing this module (e.g. for testing) does not
# start the web server as a side effect.
if __name__ == "__main__":
    interface.launch()