import cv2
import gradio as gr
import numpy as np
from transformers import pipeline

# Load a lightweight YOLOS object-detection model from the Hugging Face hub.
# This downloads weights on first run and happens once at import time.
model = pipeline("object-detection", model="hustvl/yolos-tiny")


def capture_and_detect():
    """Yield webcam frames (RGB numpy arrays) annotated with detections.

    Opens the default webcam (device 0), runs the object-detection model on
    every frame, draws bounding boxes and confidence labels, and yields each
    annotated frame so Gradio can stream it live.

    Yields:
        numpy.ndarray: the annotated frame in RGB order (as Gradio expects).
    """
    cap = cv2.VideoCapture(0)  # OpenCV video capture from the default webcam
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Camera unavailable or stream ended — stop yielding frames.
                break

            # OpenCV delivers frames in BGR; convert to RGB for the model.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # NOTE(review): transformers image pipelines are documented to
            # accept PIL images / paths / URLs; raw numpy-array support is
            # version-dependent — confirm against the installed transformers
            # version (wrapping with PIL.Image.fromarray is the safe form).
            results = model(rgb_frame)

            # Draw each detection (bounding box + "label: score") on the
            # original BGR frame.
            for result in results:
                label = result["label"]
                score = result["score"]
                box = result["box"]
                x1, y1 = int(box["xmin"]), int(box["ymin"])
                x2, y2 = int(box["xmax"]), int(box["ymax"])
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                text = f"{label}: {score:.2f}"
                cv2.putText(frame, text, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # Convert BGR back to RGB for Gradio display.
            yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    finally:
        # Always release the camera handle, even if inference or drawing
        # raises mid-loop (the original only released on a clean break).
        cap.release()


def video_stream():
    """Gradio entry point: return the real-time annotated-frame generator."""
    return capture_and_detect()


# Gradio interface streaming the annotated webcam feed.
webcam_interface = gr.Interface(
    fn=video_stream,
    inputs=None,
    outputs=gr.Image(),
    live=True,
    description="Real-Time Object Detection with YOLO and Gradio",
)

# Launch the Gradio app only when run as a script, not on import.
if __name__ == "__main__":
    webcam_interface.launch()