import cv2
import gradio as gr
import numpy as np
from transformers import pipeline

# Load a lightweight YOLOS object-detection model from the Hugging Face hub.
# This downloads weights on first run and happens once at import time.
model = pipeline("object-detection", model="hustvl/yolos-tiny")


def capture_and_detect():
    """Yield webcam frames (RGB numpy arrays) annotated with detections.

    Opens the default webcam (device 0), runs the object-detection model on
    every frame, draws bounding boxes and confidence labels, and yields each
    annotated frame so Gradio can stream it live.

    Yields:
        numpy.ndarray: the annotated frame in RGB order (as Gradio expects).
    """
    cap = cv2.VideoCapture(0)  # OpenCV video capture from the default webcam
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Camera unavailable or stream ended — stop yielding frames.
                break

            # OpenCV delivers frames in BGR; convert to RGB for the model.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # NOTE(review): transformers image pipelines are documented to
            # accept PIL images / paths / URLs; raw numpy-array support is
            # version-dependent — confirm against the installed transformers
            # version (wrapping with PIL.Image.fromarray is the safe form).
            results = model(rgb_frame)

            # Draw each detection (bounding box + "label: score") on the
            # original BGR frame.
            for result in results:
                label = result["label"]
                score = result["score"]
                box = result["box"]
                x1, y1 = int(box["xmin"]), int(box["ymin"])
                x2, y2 = int(box["xmax"]), int(box["ymax"])
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                text = f"{label}: {score:.2f}"
                cv2.putText(frame, text, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # Convert BGR back to RGB for Gradio display.
            yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    finally:
        # Always release the camera handle, even if inference or drawing
        # raises mid-loop (the original only released on a clean break).
        cap.release()


def video_stream():
    """Gradio entry point: return the real-time annotated-frame generator."""
    return capture_and_detect()


# Gradio interface streaming the annotated webcam feed.
webcam_interface = gr.Interface(
    fn=video_stream,
    inputs=None,
    outputs=gr.Image(),
    live=True,
    description="Real-Time Object Detection with YOLO and Gradio",
)

# Launch the Gradio app only when run as a script, not on import.
if __name__ == "__main__":
    webcam_interface.launch()