import cv2
import gradio as gr
import numpy as np
from PIL import Image
from transformers import pipeline

# Load the YOLOS detection pipeline once at import time (the model download /
# weight load is slow, so it must not happen per frame).
model = pipeline("object-detection", model="hustvl/yolos-tiny")


def detect_objects(frame):
    """Run object detection on one webcam frame and draw the results.

    Args:
        frame: HxWx3 uint8 numpy array in RGB channel order — Gradio's
            Image/webcam components deliver RGB, not OpenCV-style BGR,
            so no channel swap is needed (the original code's BGR2RGB
            conversions actually corrupted the channel order).

    Returns:
        The same frame with bounding boxes and "label: score" text drawn
        on it, still in RGB order, or None if no frame was supplied.
    """
    # Streaming callbacks can fire before the webcam produces a frame.
    if frame is None:
        return None

    # The transformers pipeline expects a PIL image (a raw numpy array is
    # not accepted by all versions); convert once per frame.
    results = model(Image.fromarray(frame))

    for result in results:
        label = result["label"]
        score = result["score"]
        box = result["box"]
        x1, y1 = int(box["xmin"]), int(box["ymin"])
        x2, y2 = int(box["xmax"]), int(box["ymax"])

        # cv2 draws in place on the numpy array; (0, 255, 0) is green in
        # RGB order too, so no conversion is required for drawing.
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(
            frame,
            f"{label}: {score:.2f}",
            (x1, y1 - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 0),
            2,
        )

    return frame


def video_stream(frame):
    """Gradio streaming callback: annotate a single webcam frame."""
    return detect_objects(frame)


# Use a streaming webcam *Image* input: gr.Video streams a recorded file
# path, not per-frame arrays, so the original gr.Video(source=...) never
# reached the per-frame callback — and `source=`/`shape=` were removed in
# Gradio 4 (`sources=` list replaces `source=`).
webcam_interface = gr.Interface(
    fn=video_stream,
    inputs=gr.Image(sources=["webcam"], streaming=True),
    outputs=gr.Image(),
    live=True,
    description="Real-Time Object Detection with YOLO on Hugging Face",
)

# Launch the Gradio app only when run as a script.
if __name__ == "__main__":
    webcam_interface.launch()