"""Gradio demo: object detection with an RT-DETR v2 checkpoint.

Loads the model once at start-up, wraps it in a Hugging Face
``object-detection`` pipeline, and serves a small web UI that draws the
predicted boxes and labels on the uploaded image.
"""

from pathlib import Path

import gradio as gr
import torch
from PIL import Image, ImageDraw
from transformers import AutoImageProcessor, AutoModelForObjectDetection, pipeline

if torch.cuda.is_available():
    # Only initialize GPU-specific components if a GPU is available.
    # NOTE(review): "your_library" looks like a placeholder module name --
    # confirm the real package. The import is guarded so that a missing
    # module does not take the whole app down on GPU machines.
    try:
        from your_library import ZeroGPU
    except ImportError:
        print("ZeroGPU library not found. Skipping ZeroGPU initialization.")
    else:
        ZeroGPU.initialize()
else:
    print("GPU not available, running on CPU. Skipping ZeroGPU initialization.")

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

checkpoint = "PekingU/rtdetr_v2_r50vd"  # Or any of the other checkpoints
image_processor = AutoImageProcessor.from_pretrained(checkpoint)
model = AutoModelForObjectDetection.from_pretrained(checkpoint).to(device)

# Build the pipeline once; its arguments never change between requests, so
# re-creating it inside every call to ``inference`` is wasted work.
detector = pipeline(
    "object-detection",
    model=model,
    image_processor=image_processor,
    device=device,
)

# Colors for visualization, as RGB fractions in [0, 1]. The list is repeated
# to keep the original length; lookups below wrap around regardless.
COLORS = [
    [0.000, 0.447, 0.741],
    [0.850, 0.325, 0.098],
    [0.929, 0.694, 0.125],
    [0.494, 0.184, 0.556],
    [0.466, 0.674, 0.188],
    [0.301, 0.745, 0.933],
] * 100

# Example images offered in the UI; entries that do not exist on disk are
# dropped when the interface is built.
EXAMPLE_IMAGES = ["/content/crowd7.jpg"]


def _draw_detections(image: Image.Image, results: list) -> Image.Image:
    """Return a copy of *image* with one box and label drawn per detection.

    Each item of *results* is read as a mapping with a ``"box"`` entry
    (holding ``xmin``/``ymin``/``xmax``/``ymax``), a ``"label"`` and a
    ``"score"``. The input image is left untouched.
    """
    annotated_image = image.copy()
    draw = ImageDraw.Draw(annotated_image)
    for i, result in enumerate(results):
        box = result["box"]
        # Wrap around the palette so any number of detections gets a color.
        color = tuple(int(x * 255) for x in COLORS[i % len(COLORS)])
        xmin, ymin, xmax, ymax = box["xmin"], box["ymin"], box["xmax"], box["ymax"]
        draw.rectangle((xmin, ymin, xmax, ymax), fill=None, outline=color, width=2)
        draw.text(
            (xmin, ymin),
            text=f"{result['label']}: {result['score']:.2f}",
            fill=color,
        )
    return annotated_image


def inference(image: Image.Image, threshold: float = 0.3) -> Image.Image:
    """Performs object detection and returns an annotated image.

    Args:
        image: PIL image to run detection on.
        threshold: Value forwarded to the pipeline as ``threshold``.

    Returns:
        A copy of *image* with detection boxes and ``label: score`` text.

    Raises:
        gr.Error: If no image was supplied (``image`` is ``None``).
    """
    if image is None:
        # Submitting the form without an image would otherwise surface as an
        # opaque failure from inside the pipeline.
        raise gr.Error("Please upload an image first.")
    results = detector(image, threshold=threshold)
    return _draw_detections(image, results)


# Gradio interface
iface = gr.Interface(
    fn=inference,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
    title="RT-DETR v2 Object Detection",
    description="Upload an image to detect objects.",
    # Only advertise example files that are actually present.
    examples=[path for path in EXAMPLE_IMAGES if Path(path).is_file()] or None,
)

if __name__ == "__main__":
    iface.launch()