import os

import gradio as gr
import torch
from PIL import Image, ImageDraw
from transformers import AutoImageProcessor, AutoModelForObjectDetection, pipeline

# Query CUDA once; the answer drives both the optional ZeroGPU setup and the
# device the model is placed on, so the two can never disagree.
_cuda_available = torch.cuda.is_available()

if _cuda_available:
    # Only initialize GPU-specific components if a GPU is available.
    # NOTE(review): `your_library` looks like a placeholder module name --
    # confirm the real package that provides ZeroGPU. A missing module must
    # not take the whole app down on GPU hosts, so the setup is best-effort.
    try:
        from your_library import ZeroGPU
    except ImportError as err:
        print(f"ZeroGPU unavailable ({err}). Skipping ZeroGPU initialization.")
    else:
        ZeroGPU.initialize()
else:
    print("GPU not available, running on CPU. Skipping ZeroGPU initialization.")

# Device string consumed by the model and the detection pipeline.
device = "cuda" if _cuda_available else "cpu"
print(f"Using device: {device}")

# Identifier of the pretrained detector to serve; any other compatible
# checkpoint can be substituted here.
checkpoint = "PekingU/rtdetr_v2_r50vd"

# Load the detection model, then move it onto the selected device.
model = AutoModelForObjectDetection.from_pretrained(checkpoint)
model = model.to(device)

# Matching image processor for the same checkpoint.
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

# Visualization palette: six RGB colors with channels in the 0-1 range,
# repeated 100 times so the list can be indexed by detection number.
COLORS = 100 * [
    [0.000, 0.447, 0.741],
    [0.850, 0.325, 0.098],
    [0.929, 0.694, 0.125],
    [0.494, 0.184, 0.556],
    [0.466, 0.674, 0.188],
    [0.301, 0.745, 0.933],
]

_detector = None  # object-detection pipeline, built on first request and then reused


def _get_detector():
    """Return the shared object-detection pipeline, creating it on first use."""
    global _detector
    if _detector is None:
        _detector = pipeline(
            "object-detection",
            model=model,
            image_processor=image_processor,
            device=device,
        )
    return _detector


def inference(image, threshold=0.3):
    """Performs object detection and returns an annotated image.

    Args:
        image: PIL image to analyze, or ``None`` when no image was supplied
            (e.g. the form was submitted empty).
        threshold: Score threshold forwarded to the detection pipeline.

    Returns:
        A copy of ``image`` with one outlined box and a ``"label: score"``
        caption per detection, or ``None`` when ``image`` is ``None``. The
        input image itself is never modified.
    """
    # Nothing to detect on an empty submission; avoid crashing the request.
    if image is None:
        return None

    results = _get_detector()(image, threshold=threshold)

    annotated_image = image.copy()
    draw = ImageDraw.Draw(annotated_image)

    for i, result in enumerate(results):
        box = result["box"]
        # Wrap around the palette so a very large number of detections can
        # never index past its end.
        color = tuple(int(channel * 255) for channel in COLORS[i % len(COLORS)])
        corners = (box["xmin"], box["ymin"], box["xmax"], box["ymax"])
        draw.rectangle(corners, fill=None, outline=color, width=2)
        # Caption is anchored at the box's top-left corner.
        draw.text(corners[:2], text=f"{result['label']}: {result['score']:.2f}", fill=color)

    return annotated_image

# Gradio interface
# The sample image lives at an absolute, Colab-style path that will not exist
# on most hosts; only offer examples that are actually present so a missing
# file cannot break the UI.
_example_images = [
    path for path in ["/content/crowd7.jpg"] if os.path.isfile(path)
]

iface = gr.Interface(
    fn=inference,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
    title="RT-DETR v2 Object Detection",
    description="Upload an image to detect objects.",
    # None (rather than an empty list) means "no examples section".
    examples=_example_images or None,
)

iface.launch()