Spaces:

ariG23498
/

zero-shot-od

Running on Zero

App Files Files Community

ariG23498 HF Staff committed on Aug 11

Commit

9052bb3

verified ·

1 Parent(s): 2eeb110

Create app.py

Browse files

Files changed (1) hide show

app.py +120 -0

app.py ADDED Viewed

	@@ -0,0 +1,120 @@

+import functools
+import time
+
+import gradio as gr
+import spaces
+import torch
+from PIL import Image
+from transformers import AutoModelForZeroShotObjectDetection, AutoProcessor
+def extract_model_short_name(model_id):
+    """Turn a model id into a short display label.
+
+    Keeps only the text after the last "/" and replaces every "-" and "_"
+    in it with a space.
+    """
+    repo_name = model_id.rsplit("/", 1)[-1]
+    # One pass that maps both separators to a space.
+    return repo_name.translate(str.maketrans("-_", "  "))
+# Checkpoint ids of the two zero-shot detectors compared by this app.
+model_llmdet_id = "iSEE-Laboratory/llmdet_tiny"
+model_mm_grounding_id = "rziga/mm_grounding_dino_tiny_o365v1_goldg"
+# Display labels derived from the ids; used in component labels and timing text.
+model_llmdet_name, model_mm_grounding_name = map(
+    extract_model_short_name,
+    (model_llmdet_id, model_mm_grounding_id),
+)
+@functools.lru_cache(maxsize=None)
+def _load_detector(model_id, device):
+    """Return the ``(processor, model)`` pair for ``model_id`` on ``device``.
+
+    The result is memoised per ``(model_id, device)`` so repeated detections
+    in the same process do not call ``from_pretrained`` again.
+    """
+    processor = AutoProcessor.from_pretrained(model_id)
+    model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(device).eval()
+    return processor, model
+
+
+def _detect(model_id, model_name, image, prompts, threshold):
+    """Run one zero-shot detector and format its output for the UI.
+
+    Args:
+        model_id: Checkpoint id passed to ``from_pretrained``.
+        model_name: Label inserted into the timing message.
+        image: PIL image to run detection on.
+        prompts: List of text prompts describing the objects to find.
+        threshold: Minimum score a detection needs to be reported.
+
+    Returns:
+        A tuple ``(annotations, raw_text, time_taken)`` where ``annotations``
+        is a list of ``((xmin, ymin, xmax, ymax), "label score")`` tuples,
+        ``raw_text`` is one line per detection (or "No detections") and
+        ``time_taken`` is a Markdown string with the elapsed milliseconds.
+    """
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    processor, model = _load_detector(model_id, device)
+
+    # Start the clock only once the weights are available, so the reported
+    # "Inference time" is not dominated by checkpoint loading.
+    t0 = time.perf_counter()
+    inputs = processor(images=image, text=[prompts], return_tensors="pt").to(device)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    results = processor.post_process_grounded_object_detection(
+        outputs,
+        threshold=threshold,
+        # PIL reports (width, height); reversed to match the original call.
+        target_sizes=[image.size[::-1]],
+    )
+    result = results[0]
+
+    # Prefer the "text_labels" key when the processor provides it and fall
+    # back to "labels" otherwise.
+    # NOTE(review): recent transformers releases appear to deprecate string
+    # values under "labels" - confirm against the installed version.
+    labels = result.get("text_labels")
+    if labels is None:
+        labels = result["labels"]
+
+    annotations = []
+    raw_results = []
+    for box, score, label in zip(result["boxes"], result["scores"], labels):
+        score = float(score)
+        # Kept from the original; presumably redundant with the threshold
+        # already passed to post-processing.
+        if score < threshold:
+            continue
+        xmin, ymin, xmax, ymax = [int(x) for x in box.tolist()]
+        annotations.append(((xmin, ymin, xmax, ymax), f"{label} {score:.2f}"))
+        raw_results.append(
+            f"Detected {label} with confidence {score:.2f} "
+            f"at location [{xmin}, {ymin}, {xmax}, {ymax}]"
+        )
+    elapsed_ms = (time.perf_counter() - t0) * 1000
+
+    time_taken = f"**Inference time ({model_name}):** {elapsed_ms:.0f} ms"
+    raw_text = "\n".join(raw_results) if raw_results else "No detections"
+    return annotations, raw_text, time_taken
+
+
+def detect_llmdet(image: Image.Image, prompts: list, threshold: float):
+    """Detect ``prompts`` in ``image`` with the LLMDet checkpoint.
+
+    Returns ``(annotations, raw_text, time_taken)``; see ``_detect``.
+    """
+    return _detect(model_llmdet_id, model_llmdet_name, image, prompts, threshold)
+
+
+def detect_mm_grounding(image: Image.Image, prompts: list, threshold: float):
+    """Detect ``prompts`` in ``image`` with the MM Grounding DINO checkpoint.
+
+    Returns ``(annotations, raw_text, time_taken)``; see ``_detect``.
+    """
+    return _detect(model_mm_grounding_id, model_mm_grounding_name, image, prompts, threshold)
+@spaces.GPU
+def run_detection(image, prompts_str, threshold):
+    """Run both detectors on ``image`` and return values for the six outputs.
+
+    Args:
+        image: PIL image from the input component, or ``None`` if none is set.
+        prompts_str: Comma-separated prompts; surrounding whitespace and
+            empty entries are discarded.
+        threshold: Minimum confidence for a detection to be reported.
+
+    Returns:
+        ``(annotated_llm, text_llm, time_llm, annotated_mm, text_mm, time_mm)``.
+        Each ``annotated_*`` value is ``(image, annotations)``, or ``None``
+        when there is no image to show.
+    """
+    if image is None:
+        # Return None rather than (None, []) for the annotated-image outputs.
+        # NOTE(review): gr.AnnotatedImage appears to reject a None base image
+        # - confirm against the installed Gradio version.
+        return None, "No detections", "", None, "No detections", ""
+
+    # Drop empty fragments (trailing comma, blank textbox) so the models are
+    # never asked to ground an empty string.
+    prompts = [
+        cleaned
+        for cleaned in (part.strip() for part in (prompts_str or "").split(","))
+        if cleaned
+    ]
+    if not prompts:
+        # Nothing to look for: show the image without boxes.
+        return (image, []), "No detections", "", (image, []), "No detections", ""
+
+    ann_llm, raw_llm, time_llm = detect_llmdet(image, prompts, threshold)
+    ann_mm, raw_mm, time_mm = detect_mm_grounding(image, prompts, threshold)
+    return (image, ann_llm), raw_llm, time_llm, (image, ann_mm), raw_mm, time_mm
+# Example rows: image URL, comma-separated prompts, confidence threshold.
+example_data = [
+    ["http://images.cocodataset.org/val2017/000000039769.jpg", "a cat, a remote control", 0.4],
+    ["http://images.cocodataset.org/val2017/000000000139.jpg", "a person, a tv, a remote", 0.3],
+]
+
+# Layout: one input column followed by one result column per model.
+with gr.Blocks() as app:
+    gr.Markdown("# Zero-Shot Object Detection Arena")
+    gr.Markdown("### Compare different zero-shot object detection models on the same image and prompts.")
+
+    with gr.Row():
+        # Inputs shared by both models.
+        with gr.Column(scale=1):
+            image = gr.Image(type="pil", label="Upload an image", height=400)
+            prompts = gr.Textbox(label="Prompts (comma-separated)", value="a cat, a remote control")
+            threshold = gr.Slider(
+                label="Confidence Threshold",
+                minimum=0.0,
+                maximum=1.0,
+                step=0.05,
+                value=0.30,
+            )
+            generate_btn = gr.Button(value="Detect")
+
+        # Results of the LLMDet model.
+        with gr.Column(scale=2):
+            output_image_llm = gr.AnnotatedImage(
+                label=f"Annotated image for {model_llmdet_name}",
+                height=400,
+            )
+            output_text_llm = gr.Textbox(
+                label=f"Model detections for {model_llmdet_name}",
+                lines=10,
+            )
+            output_time_llm = gr.Markdown()
+
+        # Results of the MM Grounding DINO model.
+        with gr.Column(scale=2):
+            output_image_mm = gr.AnnotatedImage(
+                label=f"Annotated image for {model_mm_grounding_name}",
+                height=400,
+            )
+            output_text_mm = gr.Textbox(
+                label=f"Model detections for {model_mm_grounding_name}",
+                lines=10,
+            )
+            output_time_mm = gr.Markdown()
+
+    gr.Markdown("### Examples")
+    gr.Examples(
+        examples=example_data,
+        inputs=[image, prompts, threshold],
+        label="Click an example to populate the input",
+    )
+
+    # The button click and a fresh upload run the same callback with the same
+    # wiring, so the component lists are declared once.
+    detection_inputs = [image, prompts, threshold]
+    detection_outputs = [
+        output_image_llm,
+        output_text_llm,
+        output_time_llm,
+        output_image_mm,
+        output_text_mm,
+        output_time_mm,
+    ]
+    for register_event in (generate_btn.click, image.upload):
+        register_event(
+            fn=run_detection,
+            inputs=detection_inputs,
+            outputs=detection_outputs,
+        )
+
+app.launch()