Eduardo Pacheco committed
Commit 2b1bf26 · Parent(s): 69f8e74

First commit

Browse files:
- .gitattributes +1 -0
- .gitignore +2 -0
- app.py +74 -0
- input_image.jpeg +0 -0
- requirements.txt +2 -0
.gitattributes CHANGED

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+input_image.jpeg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED

@@ -0,0 +1,2 @@
+gradio_cached_examples
+__pycache__
app.py ADDED

@@ -0,0 +1,74 @@
+import torch
+import numpy as np
+import gradio as gr
+from transformers import GroundingDinoForObjectDetection, AutoProcessor
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = GroundingDinoForObjectDetection.from_pretrained("IDEA-Research/grounding-dino-tiny")
+processor = AutoProcessor.from_pretrained("IDEA-Research/grounding-dino-tiny")
+model.to(device)
+
+def app_fn(
+    image: gr.Image,
+    labels: str,
+    box_threshold: float,
+    text_threshold: float,
+) -> tuple:
+    labels = labels.split("\n")
+    labels = [label if label.endswith(".") else label + "." for label in labels]
+    labels = " ".join(labels)
+    inputs = processor(images=image, text=labels, return_tensors="pt").to(device)
+    with torch.no_grad():
+        outputs = model(**inputs)
+
+    result = processor.post_process_grounded_object_detection(
+        outputs,
+        inputs.input_ids,
+        box_threshold=box_threshold,
+        text_threshold=text_threshold,
+        target_sizes=[image.size[::-1]]
+    )[0]
+
+    # convert the tensor of [x0, y0, x1, y1] boxes to a list of int [x0, y0, x1, y1]
+    boxes = result["boxes"].int().cpu().tolist()
+    pred_labels = result["labels"]
+    annot = [(tuple(box), pred_label) for box, pred_label in zip(boxes, pred_labels)]
+
+    return (image, annot)
+
+if __name__ == "__main__":
+    title = "Grounding DINO 🦖 for Object Detection"
+    with gr.Blocks(title=title) as demo:
+        gr.Markdown(f"# {title}")
+        gr.Markdown(
+            """
+            This app demonstrates object detection with the Grounding DINO model using the Hugging Face Transformers library.
+            Grounding DINO is known for its strong zero-shot object detection ability, so it can detect objects in images from textual descriptions.
+            Try the model by uploading an image and providing a textual description of the objects you want to detect; put each description on its own
+            line to pass multiple labels. The model will then highlight the detected objects in the image 🤗
+            """
+        )
+        with gr.Row():
+            box_threshold = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.05, label="Box Threshold")
+            text_threshold = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.05, label="Text Threshold")
+            labels = gr.Textbox(lines=2, max_lines=5, label="Labels")
+            btn = gr.Button()
+        with gr.Row():
+            img = gr.Image(type="pil")
+            annotated_image = gr.AnnotatedImage()
+
+        btn.click(fn=app_fn, inputs=[img, labels, box_threshold, text_threshold], outputs=[annotated_image])
+
+        gr.Examples(
+            [
+                ["input_image.jpeg", "a person.\na mountain.", 0.25, 0.25],
+                ["input_image.jpeg", "a group of people running to the sea with mountains in the background.", 0.25, 0.25]
+            ],
+            inputs=[img, labels, box_threshold, text_threshold],
+            outputs=[annotated_image],
+            fn=app_fn,
+            cache_examples=True,
+            label='Try this example input!'
+        )
+
+    demo.launch()
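For reference, the inference flow inside app_fn can be exercised outside Gradio. The sketch below is a minimal example and not part of the commit: the image path and label string are illustrative assumptions, and the thresholds simply mirror the values used in the cached examples above.

# Minimal standalone sketch of the app_fn inference flow (not part of this commit).
# "input_image.jpeg" and the label string are illustrative assumptions.
import torch
from PIL import Image
from transformers import AutoProcessor, GroundingDinoForObjectDetection

device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained("IDEA-Research/grounding-dino-tiny")
model = GroundingDinoForObjectDetection.from_pretrained("IDEA-Research/grounding-dino-tiny").to(device)

image = Image.open("input_image.jpeg")
text = "a person. a mountain."  # each phrase ends with a period, as app_fn enforces

inputs = processor(images=image, text=text, return_tensors="pt").to(device)
with torch.no_grad():
    outputs = model(**inputs)

result = processor.post_process_grounded_object_detection(
    outputs,
    inputs.input_ids,
    box_threshold=0.25,
    text_threshold=0.25,
    target_sizes=[image.size[::-1]],  # PIL gives (width, height); the processor expects (height, width)
)[0]

# Print each detected phrase with its integer [x0, y0, x1, y1] box
for box, label in zip(result["boxes"].int().tolist(), result["labels"]):
    print(label, box)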
input_image.jpeg ADDED
requirements.txt ADDED

@@ -0,0 +1,2 @@
+git+https://github.com/huggingface/transformers.git@main#egg=transformers
+torch