johko adirik committed on
Commit
08e780c
·
0 Parent(s):

Duplicate from adirik/image-guided-owlvit


Co-authored-by: Alara Dirik <[email protected]>

.DS_Store ADDED
Binary file (6.15 kB).
 
.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
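
These patterns route large binary artifacts such as model weights, archives, and serialized arrays through Git LFS rather than plain Git; running, for example, git lfs track "*.bin" appends exactly this kind of rule to .gitattributes.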
README.md ADDED
@@ -0,0 +1,14 @@
+ ---
+ title: Image-Guided OWL-ViT Demo
+ emoji: 🔥
+ colorFrom: yellow
+ colorTo: yellow
+ sdk: gradio
+ sdk_version: 3.10.0
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ duplicated_from: adirik/image-guided-owlvit
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
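
The front matter above is the standard Space configuration: sdk and sdk_version select the Gradio runtime that serves the app, app_file names the entrypoint, and duplicated_from records the Space this repository was copied from.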
app.py ADDED
@@ -0,0 +1,83 @@
+ import torch
+ import cv2
+ import gradio as gr
+ import numpy as np
+ from transformers import OwlViTProcessor, OwlViTForObjectDetection
+
+
+ # Use the GPU if one is available
+ if torch.cuda.is_available():
+     device = torch.device("cuda")
+ else:
+     device = torch.device("cpu")
+
+ model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32").to(device)
+ model.eval()
+ processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
+
+
+ def image_guided_detection(img, query_img, score_threshold, nms_threshold):
+     # PIL's img.size is (width, height); target_sizes expects (height, width)
+     target_sizes = torch.Tensor([img.size[::-1]])
+     inputs = processor(query_images=query_img, images=img, return_tensors="pt").to(device)
+
+     with torch.no_grad():
+         outputs = model.image_guided_detection(**inputs)
+
+     # Move outputs to the CPU; post-processing reads logits and target_pred_boxes
+     outputs.logits = outputs.logits.cpu()
+     outputs.target_pred_boxes = outputs.target_pred_boxes.cpu()
+
+     results = processor.post_process_image_guided_detection(
+         outputs=outputs,
+         threshold=score_threshold,
+         nms_threshold=nms_threshold,
+         target_sizes=target_sizes
+     )
+
+     boxes, scores = results[0]["boxes"], results[0]["scores"]
+     img = np.asarray(img)
+
+     for box, score in zip(boxes, scores):
+         box = [int(i) for i in box.tolist()]
+
+         if score >= score_threshold:
+             img = cv2.rectangle(img, tuple(box[:2]), tuple(box[2:]), (255, 0, 0), 5)
+             # Keep the score label inside the frame (assumes a 768-pixel-tall image)
+             if box[3] + 25 > 768:
+                 y = box[3] - 10
+             else:
+                 y = box[3] + 25
+             img = cv2.putText(img, f"{score:.2f}", (box[0], y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
+     return img
+
+
+ description = """
+ Gradio demo for image-guided / one-shot object detection with
+ <a href="https://huggingface.co/docs/transformers/main/en/model_doc/owlvit">OWL-ViT</a>,
+ introduced in <a href="https://arxiv.org/abs/2205.06230">Simple Open-Vocabulary Object Detection
+ with Vision Transformers</a>.
+
+ \n\nYou can use OWL-ViT to query images with text descriptions of any object or, alternatively, with an
+ example / query image of the target object. To use it, simply upload an image and a query image that contains only the object
+ you're looking for. You can also use the score and non-maximum suppression threshold sliders to filter out
+ low-probability and overlapping bounding box predictions.
+
+ \n\nFor an in-depth tutorial on how to use OWL-ViT with transformers, check out our
+ <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb">Colab notebook</a>
+ and our HF Spaces <a href="https://huggingface.co/spaces/adirik/OWL-ViT">demo</a> for zero-shot / text-guided object detection.
+ """
+
+ demo = gr.Interface(
+     image_guided_detection,
+     inputs=[gr.Image(type="pil"), gr.Image(type="pil"), gr.Slider(0, 1, value=0.6), gr.Slider(0, 1, value=0.3)],
+     outputs="image",
+     title="Image-Guided Object Detection with OWL-ViT",
+     description=description,
+     examples=[
+         ["assets/image2.jpeg", "assets/query2.jpeg", 0.7, 0.3],
+         ["assets/image1.jpeg", "assets/query1.jpeg", 0.6, 0.3]
+     ]
+ )
+
+ demo.launch()
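
For reference, the same detection pipeline can be exercised outside Gradio. The sketch below is a minimal, non-authoritative example using the same transformers calls as app.py; image.jpeg and query.jpeg are placeholder paths, not files in this commit:

    import torch
    from PIL import Image
    from transformers import OwlViTProcessor, OwlViTForObjectDetection

    # Same checkpoint the demo loads
    processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
    model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32")
    model.eval()

    image = Image.open("image.jpeg")  # target image (placeholder path)
    query = Image.open("query.jpeg")  # query image showing only the object

    inputs = processor(query_images=query, images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model.image_guided_detection(**inputs)

    # Rescale predicted boxes back to the original image size (height, width)
    target_sizes = torch.Tensor([image.size[::-1]])
    results = processor.post_process_image_guided_detection(
        outputs=outputs, threshold=0.6, nms_threshold=0.3, target_sizes=target_sizes
    )
    print(results[0]["boxes"], results[0]["scores"])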
assets/.DS_Store ADDED
Binary file (6.15 kB).
 
assets/image1.jpeg ADDED
assets/image2.jpeg ADDED
assets/query1.jpeg ADDED
assets/query2.jpeg ADDED
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ # pip install -r requirements.txt
+
+ torch==1.13.0
+ git+https://github.com/huggingface/transformers.git
+ opencv-python>=4.5.0
+ Pillow
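
With the pinned dependencies installed (pip install -r requirements.txt), running python app.py should launch the Gradio demo locally; on the Hub itself, the gradio dependency is supplied by the sdk / sdk_version fields in the README front matter rather than by this file.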