added files
- README.md +19 -4
- app.py +51 -0
- examples/test_0.jpg +0 -0
- examples/test_1.jpg +0 -0
- examples/test_2.jpg +0 -0
- examples/test_3.jpg +0 -0
- examples/test_4.jpg +0 -0
- examples/test_5.jpg +0 -0
- examples/test_6.jpg +0 -0
- examples/test_7.jpg +0 -0
- examples/test_8.jpg +0 -0
- examples/test_9.jpg +0 -0
- requirements.txt +4 -0
README.md
CHANGED
@@ -1,12 +1,27 @@
 ---
 title: CLIP ERA S19
 emoji: π
-colorFrom:
+colorFrom: purple
-colorTo:
+colorTo: blue
 sdk: gradio
-sdk_version: 3.
+sdk_version: 3.45.2
 app_file: app.py
 pinned: false
+license: mit
 ---

+
+# Session 19 - ERA Phase I - Assignment
+
+## Goals
+1. Build an app on HuggingFace using the CLIP model from OpenAI
+
+## Usage
+In the App tab, the UI provides the following functionality:
+1. Upload an image and enter text to get a similarity score between the two.
+2. A variety of examples is provided.
+
+Contributors
+-------------------------
+Lavanya Nemani
+
+Shashank Gupta
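Once the Space is running, the same functionality can be queried programmatically with gradio_client. This is a minimal sketch, not part of the committed files; the Space id "user/CLIP-ERA-S19" is a hypothetical placeholder, and it assumes Gradio's default /predict endpoint for an Interface app.

from gradio_client import Client

# "user/CLIP-ERA-S19" is a placeholder Space id -- substitute the real owner/name.
client = Client("user/CLIP-ERA-S19")
result = client.predict(
    "examples/test_0.jpg",  # image: local file path or URL
    "A man and a dog, A man wearing a blue coat with a dog inside",  # comma-separated captions
    api_name="/predict",
)
print(result)  # comma-separated softmax probabilities, one per caption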
app.py
ADDED
@@ -0,0 +1,51 @@
import requests
import numpy as np
import gradio as gr

from transformers import CLIPProcessor, CLIPModel

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")


def inference(input_img=None, input_text=None):

    if input_img is not None and input_text is not None:

        inputs = processor(text=input_text.split(","), images=input_img, return_tensors="pt", padding=True)
        outputs = model(**inputs)
        logits_per_image = outputs.logits_per_image  # this is the image-text similarity score
        probs = logits_per_image.softmax(dim=1)  # we can take the softmax to get the label probabilities

        output_prob = ', '.join([str(probs.detach().numpy()[0][i]) for i in range(np.shape(probs.detach().numpy()[0])[0])])

    else:
        output_prob = None

    return output_prob


title = "CLIP OpenAI model"
description = "A simple Gradio interface to find similarity between images and text"
text_examples = ["A man and a dog, A man wearing a blue coat with a dog inside",
                 "Train tracks and a train, A dog playing in the field",
                 "An outdoor seating glass box, A movie theater",
                 "A building, A building and multiple cars on the road",
                 "A living area, Planet earth",
                 "A dining room, A football stadium",
                 "A red car, A yellow car",
                 "A chair and a book, A building falling",
                 "A man and a horse, A child playing with a dog",
                 "A man and a horse, A child playing with a dog"
                 ]
examples = [['examples/test_'+str(i)+'.jpg', text_examples[i]] for i in range(10)]

demo = gr.Interface(inference,
                    inputs=[gr.Image(label="Input image"),
                            gr.Textbox(placeholder="Input text (Multiple entries separated by commas)")],
                    outputs=[gr.Textbox(label="Similarity score between the input image and input text")],
                    title=title,
                    description=description,
                    examples=examples
                    )
demo.launch()
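For context, outputs.logits_per_image returned by the Hugging Face CLIPModel is the temperature-scaled cosine similarity between the image embedding and each text embedding, and the softmax in inference() turns those scores into probabilities over the supplied captions. A minimal sketch (not part of the committed files) reproducing the same score from the embeddings, reusing the model and processor loaded in app.py above:

import torch
from PIL import Image

image = Image.open("examples/test_0.jpg")
captions = ["A man and a dog", "A man wearing a blue coat with a dog inside"]

with torch.no_grad():
    image_embeds = model.get_image_features(**processor(images=image, return_tensors="pt"))
    text_embeds = model.get_text_features(**processor(text=captions, return_tensors="pt", padding=True))

# Normalise, then scale the cosine similarities by the learned temperature (logit_scale).
image_embeds = image_embeds / image_embeds.norm(dim=-1, keepdim=True)
text_embeds = text_embeds / text_embeds.norm(dim=-1, keepdim=True)
logits_per_image = model.logit_scale.exp() * image_embeds @ text_embeds.T
probs = logits_per_image.softmax(dim=-1)  # same values inference() formats as a string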
examples/test_0.jpg ADDED
examples/test_1.jpg ADDED
examples/test_2.jpg ADDED
examples/test_3.jpg ADDED
examples/test_4.jpg ADDED
examples/test_5.jpg ADDED
examples/test_6.jpg ADDED
examples/test_7.jpg ADDED
examples/test_8.jpg ADDED
examples/test_9.jpg ADDED
requirements.txt
ADDED
@@ -0,0 +1,4 @@
numpy
torch
transformers
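Note that gradio itself is not listed: on Hugging Face Spaces the sdk: gradio / sdk_version: 3.45.2 front matter in README.md provisions it. To run the app outside Spaces you would install it yourself, for example:

pip install -r requirements.txt "gradio==3.45.2"
python app.py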