AkinyemiAra committed
Commit b7f4238 · verified · 1 Parent(s): cacbdfe

Create app.py

Files changed (1)
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
+ """
+ CLIP Image Embedding Generator
+
+ A simple Gradio-based application for generating CLIP embeddings from uploaded images.
+ Uses OpenAI's CLIP model with proper preprocessing.
+ """
+
+ import gradio as gr
+ from transformers import CLIPProcessor, CLIPModel
+ from PIL import Image
+ import torch
+ import numpy as np
+ from typing import Tuple
+ import spaces
+
+ # Load model/processor
+ model: CLIPModel = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
+ processor: CLIPProcessor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
+ model.eval()
+
+ @spaces.GPU
+ def get_embedding(image: Image.Image) -> Tuple[str, str]:
+     """
+     Generate CLIP embedding for an image.
+
+     Args:
+         image (Image.Image): PIL Image object to process
+
+     Returns:
+         Tuple[str, str]: A tuple containing (embedding_info, embedding_values)
+     """
+     device: str = "cuda" if torch.cuda.is_available() else "cpu"
+
+     # Use CLIP's built-in preprocessing
+     inputs = processor(images=image, return_tensors="pt").to(device)
+     model_device = model.to(device)
+
+     with torch.no_grad():
+         emb: torch.Tensor = model_device.get_image_features(**inputs)
+
+     # L2 normalize the embeddings
+     emb = emb / emb.norm(p=2, dim=-1, keepdim=True)
+
+     # Convert to numpy for easier handling
+     emb_numpy = emb.cpu().numpy().squeeze()
+
+     # Create formatted output
+     embedding_info = f"Embedding Shape: {emb_numpy.shape}\nDevice Used: {device}\nNormalized: Yes (L2)"
+
+     # Format embedding values (show first 10 and last 10 values for readability)
+     if len(emb_numpy) > 20:
+         embedding_preview = (
+             f"First 10 values: {emb_numpy[:10].tolist()}\n"
+             f"...\n"
+             f"Last 10 values: {emb_numpy[-10:].tolist()}\n\n"
+             f"Full embedding array:\n{emb_numpy.tolist()}"
+         )
+     else:
+         embedding_preview = f"Full embedding array:\n{emb_numpy.tolist()}"
+
+     return embedding_info, embedding_preview
+
+ # Create Gradio interface
+ demo: gr.Interface = gr.Interface(
+     fn=get_embedding,
+     inputs=gr.Image(type="pil", label="Upload Image"),
+     outputs=[
+         gr.Textbox(label="Embedding Info", lines=3),
+         gr.Textbox(label="Embedding Values", lines=20, max_lines=30)
+     ],
+     allow_flagging="never",
+     title="CLIP Image Embedding Generator",
+     description="Upload an image to generate its CLIP embedding vector. The embedding is L2-normalized and ready for similarity computations.",
+     theme=gr.themes.Soft()
+ )
+
+ if __name__ == "__main__":
+     demo.launch(mcp_server=True)
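
A note on downstream use: because get_embedding L2-normalizes its output, cosine similarity between two images reduces to a plain dot product. A minimal sketch under that assumption (the cosine_similarity helper and the random stand-in vectors are illustrative, not part of this commit; clip-vit-large-patch14 projects image features to 768 dimensions):

import numpy as np

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    # For unit-norm vectors, the dot product equals cosine similarity.
    return float(np.dot(a, b))

# Hypothetical stand-ins for two L2-normalized embeddings returned by the app.
a = np.random.rand(768); a /= np.linalg.norm(a)
b = np.random.rand(768); b /= np.linalg.norm(b)
print(cosine_similarity(a, b))  # value in [-1.0, 1.0]

Launching with mcp_server=True additionally exposes the function as an MCP tool in recent Gradio releases, so the embedding can be requested by MCP clients as well as through the web UI.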