# clip-embeddings/handler.py
from typing import Any, Dict
import base64
from io import BytesIO

import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

class EndpointHandler:
    def __init__(self, path: str = "") -> None:
        """Preload the model and processor so inference calls are fast."""
        self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
        self.path = path
    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Run inference: decode the image, embed both modalities, and score them."""
        inputs = data.get("inputs", {})
        text = inputs.get("text")
        image_data = inputs.get("image")

        # The image is expected to arrive as a base64-encoded string.
        image = Image.open(BytesIO(base64.b64decode(image_data)))

        model_inputs = self.processor(text=text, images=image, return_tensors="pt", padding=True)
        with torch.no_grad():
            outputs = self.model(**model_inputs)

        # flatten() collapses batched embeddings into a single flat list.
        image_embeds = outputs.image_embeds.numpy().flatten().tolist()
        text_embeds = outputs.text_embeds.numpy().flatten().tolist()
        logits_per_image = outputs.logits_per_image.numpy().flatten().tolist()
        return {
            "image_embeddings": image_embeds,
            "text_embeddings": text_embeds,
            "logits_per_image": logits_per_image,
        }
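

# Minimal local smoke test, a sketch rather than part of the deployed
# handler: it base64-encodes an image from disk, builds the payload shape
# the handler expects, and invokes it directly. The path "cat.png" and the
# prompt strings are placeholder assumptions; substitute any RGB image and
# any text prompts.
if __name__ == "__main__":
    with open("cat.png", "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")

    handler = EndpointHandler()
    result = handler({
        "inputs": {
            "text": ["a photo of a cat", "a photo of a dog"],
            "image": encoded,
        }
    })
    # clip-vit-base-patch32 projects into a 512-dimensional space, so the
    # image embedding has 512 values; logits_per_image holds one
    # similarity score per text prompt.
    print(len(result["image_embeddings"]))
    print(result["logits_per_image"])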