import base64
from io import BytesIO

import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPTextModel, CLIPVisionModelWithProjection
# Select the GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class EndpointHandler:
    def __init__(self, path=""):
        # CLIP ViT-L/14 text encoder, vision encoder (with projection head),
        # and the shared processor for tokenization and image preprocessing.
        self.text_model = CLIPTextModel.from_pretrained("rbanfield/clip-vit-large-patch14").to(device)
        self.image_model = CLIPVisionModelWithProjection.from_pretrained("rbanfield/clip-vit-large-patch14").to(device)
        self.processor = CLIPProcessor.from_pretrained("rbanfield/clip-vit-large-patch14")

    def __call__(self, data):
        inputs = data.pop("inputs", None)
        text_input = inputs.get("text") if inputs else None
        image_input = inputs.get("image") if inputs else None

        if text_input:
            # Tokenize the text and return the pooled text-encoder output.
            model_inputs = self.processor(text=text_input, return_tensors="pt", padding=True).to(device)
            with torch.no_grad():
                return self.text_model(**model_inputs).pooler_output.tolist()
        elif image_input:
            # Decode the base64-encoded image, preprocess it, and return the
            # projected image embeddings from the vision encoder.
            image = Image.open(BytesIO(base64.b64decode(image_input)))
            model_inputs = self.processor(images=image, return_tensors="pt").to(device)
            with torch.no_grad():
                return self.image_model(**model_inputs).image_embeds.tolist()
        else:
            return None
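

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original handler): a minimal local test that
# calls the handler the way an inference endpoint would, assuming a payload of
# the form {"inputs": {"text": ...}} or {"inputs": {"image": <base64 string>}}.
# The file name "example.jpg" is a hypothetical placeholder.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    handler = EndpointHandler()

    # Text embedding from a plain string.
    text_embedding = handler({"inputs": {"text": "a photo of a cat"}})

    # Image embedding from a base64-encoded image file.
    with open("example.jpg", "rb") as f:
        encoded_image = base64.b64encode(f.read()).decode("utf-8")
    image_embedding = handler({"inputs": {"image": encoded_image}})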