File size: 1,126 Bytes
1e05993
7feaf20
 
6a72b4d
1e05993
6a67ddb
 
7feaf20
 
 
58ff319
e6b2e2d
1e05993
 
 
6a67ddb
5701230
e6b2e2d
5701230
9599168
1e05993
6a67ddb
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from typing import Dict, Any
from PIL import Image    
import requests
import torch
import numpy as np
from transformers import AutoProcessor, LlavaForConditionalGeneration, BitsAndBytesConfig


class EndpointHandler():
    """Custom inference handler that answers a prompt about an image with LLaVA.

    The model is loaded once in ``__init__`` (4-bit quantized) and reused for
    every call. Each call downloads one image over HTTP, runs greedy
    generation, prints the decoded text and returns the generated token ids.
    """

    # Defaults used when the request does not override them. They are the
    # values the handler previously had hard-coded, so existing callers that
    # send no overrides get the same behaviour as before.
    DEFAULT_IMAGE_URL = "http://images.cocodataset.org/val2017/000000039769.jpg"
    DEFAULT_PROMPT = "USER: <image>\nWhat are these?\nASSISTANT:"
    DEFAULT_MAX_NEW_TOKENS = 200
    # Upper bound (seconds) on waiting for the image host, so a stalled
    # download cannot block the worker indefinitely.
    REQUEST_TIMEOUT_S = 30

    def __init__(self, path=""):
        """Load the LLaVA model and its processor from ``path``.

        Args:
            path: Model id or local directory passed to ``from_pretrained``.
        """
        model_id = path
        # Quantization is requested through ``quantization_config`` and the
        # weights are placed on device 0 through ``device_map`` instead of a
        # trailing ``.to(0)``: moving a bitsandbytes-quantized model with
        # ``.to()`` is rejected or restricted by transformers (depending on
        # the installed version), and the bare ``load_in_4bit`` keyword is
        # deprecated.
        self.model = LlavaForConditionalGeneration.from_pretrained(
            model_id,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            quantization_config=BitsAndBytesConfig(load_in_4bit=True),
            device_map={"": 0},
        )
        self.processor = AutoProcessor.from_pretrained(model_id)

    def __call__(self, data: Dict[str, Any]):
        """Run one image+prompt generation request.

        Args:
            data: Request payload. ``data["inputs"]`` (or ``data`` itself when
                there is no ``"inputs"`` key) may be a dict with the optional
                keys ``"url"``, ``"prompt"`` and ``"max_new_tokens"``. Missing
                keys, or a non-dict ``inputs``, fall back to the class
                defaults. The ``"inputs"`` key is popped from ``data``.

        Returns:
            The value returned by ``self.model.generate`` (generated token
            ids). NOTE(review): this is not JSON-serializable as-is; confirm
            what the serving layer expects before changing it.

        Raises:
            ValueError: If ``url`` or ``prompt`` is not a non-empty string, or
                ``max_new_tokens`` is not a positive integer.
            requests.RequestException: If the image download fails, times out
                or returns an HTTP error status.
        """
        parameters = data.pop("inputs", data)
        url, prompt, max_new_tokens = self._resolve_request(parameters)
        raw_image = self._fetch_image(url)
        inputs = self.processor(prompt, raw_image, return_tensors='pt').to(0, torch.float16)
        output = self.model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)
        # The first two token ids are skipped when decoding — presumably
        # leading special tokens; TODO confirm against the tokenizer.
        print(self.processor.decode(output[0][2:], skip_special_tokens=True))
        return output

    def _resolve_request(self, parameters):
        """Validate request overrides and return (url, prompt, max_new_tokens)."""
        overrides = parameters if isinstance(parameters, dict) else {}
        url = overrides.get("url", self.DEFAULT_IMAGE_URL)
        prompt = overrides.get("prompt", self.DEFAULT_PROMPT)
        max_new_tokens = overrides.get("max_new_tokens", self.DEFAULT_MAX_NEW_TOKENS)

        if not isinstance(url, str) or not url.strip():
            raise ValueError("'url' must be a non-empty string")
        if not isinstance(prompt, str) or not prompt.strip():
            raise ValueError("'prompt' must be a non-empty string")
        # bool is a subclass of int; reject it explicitly so True is not
        # silently treated as a budget of one token.
        if (
            isinstance(max_new_tokens, bool)
            or not isinstance(max_new_tokens, int)
            or max_new_tokens < 1
        ):
            raise ValueError("'max_new_tokens' must be a positive integer")
        return url, prompt, max_new_tokens

    def _fetch_image(self, url):
        """Download ``url`` and return it as a fully loaded PIL image."""
        # NOTE(security): ``url`` can come from the request payload, so this
        # performs a server-side fetch of a caller-chosen address. Restrict
        # the allowed hosts if the endpoint is exposed to untrusted callers.
        with requests.get(url, stream=True, timeout=self.REQUEST_TIMEOUT_S) as response:
            response.raise_for_status()
            image = Image.open(response.raw)
            # Image.open is lazy; force the pixel data to be read while the
            # connection is still open.
            image.load()
        return image