IrisDeng committed on
Commit
2cd1bd6
·
verified ·
1 Parent(s): cfffd7a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Any
2
+
3
+ from transformers import Blip2Processor, Blip2ForConditionalGeneration
4
+
5
+ from PIL import Image
6
+ from io import BytesIO
7
+ import torch, re, base64
8
+
9
+
10
class EndpointHandler:
    """Image-captioning handler built on the BLIP-2 OPT-2.7B checkpoint.

    The processor and model are loaded once in ``__init__`` and reused for
    every call.
    """

    def __init__(self, path=""):
        """Load the BLIP-2 processor and model.

        Args:
            path: Accepted but not used here; the checkpoint name
                "Salesforce/blip2-opt-2.7b" is hard-coded.
        """
        self.processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
        self.model = Blip2ForConditionalGeneration.from_pretrained(
            "Salesforce/blip2-opt-2.7b", device_map="auto"
        )

    def __call__(self, data: Any) -> Dict[str, Any]:
        """Generate a caption for a base64-encoded image.

        Args:
            data: Mapping whose "inputs" entry is the image encoded as a
                base64 string, optionally prefixed with a
                "data:image/...;base64," data-URI header.

        Returns:
            A dict of the form {"captions": "<generated caption>"}. The
            value is a single string, not a list.

        Raises:
            KeyError: If ``data`` has no "inputs" entry.
        """
        # Drop the optional data-URI header so only the base64 payload remains.
        encoded = re.sub(r'^data:image/.+;base64,', '', data['inputs'])
        image_bytes = base64.b64decode(encoded)

        image = Image.open(BytesIO(image_bytes))

        # The model was loaded with device_map="auto", so its placement is
        # decided at load time. Send the inputs to wherever the model
        # actually lives instead of guessing from CUDA availability.
        model_inputs = self.processor(images=image, return_tensors="pt").to(
            self.model.device
        )

        generated_ids = self.model.generate(**model_inputs)
        caption = self.processor.decode(generated_ids[0], skip_special_tokens=True)

        return {"captions": caption}
44
+
45
+
46
# Module-level side effect: constructing the handler here runs __init__, which
# loads the BLIP-2 processor and model as soon as this file is executed or
# imported. The resulting instance is not bound to any name.
+ EndpointHandler()