desaxce committed on
Commit
c8dd61d
·
verified ·
1 Parent(s): 31ab1c5

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +29 -0
handler.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List
2
+
3
+ import torch
4
+ from transformers import AutoTokenizer, Qwen2ForCausalLM, pipeline
5
+
6
# Pick the dtype used to load the model weights.
# - bfloat16 is only chosen on GPUs with compute capability major >= 8; the
#   original `== 8` test excluded newer architectures (major 9 and above).
# - Older CUDA GPUs use float16.
# - Without CUDA, `get_device_capability()` raises, so guard the call and
#   fall back to float32 so the module can still be imported.
if torch.cuda.is_available():
    dtype = (
        torch.bfloat16
        if torch.cuda.get_device_capability()[0] >= 8
        else torch.float16
    )
else:
    dtype = torch.float32
7
+
8
+
9
class EndpointHandler:
    """Request handler that wraps a ``text-generation`` pipeline.

    The instance is callable: it takes a request payload dict, extracts the
    prompt(s) and optional generation parameters, and returns whatever the
    underlying pipeline returns.
    """

    def __init__(self, path: str = ""):
        """Load the tokenizer and model from ``path`` and build the pipeline.

        Args:
            path: Location handed to ``from_pretrained`` for both the
                tokenizer and the model.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        # `dtype` is the module-level dtype selected at import time.
        model = Qwen2ForCausalLM.from_pretrained(
            path, device_map="auto", torch_dtype=dtype
        )
        self.pipeline = pipeline(
            "text-generation", model=model, tokenizer=self.tokenizer
        )

    def __call__(self, data: Dict[str, Any]) -> List[Any]:
        """Run the pipeline on a request payload.

        Args:
            data: Request payload. The ``"inputs"`` key holds the prompt(s);
                if it is absent, the payload itself is used as the input.
                The optional ``"parameters"`` key holds keyword arguments
                forwarded to the pipeline call. Both keys are popped, so the
                caller's dict is mutated.

        Returns:
            The pipeline's output, unmodified. For text generation this is
            presumably a list of dicts (or a list of such lists for batched
            input) -- confirm against the installed transformers version.
        """
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)

        if parameters is None:
            return self.pipeline(inputs)

        # The tokenizer is forwarded only when parameters are supplied.
        # NOTE(review): presumably needed by generation options that require
        # a tokenizer -- confirm.
        return self.pipeline(inputs, tokenizer=self.tokenizer, **parameters)