Update handler.py
Browse files
- handler.py +2 -2
handler.py
CHANGED
@@ -11,7 +11,7 @@ class EndpointHandler():
|
|
11 |
model_id = path
|
12 |
self.model = LlavaForConditionalGeneration.from_pretrained(
|
13 |
model_id,
|
14 |
-
torch_dtype=torch.
|
15 |
low_cpu_mem_usage=True,
|
16 |
load_in_4bit=True
|
17 |
)
|
@@ -30,7 +30,7 @@ class EndpointHandler():
|
|
30 |
response.raise_for_status() # Raise an exception for 4xx or 5xx status codes
|
31 |
raw_image = Image.open(response.raw)
|
32 |
# Process image and generate output
|
33 |
-
inputs = self.processor(prompt, raw_image, return_tensors='pt').to(0, torch.
|
34 |
output = self.model.generate(**inputs, max_new_tokens=200, do_sample=False)
|
35 |
readable = self.processor.decode(output[0][2:], skip_special_tokens=True)
|
36 |
outputs.append(readable)
|
|
|
11 |
model_id = path
|
12 |
self.model = LlavaForConditionalGeneration.from_pretrained(
|
13 |
model_id,
|
14 |
+
torch_dtype=torch.float16,
|
15 |
low_cpu_mem_usage=True,
|
16 |
load_in_4bit=True
|
17 |
)
|
|
|
30 |
response.raise_for_status() # Raise an exception for 4xx or 5xx status codes
|
31 |
raw_image = Image.open(response.raw)
|
32 |
# Process image and generate output
|
33 |
+
inputs = self.processor(prompt, raw_image, return_tensors='pt').to(0, torch.float16)
|
34 |
output = self.model.generate(**inputs, max_new_tokens=200, do_sample=False)
|
35 |
readable = self.processor.decode(output[0][2:], skip_special_tokens=True)
|
36 |
outputs.append(readable)
|