withpi
/

glean_full_1b_11-14_4bit

Text Generation

text-generation-inference

Inference Endpoints

4-bit precision

Model card Files Files and versions Community

Nam Nguyen committed on Dec 10, 2024

Commit

1eb2656

·

verified ·

1 Parent(s): 73dabdc

Update handler.py

Files changed (1) hide show

handler.py +0 -5

handler.py CHANGED Viewed

@@ -14,15 +14,10 @@ def extract_assistant_response_simple(response_text):
 class EndpointHandler:
     def __init__(self, path=""):
-        quantization_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_compute_dtype=torch.float16
-        )
         self.tokenizer = AutoTokenizer.from_pretrained(path)
         model = AutoModelForCausalLM.from_pretrained(
             path,
             torch_dtype="auto",
-            quantization_config=quantization_config
         )
         self.model = model.to_bettertransformer()

 class EndpointHandler:
     def __init__(self, path=""):
         """Load the tokenizer and causal-LM model found at ``path``.

         Args:
             path: Location passed unchanged to both ``from_pretrained``
                 calls; defaults to the empty string. Presumably the model
                 directory supplied by the serving runtime -- TODO confirm.
         """
         # Tokenizer and model are both loaded from the same location.
         self.tokenizer = AutoTokenizer.from_pretrained(path)
         model = AutoModelForCausalLM.from_pretrained(
             path,
             # "auto" leaves dtype selection to the library instead of pinning
             # one here; no quantization_config is passed in this call.
             torch_dtype="auto",
         )
         # Store the result of to_bettertransformer() as the handler's model;
         # the unconverted local `model` is not kept on the instance.
         self.model = model.to_bettertransformer()