Plat committed on
Commit
798bcec
·
1 Parent(s): 94d52d9

chore: use int4

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -36,7 +36,12 @@ except:
36
 
37
  MODEL_NAME = "hatakeyama-llm-team/Tanuki-8B-Instruct"
38
 
39
- quantization_config = BitsAndBytesConfig(load_in_8bit=True)
 
 
 
 
 
40
  model = AutoModelForCausalLM.from_pretrained(
41
  MODEL_NAME, quantization_config=quantization_config, device_map="auto"
42
  )
 
36
 
37
  MODEL_NAME = "hatakeyama-llm-team/Tanuki-8B-Instruct"
38
 
39
+ quantization_config = BitsAndBytesConfig(
40
+ load_in_4bit=True,
41
+ bnb_4bit_compute_dtype=torch.bfloat16,
42
+ bnb_4bit_quant_type="nf4",
43
+ bnb_4bit_use_double_quant=True,
44
+ )
45
  model = AutoModelForCausalLM.from_pretrained(
46
  MODEL_NAME, quantization_config=quantization_config, device_map="auto"
47
  )