Joash2024 committed on
Commit
fbca714
·
1 Parent(s): f554210

fix: switch to 4-bit quantization

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -11,9 +11,10 @@ print("Loading tokenizer...")
11
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
12
  tokenizer.pad_token = tokenizer.eos_token
13
 
14
- # Configure quantization
15
  bnb_config = BitsAndBytesConfig(
16
- load_in_8bit=True,
 
17
  )
18
 
19
  print("Loading base model...")
 
11
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
12
  tokenizer.pad_token = tokenizer.eos_token
13
 
14
+ # Configure quantization for 4-bit
15
  bnb_config = BitsAndBytesConfig(
16
+ load_in_4bit=True,
17
+ bnb_4bit_compute_dtype=torch.float16
18
  )
19
 
20
  print("Loading base model...")