Joash2024 committed on
Commit
360349c
·
1 Parent(s): fbca714

fix: remove quantization, use float16 only

Browse files
Files changed (2) hide show
  1. app.py +1 -8
  2. requirements.txt +0 -2
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
4
  from peft import PeftModel
5
 
6
  # Model configurations
@@ -11,16 +11,9 @@ print("Loading tokenizer...")
11
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
12
  tokenizer.pad_token = tokenizer.eos_token
13
 
14
- # Configure quantization for 4-bit
15
- bnb_config = BitsAndBytesConfig(
16
- load_in_4bit=True,
17
- bnb_4bit_compute_dtype=torch.float16
18
- )
19
-
20
  print("Loading base model...")
21
  model = AutoModelForCausalLM.from_pretrained(
22
  BASE_MODEL,
23
- quantization_config=bnb_config,
24
  device_map="auto",
25
  torch_dtype=torch.float16
26
  )
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
  from peft import PeftModel
5
 
6
  # Model configurations
 
11
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
12
  tokenizer.pad_token = tokenizer.eos_token
13
 
 
 
 
 
 
 
14
  print("Loading base model...")
15
  model = AutoModelForCausalLM.from_pretrained(
16
  BASE_MODEL,
 
17
  device_map="auto",
18
  torch_dtype=torch.float16
19
  )
requirements.txt CHANGED
@@ -1,7 +1,5 @@
1
  torch>=2.0.0
2
  transformers>=4.30.0
3
  accelerate>=0.20.0
4
- bitsandbytes>=0.41.1
5
  peft==0.6.2
6
  gradio>=4.8.0
7
- scipy>=1.11.0
 
1
  torch>=2.0.0
2
  transformers>=4.30.0
3
  accelerate>=0.20.0
 
4
  peft==0.6.2
5
  gradio>=4.8.0