suvadityamuk committed on
Commit
ad11fe1
·
1 Parent(s): d946c5c

chore: made optimizations (switch model quantization from bitsandbytes 4-bit to Quanto int8)

Browse files

Signed-off-by: Suvaditya Mukherjee <[email protected]>

Files changed (2) hide show
  1. app.py +8 -5
  2. requirements.txt +3 -2
app.py CHANGED
@@ -10,7 +10,7 @@ import pymupdf
10
  import gradio as gr
11
  from qdrant_client import QdrantClient
12
  from utils import download_pdf_from_gdrive, merge_strings_with_prefix
13
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
14
 
15
  def rag_query(query: str):
16
  """
@@ -181,11 +181,14 @@ if __name__ == "__main__":
181
  model_name,
182
  torch_dtype=torch.bfloat16,
183
  device_map="auto",
184
- quantization_config = BitsAndBytesConfig(
185
- load_in_4bit=True,
186
- # bnb_4bit_compute_dtype=torch.float16,
187
- # bnb_4bit_quant_type="nf4"
188
  )
 
 
 
 
 
189
  )
190
 
191
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
10
  import gradio as gr
11
  from qdrant_client import QdrantClient
12
  from utils import download_pdf_from_gdrive, merge_strings_with_prefix
13
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, QuantoConfig
14
 
15
  def rag_query(query: str):
16
  """
 
181
  model_name,
182
  torch_dtype=torch.bfloat16,
183
  device_map="auto",
184
+ quantization_config=QuantoConfig(
185
+ weights="int8"
 
 
186
  )
187
+ # quantization_config = BitsAndBytesConfig(
188
+ # load_in_4bit=True,
189
+ # # bnb_4bit_compute_dtype=torch.float16,
190
+ # # bnb_4bit_quant_type="nf4"
191
+ # )
192
  )
193
 
194
  tokenizer = AutoTokenizer.from_pretrained(model_name)
requirements.txt CHANGED
@@ -8,7 +8,8 @@ torch
8
  torchvision
9
  torchaudio
10
  accelerate
11
- bitsandbytes==0.44.1
12
  optimum
13
  wandb
14
- psutil
 
 
8
  torchvision
9
  torchaudio
10
  accelerate
11
+ # bitsandbytes==0.44.0
12
  optimum
13
  wandb
14
+ psutil
15
+ optimum-quanto