Spaces:
Runtime error
Runtime error
Commit
·
ad11fe1
1
Parent(s):
d946c5c
chore: made optims
Browse files
Signed-off-by: Suvaditya Mukherjee <[email protected]>
- app.py +8 -5
- requirements.txt +3 -2
app.py
CHANGED
@@ -10,7 +10,7 @@ import pymupdf
|
|
10 |
import gradio as gr
|
11 |
from qdrant_client import QdrantClient
|
12 |
from utils import download_pdf_from_gdrive, merge_strings_with_prefix
|
13 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
14 |
|
15 |
def rag_query(query: str):
|
16 |
"""
|
@@ -181,11 +181,14 @@ if __name__ == "__main__":
|
|
181 |
model_name,
|
182 |
torch_dtype=torch.bfloat16,
|
183 |
device_map="auto",
|
184 |
-
quantization_config = BitsAndBytesConfig(
|
185 |
-
load_in_4bit=True,
186 |
-
# bnb_4bit_compute_dtype=torch.float16,
|
187 |
-
# bnb_4bit_quant_type="nf4"
|
188 |
)
|
|
|
|
|
|
|
|
|
|
|
189 |
)
|
190 |
|
191 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
10 |
import gradio as gr
|
11 |
from qdrant_client import QdrantClient
|
12 |
from utils import download_pdf_from_gdrive, merge_strings_with_prefix
|
13 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, QuantoConfig
|
14 |
|
15 |
def rag_query(query: str):
|
16 |
"""
|
|
|
181 |
model_name,
|
182 |
torch_dtype=torch.bfloat16,
|
183 |
device_map="auto",
|
184 |
+
quantization_config=QuantoConfig(
|
185 |
+
weights="int8"
|
|
|
|
|
186 |
)
|
187 |
+
# quantization_config = BitsAndBytesConfig(
|
188 |
+
# load_in_4bit=True,
|
189 |
+
# # bnb_4bit_compute_dtype=torch.float16,
|
190 |
+
# # bnb_4bit_quant_type="nf4"
|
191 |
+
# )
|
192 |
)
|
193 |
|
194 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
requirements.txt
CHANGED
@@ -8,7 +8,8 @@ torch
|
|
8 |
torchvision
|
9 |
torchaudio
|
10 |
accelerate
|
11 |
-
bitsandbytes==0.44.
|
12 |
optimum
|
13 |
wandb
|
14 |
-
psutil
|
|
|
|
8 |
torchvision
|
9 |
torchaudio
|
10 |
accelerate
|
11 |
+
# bitsandbytes==0.44.0
|
12 |
optimum
|
13 |
wandb
|
14 |
+
psutil
|
15 |
+
optimum-quanto
|