Update README.md
Browse files
README.md
CHANGED
@@ -1,29 +1,34 @@
|
|
1 |
# Load and compute scores with the quantized model
|
2 |
```
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
12 |
|
|
|
|
|
|
|
13 |
def compute_score(pairs):
|
14 |
inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt')
|
15 |
with torch.no_grad():
|
16 |
outputs = model(**inputs)
|
17 |
return outputs.logits
|
18 |
|
19 |
-
# Measure memory usage immediately after loading the model
|
20 |
after_load_memory = get_memory_usage()
|
21 |
print(f"Memory Usage after loading model: {after_load_memory:.2f} MB")
|
22 |
|
23 |
-
# Compute scores
|
24 |
scores = compute_score([['what is panda?', 'hi'], ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.']])
|
25 |
print("Scores:", scores)
|
26 |
|
27 |
-
|
28 |
-
load_and_compute_scores_with_quantized_model(Path(quantized_model_path)
|
29 |
```
|
|
|
1 |
# Load and compute scores with the quantized model
|
2 |
```
|
3 |
+
import torch
|
4 |
+
from transformers import AutoModelForSequenceClassification, AutoTokenizer, BitsAndBytesConfig
|
5 |
+
from pathlib import Path
|
6 |
+
import psutil
|
7 |
+
|
8 |
+
def get_memory_usage():
    """Return the resident set size (RSS) of the current process, in MB."""
    rss_bytes = psutil.Process().memory_info().rss
    return rss_bytes / 1024**2  # bytes -> MB
|
12 |
+
|
13 |
+
def load_and_compute_scores_with_quantized_model(model_path):
    """Load the 8-bit quantized reranker from *model_path* and score sample pairs.

    Prints the process memory usage right after the model is loaded, then
    prints and returns the relevance scores (logits) for two example
    query/passage pairs.

    Args:
        model_path: Path to the quantized model checkpoint directory.

    Returns:
        torch.Tensor: the score logits for the sample pairs.
    """
    model_name = "BAAI/bge-reranker-v2-m3"
    # Tokenizer comes from the original checkpoint; weights come from the
    # quantized checkpoint at model_path.
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    bnb_config = BitsAndBytesConfig(load_in_8bit=True)
    # FIX: BitsAndBytesConfig must be passed via `quantization_config=`.
    # Passing it as `config=` is wrong — that parameter expects a
    # PretrainedConfig, so 8-bit loading would not be applied.
    model = AutoModelForSequenceClassification.from_pretrained(
        model_path, quantization_config=bnb_config
    )

    def compute_score(pairs):
        # Tokenize the (query, passage) pairs and run a gradient-free
        # forward pass; the raw logits are the relevance scores.
        inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt')
        with torch.no_grad():
            outputs = model(**inputs)
        return outputs.logits

    # Measure memory usage immediately after loading the model
    after_load_memory = get_memory_usage()
    print(f"Memory Usage after loading model: {after_load_memory:.2f} MB")

    # Compute scores
    scores = compute_score([['what is panda?', 'hi'], ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.']])
    print("Scores:", scores)
    return scores
|
31 |
|
32 |
+
# Guarded script entry: importing this module must not trigger a model load.
if __name__ == "__main__":
    # Location of the checkpoint produced by the quantization step above.
    quantized_model_path = "quantized_bge_reranker_v2_m3"
    load_and_compute_scores_with_quantized_model(Path(quantized_model_path))
|
34 |
```
|