# Load and compute scores with the quantized model

```python
import torch
from pathlib import Path
from transformers import AutoConfig, AutoModelForSequenceClassification


def load_and_compute_scores_with_quantized_model(model_path, tokenizer):
    # Rebuild the model skeleton from its config only (loading the full model
    # just to read its config would defeat the manual loading below)
    config = AutoConfig.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_config(config)

    # Load the state dict, filtering out quantization bookkeeping keys
    # ('.SCB' and '.weight_format' entries are added by bitsandbytes 8-bit quantization)
    state_dict = torch.load(model_path / "pytorch_model.bin")
    filtered_state_dict = {k: v for k, v in state_dict.items() if not k.endswith(('.SCB', '.weight_format'))}
    model.load_state_dict(filtered_state_dict, strict=False)
    model.eval()

    def compute_score(pairs):
        inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt')
        with torch.no_grad():
            outputs = model(**inputs)
        return outputs.logits

    # Measure memory usage immediately after loading the model
    after_load_memory = get_memory_usage()
    print(f"Memory Usage after loading model: {after_load_memory:.2f} MB")

    # Compute scores for a few query-passage pairs
    scores = compute_score([
        ['what is panda?', 'hi'],
        ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.'],
    ])
    print("Scores:", scores)
```
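
The snippet above calls `get_memory_usage()`, which is not defined in this README. A minimal sketch, assuming the intent is to report the current process's resident memory in MB via `psutil` (the unit comes from the print statement above; the implementation itself is an assumption):

```python
import os
import psutil

def get_memory_usage():
    # Resident set size of the current process, converted to MB (assumed implementation)
    process = psutil.Process(os.getpid())
    return process.memory_info().rss / (1024 ** 2)
```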

```python
# Load and compute scores with the quantized model
load_and_compute_scores_with_quantized_model(Path(quantized_model_path), tokenizer)
```
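
The call above assumes `tokenizer` and `quantized_model_path` are defined earlier. A minimal setup sketch, assuming the tokenizer is loaded from the same directory as the quantized checkpoint (the path value is a placeholder):

```python
from transformers import AutoTokenizer

# Placeholder: point this at the directory containing the quantized checkpoint
quantized_model_path = "path/to/quantized-model"
tokenizer = AutoTokenizer.from_pretrained(quantized_model_path)
```

For a cross-encoder scorer like this, each returned logit is a relevance score for its query-passage pair; higher values typically indicate a closer match.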