kandanai committed · verified · commit b6d62ff · 1 parent: 582af9b

Update README.md

Files changed (1): README.md (+18 −13)
README.md CHANGED
@@ -1,29 +1,34 @@
 # Load and compute scores with the quantized model
 ```
-def load_and_compute_scores_with_quantized_model(model_path, tokenizer):
-    # Load the quantized model manually
-    config = AutoModelForSequenceClassification.from_pretrained(model_path).config
-    model = AutoModelForSequenceClassification.from_config(config)
-
-    # Load the state dict, filtering out unwanted keys
-    state_dict = torch.load(model_path / "pytorch_model.bin")
-    filtered_state_dict = {k: v for k, v in state_dict.items() if not k.endswith(('.SCB', '.weight_format'))}
-    model.load_state_dict(filtered_state_dict, strict=False)
+import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, BitsAndBytesConfig
+from pathlib import Path
+import psutil
+
+def get_memory_usage():
+    process = psutil.Process()
+    memory_info = process.memory_info()
+    return memory_info.rss / 1024**2  # Convert to MB
+
+def load_and_compute_scores_with_quantized_model(model_path):
+    model_name = "BAAI/bge-reranker-v2-m3"
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
 
+    bnb_config = BitsAndBytesConfig(load_in_8bit=True)
+    model = AutoModelForSequenceClassification.from_pretrained(model_path, quantization_config=bnb_config)
+
     def compute_score(pairs):
         inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt')
         with torch.no_grad():
             outputs = model(**inputs)
         return outputs.logits
 
-    # Measure memory usage immediately after loading the model
     after_load_memory = get_memory_usage()
     print(f"Memory Usage after loading model: {after_load_memory:.2f} MB")
 
-    # Compute scores
     scores = compute_score([['what is panda?', 'hi'], ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.']])
     print("Scores:", scores)
 
-# Load and compute scores with the quantized model
-load_and_compute_scores_with_quantized_model(Path(quantized_model_path), tokenizer)
+quantized_model_path = "quantized_bge_reranker_v2_m3"
+load_and_compute_scores_with_quantized_model(Path(quantized_model_path))
 ```
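
For anyone reproducing this, here is a minimal sketch of how a checkpoint directory like `quantized_bge_reranker_v2_m3` could have been produced in the first place. The export step is not shown in this README, so the output path, the `device_map` setting, and the use of `save_pretrained` on an 8-bit model (supported in recent transformers releases with a matching bitsandbytes version) are assumptions:

```
# Hypothetical export step (assumed; not part of this README):
# load BAAI/bge-reranker-v2-m3 with 8-bit bitsandbytes weights and save it.
from transformers import AutoModelForSequenceClassification, AutoTokenizer, BitsAndBytesConfig

model_name = "BAAI/bge-reranker-v2-m3"
output_dir = "quantized_bge_reranker_v2_m3"  # assumed output path

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    quantization_config=bnb_config,  # requires bitsandbytes and a CUDA GPU
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Persist the quantized weights and tokenizer so the loader above can reuse them
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
```

The `.SCB` and `.weight_format` keys that the old loader stripped by hand are bitsandbytes' int8 serialization state (quantization scales and format markers); passing a `quantization_config` at load time, as the new version does, lets transformers handle that state instead.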