kandanai committed · verified · commit b6d62ff · 1 parent: 582af9b

Update README.md

Files changed (1): README.md (+18 −13)
README.md CHANGED
@@ -1,29 +1,34 @@
 # Load and compute scores with the quantized model
 ```
-def load_and_compute_scores_with_quantized_model(model_path, tokenizer):
-    # Load the quantized model manually
-    config = AutoModelForSequenceClassification.from_pretrained(model_path).config
-    model = AutoModelForSequenceClassification.from_config(config)
-
-    # Load the state dict, filtering out unwanted keys
-    state_dict = torch.load(model_path / "pytorch_model.bin")
-    filtered_state_dict = {k: v for k, v in state_dict.items() if not k.endswith(('.SCB', '.weight_format'))}
-    model.load_state_dict(filtered_state_dict, strict=False)
+import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, BitsAndBytesConfig
+from pathlib import Path
+import psutil
+
+def get_memory_usage():
+    process = psutil.Process()
+    memory_info = process.memory_info()
+    return memory_info.rss / 1024**2  # Convert to MB
+
+def load_and_compute_scores_with_quantized_model(model_path):
+    model_name = "BAAI/bge-reranker-v2-m3"
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
 
+    bnb_config = BitsAndBytesConfig(load_in_8bit=True)
+    model = AutoModelForSequenceClassification.from_pretrained(model_path, quantization_config=bnb_config)
+
     def compute_score(pairs):
         inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt')
         with torch.no_grad():
             outputs = model(**inputs)
         return outputs.logits
 
-    # Measure memory usage immediately after loading the model
     after_load_memory = get_memory_usage()
     print(f"Memory Usage after loading model: {after_load_memory:.2f} MB")
 
-    # Compute scores
     scores = compute_score([['what is panda?', 'hi'], ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.']])
     print("Scores:", scores)
 
-# Load and compute scores with the quantized model
-load_and_compute_scores_with_quantized_model(Path(quantized_model_path), tokenizer)
+quantized_model_path = "quantized_bge_reranker_v2_m3"
+load_and_compute_scores_with_quantized_model(Path(quantized_model_path))
 ```
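
For anyone reproducing this, here is a minimal sketch of how a checkpoint directory like `quantized_bge_reranker_v2_m3` could have been produced in the first place. The export step is not shown in this README, so the output path, the `device_map` setting, and the use of `save_pretrained` on an 8-bit model (supported in recent transformers releases with a matching bitsandbytes version) are assumptions:

```
# Hypothetical export step (assumed; not part of this README):
# load BAAI/bge-reranker-v2-m3 with 8-bit bitsandbytes weights and save it.
from transformers import AutoModelForSequenceClassification, AutoTokenizer, BitsAndBytesConfig

model_name = "BAAI/bge-reranker-v2-m3"
output_dir = "quantized_bge_reranker_v2_m3"  # assumed output path

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    quantization_config=bnb_config,  # requires bitsandbytes and a CUDA GPU
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Persist the quantized weights and tokenizer so the loader above can reuse them
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
```

The `.SCB` and `.weight_format` keys that the old loader stripped by hand are bitsandbytes' int8 serialization state (quantization scales and format markers); passing a `quantization_config` at load time, as the new version does, lets transformers handle that state instead.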