# Load and compute scores with the quantized model

```python
import torch
from pathlib import Path
from transformers import AutoConfig, AutoModelForSequenceClassification


def load_and_compute_scores_with_quantized_model(model_path, tokenizer):
    # Rebuild the model skeleton from its config only (loading the full model
    # just to read its config would defeat the manual loading below)
    config = AutoConfig.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_config(config)

    # Load the state dict, filtering out quantization bookkeeping keys
    # ('.SCB' and '.weight_format' entries are added by bitsandbytes 8-bit quantization)
    state_dict = torch.load(model_path / "pytorch_model.bin")
    filtered_state_dict = {k: v for k, v in state_dict.items() if not k.endswith(('.SCB', '.weight_format'))}
    model.load_state_dict(filtered_state_dict, strict=False)
    model.eval()

    def compute_score(pairs):
        inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt')
        with torch.no_grad():
            outputs = model(**inputs)
        return outputs.logits

    # Measure memory usage immediately after loading the model
    after_load_memory = get_memory_usage()
    print(f"Memory Usage after loading model: {after_load_memory:.2f} MB")

    # Compute scores for a few query-passage pairs
    scores = compute_score([
        ['what is panda?', 'hi'],
        ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.'],
    ])
    print("Scores:", scores)
```
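
The snippet above calls `get_memory_usage()`, which is not defined in this README. A minimal sketch, assuming the intent is to report the current process's resident memory in MB via `psutil` (the unit comes from the print statement above; the implementation itself is an assumption):

```python
import os
import psutil

def get_memory_usage():
    # Resident set size of the current process, converted to MB (assumed implementation)
    process = psutil.Process(os.getpid())
    return process.memory_info().rss / (1024 ** 2)
```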

```python
# Load and compute scores with the quantized model
load_and_compute_scores_with_quantized_model(Path(quantized_model_path), tokenizer)
```
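
The call above assumes `tokenizer` and `quantized_model_path` are defined earlier. A minimal setup sketch, assuming the tokenizer is loaded from the same directory as the quantized checkpoint (the path value is a placeholder):

```python
from transformers import AutoTokenizer

# Placeholder: point this at the directory containing the quantized checkpoint
quantized_model_path = "path/to/quantized-model"
tokenizer = AutoTokenizer.from_pretrained(quantized_model_path)
```

For a cross-encoder scorer like this, each returned logit is a relevance score for its query-passage pair; higher values typically indicate a closer match.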