|
8-bit quantized version of bge-reranker-v2-m3. |
|
|
|
!pip install "transformers==4.35" "optimum" "datasets==2.13.0" "peft==0.9.0" "accelerate==0.27.1" "bitsandbytes==0.40.2" "trl==0.4.7" "safetensors>=0.3.1" "tiktoken" |
|
|
|
# Load and compute scores with the quantized model |
|
``` |
|
import torch |
|
from transformers import AutoModelForSequenceClassification, AutoTokenizer, BitsAndBytesConfig |
|
|
|
def load_and_compute_scores_with_quantized_model(model_path, pairs=None):
    """Load a saved reranker checkpoint and score query/passage pairs.

    Args:
        model_path: Path or identifier handed to ``from_pretrained`` for
            both the tokenizer and the sequence-classification model.
        pairs: Optional list of ``[query, passage]`` pairs to score. When
            ``None`` (the default), the two built-in example pairs are used.

    Returns:
        The model's raw ``logits`` for the scored pairs. The same value is
        also printed, prefixed with ``"Scores:"``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)

    def compute_score(batch):
        """Tokenize ``batch`` as one padded batch and return the logits."""
        inputs = tokenizer(batch, padding=True, truncation=True, return_tensors='pt')
        # The tokenizer returns CPU tensors, while a quantized checkpoint may
        # have been placed on an accelerator at load time; keep the inputs on
        # the model's device so the forward pass does not hit a device mismatch.
        inputs = inputs.to(model.device)
        with torch.no_grad():  # inference only, no gradients needed
            outputs = model(**inputs)
        return outputs.logits

    if pairs is None:
        # Default demonstration pairs: one irrelevant and one relevant passage.
        pairs = [
            ['what is panda?', 'hi'],
            ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.'],
        ]

    scores = compute_score(pairs)
    print("Scores:", scores)
    # Hand the scores back so callers can use them, not just read the printout.
    return scores
|
|
|
# Location of the saved quantized reranker checkpoint.
quantized_model_path = "quantized_bge_reranker_v2_m3"

# Load the checkpoint and print the scores for the example pairs.
load_and_compute_scores_with_quantized_model(model_path=quantized_model_path)
|
``` |