itdainb
/

PhoRanker

@@ -55,7 +55,7 @@ pipeline_tag: text-classification
 ```python
 from pyvi import ViTokenizer
-query = "UIT là gì?"
 sentences = [
     "Trường Đại học Công nghệ Thông tin có tên tiếng Anh là University of Information Technology (viết tắt là UIT) là thành viên của Đại học Quốc Gia TP.HCM.",
     "Trường Đại học Kinh tế – Luật (tiếng Anh: University of Economics and Law – UEL) là trường đại học đào tạo và nghiên cứu khối ngành kinh tế, kinh doanh và luật hàng đầu Việt Nam.",
@@ -67,25 +67,30 @@ tokenized_sentences = [ViTokenizer.tokenize(sent) for sent in sentences]
 tokenized_pairs = [[tokenized_query, sent] for sent in tokenized_sentences]
-model_id = 'itdainb/PhoRanker'
 ```
 ## Usage with sentence-transformers
 ```python
 from sentence_transformers import CrossEncoder
-model = CrossEncoder(model_id, max_length=256)
 # For fp16 usage
 model.model.half()
 scores = model.predict(tokenized_pairs)
 ```
 ## Usage with transformers
 ```python
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 model = AutoModelForSequenceClassification.from_pretrained(model_id)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -95,13 +100,18 @@ model.half()
 features = tokenizer(tokenized_pairs, padding=True, truncation="longest_first", return_tensors="pt", max_length=256)
 model.eval()
 with torch.no_grad():
     model_predictions = model(**features, return_dict=True)
     logits = model_predictions.logits
-    scores = [score[0] for score in logits]
-    print(scores)
 ```
 ## Performance

 ```python
 from pyvi import ViTokenizer
+query = "Trường UIT là gì?"
 sentences = [
     "Trường Đại học Công nghệ Thông tin có tên tiếng Anh là University of Information Technology (viết tắt là UIT) là thành viên của Đại học Quốc Gia TP.HCM.",
     "Trường Đại học Kinh tế – Luật (tiếng Anh: University of Economics and Law – UEL) là trường đại học đào tạo và nghiên cứu khối ngành kinh tế, kinh doanh và luật hàng đầu Việt Nam.",
 tokenized_pairs = [[tokenized_query, sent] for sent in tokenized_sentences]
+MODEL_ID = 'itdainb/PhoRanker'
+MAX_LENGTH = 256
 ```
 ## Usage with sentence-transformers
 ```python
 from sentence_transformers import CrossEncoder
+model = CrossEncoder(MODEL_ID, max_length=MAX_LENGTH)
 # For fp16 usage
 model.model.half()
 scores = model.predict(tokenized_pairs)
+# 0.982, 0.2444, 0.9253
+print(scores)
 ```
 ## Usage with transformers
 ```python
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
 model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 features = tokenizer(tokenized_pairs, padding=True, truncation="longest_first", return_tensors="pt", max_length=MAX_LENGTH)
+activation_function = torch.nn.Sigmoid() if model.config.num_labels == 1 else torch.nn.Identity()
 model.eval()
 with torch.no_grad():
     model_predictions = model(**features, return_dict=True)
     logits = model_predictions.logits
+    logits = activation_function(logits)
+    scores = [logit[0] for logit in logits]
+# 0.9819, 0.2444, 0.9253
+print(scores)
 ```
 ## Performance