Spaces:

davebulaval
/

meaningbert

Runtime error

davebulaval committed on Feb 11, 2024

Commit

4e0f879

1 Parent(s): 04f1736

improve processing and doc

Files changed (2) hide show

code_examples.py ADDED Viewed

+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+tokenizer = AutoTokenizer.from_pretrained("davebulaval/MeaningBERT")
+scorer = AutoModelForSequenceClassification.from_pretrained("davebulaval/MeaningBERT")
+scorer.eval()
+documents = [
+    "He wanted to make them pay.",
+    "This sandwich looks delicious.",
+    "He wants to eat.",
+]
+simplifications = [
+    "He wanted to make them pay.",
+    "This sandwich looks delicious.",
+    "Whatever, whenever, this is a sentence.",
+]
+# We tokenize the text as a pair and return Pytorch Tensors
+tokenize_text = tokenizer(
+    documents, simplifications, truncation=True, padding=True, return_tensors="pt"
+)
+with torch.no_grad():
+    # We process the text
+    scores = scorer(**tokenize_text)
+print(scores.logits.tolist())

meaningbert.py CHANGED Viewed

@@ -24,7 +24,9 @@ from transformers import AutoModelForSequenceClassification, AutoTokenizer
 @contextmanager
 def filter_logging_context():
     def filter_log(record):
-        return False if "This IS expected if you are initializing" in record.msg else True
     logger = datasets.utils.logging.get_logger("transformers.modeling_utils")
     logger.addFilter(filter_log)
@@ -105,23 +107,33 @@ class MeaningBERTScore(evaluate.Metric):
         )
     def _compute(
-            self,
-            documents: List,
-            simplifications: List,
-            verbose: bool = False,
     ) -> Dict:
         assert len(documents) == len(
-            simplifications), "The number of document is different of the number of simplifications."
         hashcode = _HASH
         # We load the MeaningBERT pretrained model
-        scorer = AutoModelForSequenceClassification.from_pretrained("davebulaval/MeaningBERT")
         # We load MeaningBERT tokenizer
         tokenizer = AutoTokenizer.from_pretrained("davebulaval/MeaningBERT")
         # We tokenize the text as a pair and return Pytorch Tensors
-        tokenize_text = tokenizer(documents, simplifications, truncation=True, padding=True, return_tensors="pt")
         with filter_logging_context():
             # We process the text

 @contextmanager
 def filter_logging_context():
     def filter_log(record):
+        return (
+            False if "This IS expected if you are initializing" in record.msg else True
+        )
     logger = datasets.utils.logging.get_logger("transformers.modeling_utils")
     logger.addFilter(filter_log)
         )
     def _compute(
+        self,
+        documents: List,
+        simplifications: List,
+        verbose: bool = False,
     ) -> Dict:
         assert len(documents) == len(
+            simplifications
+        ), "The number of document is different of the number of simplifications."
         hashcode = _HASH
         # We load the MeaningBERT pretrained model
+        scorer = AutoModelForSequenceClassification.from_pretrained(
+            "davebulaval/MeaningBERT"
+        )
+        scorer.eval()
         # We load MeaningBERT tokenizer
         tokenizer = AutoTokenizer.from_pretrained("davebulaval/MeaningBERT")
         # We tokenize the text as a pair and return Pytorch Tensors
+        tokenize_text = tokenizer(
+            documents,
+            simplifications,
+            truncation=True,
+            padding=True,
+            return_tensors="pt",
+        )
         with filter_logging_context():
             # We process the text