open-paws
/

text_performance_prediction_longform

Text Generation

animal-liberation

animal-advocacy

Model card Files Files and versions

Metrics Training metrics Community

stuckdavis committed on Jul 1

Commit

c200376

·

verified ·

1 Parent(s): c9f7808

Update handler.py

Files changed (1) hide show

handler.py +42 -40

handler.py CHANGED Viewed

@@ -1,43 +1,45 @@
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
-# Load once when the endpoint starts
-model_name = "open-paws/text_performance_prediction_longform"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSequenceClassification.from_pretrained(model_name)
-model.eval()
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model.to(device)
-def predict(inputs):
-    """
-    Hugging Face Inference Endpoints will call this function.
-    `inputs` can be a single string or a list of strings.
-    """
-    if isinstance(inputs, str):
-        inputs = [inputs]
-    results = []
-    for text in inputs:
-        encoded = tokenizer(
-            text,
-            return_tensors="pt",
-            truncation=True,
-            padding="max_length",
-            max_length=4096,
-        )
-        encoded = {k: v.to(device) for k, v in encoded.items()}
-        with torch.no_grad():
-            outputs = model(**encoded)
-        raw_score = outputs.logits.squeeze().item()
-        clipped_score = min(max(raw_score, 0.0), 1.0)
-        results.append({
-            "score": round(clipped_score, 4),
-        })
-    return results

 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
class EndpointHandler:
    """Endpoint handler that scores one or more texts with a sequence-classification model.

    The handler is constructed once with the model location and is then called
    with each request payload. Every text is scored independently; the raw
    model output is clipped to ``[0.0, 1.0]`` and rounded to four decimals.
    """

    # Token budget per text: longer texts are truncated to it and shorter
    # ones are padded up to it (see ``padding="max_length"`` in ``_score``).
    MAX_LENGTH = 4096

    def __init__(self, path=""):
        """Load the tokenizer and model from ``path`` and pick the device.

        Args:
            path: Location passed to ``from_pretrained`` for both the
                tokenizer and the model.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForSequenceClassification.from_pretrained(path)
        self.model.eval()
        # Use the GPU when one is visible, otherwise fall back to CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

    def __call__(self, data):
        """Score the text(s) found under ``data["inputs"]``.

        Args:
            data: Request payload, ``{"inputs": "some string"}`` or
                ``{"inputs": ["a", "b", ...]}``.

        Returns:
            A list with one ``{"score": float}`` dict per input text, in
            input order. If ``"inputs"`` is missing, or is neither a string
            nor a list/tuple of strings, a single ``{"error": message}`` dict
            is returned instead.
        """
        inputs = data.get("inputs", None)
        if inputs is None:
            return {"error": "No input provided"}
        if isinstance(inputs, str):
            inputs = [inputs]
        elif not isinstance(inputs, (list, tuple)) or not all(
            isinstance(text, str) for text in inputs
        ):
            # Reject malformed payloads up front: a bare number would raise
            # TypeError in the loop and a dict would be iterated by its keys.
            return {"error": '"inputs" must be a string or a list of strings'}
        return [{"score": self._score(text)} for text in inputs]

    def _score(self, text):
        """Return the clipped, rounded model score for a single text."""
        # NOTE(review): padding="max_length" pads every text up to MAX_LENGTH
        # tokens regardless of its real length; confirm the model needs this
        # before relaxing it.
        encoded = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=self.MAX_LENGTH,
        )
        encoded = {k: v.to(self.device) for k, v in encoded.items()}
        with torch.no_grad():
            outputs = self.model(**encoded)
        # .item() requires exactly one element, so this expects a
        # single-logit head.
        raw_score = outputs.logits.squeeze().item()
        clipped_score = min(max(raw_score, 0.0), 1.0)
        return round(clipped_score, 4)