stuckdavis committed on
Commit
c9f7808
·
verified ·
1 Parent(s): 5e1a3a2

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +22 -17
handler.py CHANGED
@@ -1,38 +1,43 @@
1
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
2
  import torch
3
 
4
- # Load model and tokenizer once at module level
5
- MODEL_NAME = "open-paws/text_performance_prediction_longform"
6
 
7
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
8
- model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
9
  model.eval()
10
 
11
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
12
  model.to(device)
13
 
14
- def predict(texts):
15
- """Hugging Face expects a `predict()` function for custom handlers"""
16
- if isinstance(texts, str):
17
- texts = [texts] # Ensure batch input
 
 
 
18
 
19
  results = []
20
- for text in texts:
21
- # Tokenize
22
- inputs = tokenizer(
23
  text,
24
  return_tensors="pt",
25
  truncation=True,
26
  padding="max_length",
27
- max_length=512
28
  )
29
- inputs = {k: v.to(device) for k, v in inputs.items()}
30
 
31
  with torch.no_grad():
32
- outputs = model(**inputs)
33
- score = outputs.logits.squeeze().item()
34
- clipped_score = min(max(score, 0.0), 1.0)
35
 
36
- results.append({"score": clipped_score})
 
 
 
 
 
37
 
38
  return results
 
1
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
2
  import torch
3
 
4
# One-time setup performed at import: pick the device, then load the
# tokenizer and the sequence-classification model by name.
model_name = "open-paws/text_performance_prediction_longform"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Inference only: place the model on the chosen device and switch it to
# evaluation mode.
model.to(device)
model.eval()
13
 
14
def predict(inputs, max_length=4096):
    """Score one or more texts with the module-level classification model.

    Each text is tokenized on its own, truncated to ``max_length`` tokens,
    and run through ``model`` with gradient tracking disabled. The single
    output logit is clamped to ``[0.0, 1.0]`` and rounded to four decimals.

    NOTE(review): Hugging Face Inference Endpoints documents its custom
    handler contract as an ``EndpointHandler`` class in ``handler.py``;
    confirm how this function is wired into the endpoint.

    Args:
        inputs: A single string, or an iterable of strings.
        max_length: Maximum number of tokens kept per text; longer texts
            are truncated. Defaults to 4096.

    Returns:
        A list with one ``{"score": float}`` dict per input text, in input
        order. An empty iterable yields an empty list.
    """
    if isinstance(inputs, str):
        inputs = [inputs]

    results = []
    for text in inputs:
        # Texts are encoded one at a time, so no padding is needed. Padding
        # each one to `max_length` would make every short input pay for a
        # full-length forward pass. Padded positions are masked out by the
        # attention mask, so scores should match up to floating-point noise.
        encoded = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=max_length,
        )
        encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

        with torch.no_grad():
            outputs = model(**encoded)

        # `.item()` only works on a one-element tensor, so this relies on
        # the model producing exactly one logit per text.
        raw_score = outputs.logits.squeeze().item()
        clipped_score = min(max(raw_score, 0.0), 1.0)

        results.append({"score": round(clipped_score, 4)})

    return results