JanviMl committed
Commit e0084d4 · verified · 1 Parent(s): 95613a1

Update classifier.py

Files changed (1)
  1. classifier.py +38 -2
classifier.py CHANGED
@@ -3,7 +3,43 @@ import torch
 import time
 from model_loader import classifier_model
 from paraphraser import paraphrase_comment
-from metrics import compute_semantic_similarity, compute_empathy_score
+from metrics import compute_semantic_similarity, compute_empathy_score, compute_bias_score, compute_hallucination_score
+
+def compute_reward_scores(original, paraphrased):
+    """
+    Compute all reward scores for a paraphrase.
+    Returns a dictionary with empathy, toxicity, bias, hallucination, and overall reward.
+    """
+    try:
+        # Get toxicity from classifier
+        _, _, _, toxicity_score, bias_score, _, _, _, _, paraphrased_toxicity_score, paraphrased_bias_score, _, _ = classify_toxic_comment(paraphrased)
+        toxicity = paraphrased_toxicity_score if paraphrased_toxicity_score is not None else 0.5
+
+        # Compute other metrics
+        empathy = compute_empathy_score(paraphrased) or 0.5
+        bias = compute_bias_score(paraphrased) or 0.5
+        hallucination = compute_hallucination_score(original, paraphrased) or 0.5
+
+        # Overall reward: Weighted combination (adjust weights as needed)
+        reward = (0.4 * empathy) - (0.2 * toxicity) - (0.2 * bias) - (0.2 * hallucination)
+        reward = max(0.0, min(1.0, round(reward, 2)))
+
+        return {
+            "empathy": empathy,
+            "toxicity": toxicity,
+            "bias": bias,
+            "hallucination": hallucination,
+            "reward": reward
+        }
+    except Exception as e:
+        print(f"Error computing reward scores: {str(e)}")
+        return {
+            "empathy": 0.5,
+            "toxicity": 0.5,
+            "bias": 0.5,
+            "hallucination": 0.5,
+            "reward": 0.5
+        }
 
 def classify_toxic_comment(comment):
     """
@@ -15,7 +51,7 @@ def classify_toxic_comment(comment):
     print("Starting classification...")
 
     if not comment.strip():
-        return "Error: Please enter a comment.", None, None, None, None, None, None, None, None, None, None
+        return "Error: Please enter a comment.", None, None, None, None, None, None, None, None, None, None, None, None
 
     # Access the model and tokenizer
     model = classifier_model.model
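
For context, a minimal usage sketch of the new helper is below. It assumes classifier.py is importable as the classifier module, that paraphrase_comment returns a single paraphrased string, and that the example comment and printed score values are purely illustrative; none of this is taken from the commit itself.

# Minimal usage sketch (assumptions: classifier.py importable as `classifier`,
# paraphrase_comment returns one paraphrased string, example values illustrative).
from paraphraser import paraphrase_comment
from classifier import compute_reward_scores

original = "You clearly have no idea what you're talking about."  # hypothetical input
paraphrased = paraphrase_comment(original)  # assumed to return a gentler rewrite

scores = compute_reward_scores(original, paraphrased)
print(scores)
# e.g. {'empathy': 0.9, 'toxicity': 0.1, 'bias': 0.1, 'hallucination': 0.2, 'reward': 0.28}
# With the weights in the diff: 0.4*0.9 - 0.2*0.1 - 0.2*0.1 - 0.2*0.2 = 0.28,
# then clamped to [0, 1] and rounded to two decimals.

Two details of the change worth noting: metric calls that return a falsy value (None or 0.0) fall back to the neutral 0.5 via the `or 0.5` pattern, and the early-error return of classify_toxic_comment now carries 13 values so it matches the 13-element unpacking inside compute_reward_scores.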