JanviMl committed · Commit 5094855 · verified · 1 Parent(s): 1612ccf

Update metrics.py

Files changed (1)
  1. metrics.py +71 -59
metrics.py CHANGED
@@ -1,68 +1,80 @@
 # metrics.py
-from model_loader import metrics_models
-import re
+from model_loader import classifier_model, metrics_models
+import torch
+import numpy as np
+import time

-def compute_semantic_similarity(original, paraphrased):
-    """
-    Compute semantic similarity between the original and paraphrased comment using Sentence-BERT.
-    Returns a similarity score between 0 and 1.
-    """
-    try:
-        sentence_bert = metrics_models.load_sentence_bert()
-        embeddings = sentence_bert.encode([original, paraphrased])
-        similarity = float(embeddings[0] @ embeddings[1].T)
-        return round(similarity, 2)
-    except Exception as e:
-        print(f"Error computing semantic similarity: {str(e)}")
-        return None
+def softmax(logits):
+    exp_logits = np.exp(logits - np.max(logits))
+    return exp_logits / exp_logits.sum()

-def compute_empathy_score(paraphrased):
+def compute_reward_scores(original, paraphrase):
     """
-    Compute an empathy score for the paraphrased comment.
-    Enhanced to consider positive sentiment and supportive language.
-    Returns a score between 0 and 1.
+    Compute reward scores for a paraphrased comment.
+    Returns a dictionary with empathy, toxicity, bias, hallucination, and reward scores.
     """
     try:
-        empathy_words = ["sorry", "understand", "care", "help", "support", "appreciate", "encourage", "positive"]
-        words = paraphrased.lower().split()
-        empathy_count = sum(1 for word in words if word in empathy_words)
-        # Normalize by length, cap at 1.0
-        score = min(empathy_count / max(len(words), 1) * 2, 1.0)  # Amplify for better sensitivity
-        return round(score, 2)
-    except Exception as e:
-        print(f"Error computing empathy score: {str(e)}")
-        return None
+        start_time = time.time()
+        print("Starting reward computation...")

-def compute_bias_score(paraphrased):
-    """
-    Compute a bias score for the paraphrased comment (placeholder).
-    Detects stereotypical or discriminatory language.
-    Returns a score between 0 and 1 (lower is less biased).
-    """
-    try:
-        bias_indicators = ["race", "gender", "religion", "stereotype", "discriminate", "bias"]
-        words = paraphrased.lower().split()
-        bias_count = sum(1 for word in words if word in bias_indicators)
-        score = bias_count / max(len(words), 1)
-        return round(score, 2)
-    except Exception as e:
-        print(f"Error computing bias score: {str(e)}")
-        return None
+        # Check if paraphrase is valid
+        if not isinstance(paraphrase, str) or "Error: Unable to generate paraphrase" in paraphrase:
+            print(f"Invalid paraphrase: {paraphrase}. Returning default scores.")
+            return {
+                "empathy": 0.0,
+                "toxicity": 1.0,
+                "bias": 1.0,
+                "hallucination": 1.0,
+                "reward": 0.0
+            }
+
+        # Classify the paraphrased comment
+        print("Starting classification...")
+        inputs = classifier_model.tokenizer(
+            paraphrase,
+            return_tensors="pt",
+            truncation=True,
+            padding=True,
+            max_length=512
+        ).to(classifier_model.device)
+
+        with torch.no_grad():
+            outputs = classifier_model.model(**inputs)
+        logits = outputs.logits.cpu().numpy()[0]
+        probs = softmax(logits)
+
+        toxicity = probs[1]  # Assuming label 1 is toxic
+        empathy = 1.0 - toxicity  # Simplified empathy score
+        bias = probs[1]  # Placeholder for bias
+        print(f"Classification took {time.time() - start_time:.2f} seconds")
+
+        # Compute semantic similarity using Sentence-BERT
+        print("Computing semantic similarity...")
+        sentence_bert = metrics_models.sentence_bert
+        embeddings = sentence_bert.encode([original, paraphrase], convert_to_tensor=True)
+        similarity = torch.cosine_similarity(embeddings[0], embeddings[1], dim=0).item()
+        hallucination = 1.0 - similarity  # High difference means potential hallucination
+        print(f"Semantic similarity computed: {similarity}")
+
+        # Compute reward score (weighted combination)
+        reward = 0.4 * empathy - 0.2 * toxicity - 0.2 * bias - 0.2 * hallucination
+        reward = max(0.0, min(1.0, reward))
+
+        print(f"Total processing time: {time.time() - start_time:.2f} seconds")
+        return {
+            "empathy": empathy,
+            "toxicity": toxicity,
+            "bias": bias,
+            "hallucination": hallucination,
+            "reward": reward
+        }

-def compute_hallucination_score(original, paraphrased):
-    """
-    Compute a hallucination score by checking factual consistency.
-    High score indicates deviation from original meaning.
-    Returns a score between 0 and 1 (lower is better).
-    """
-    try:
-        # Use semantic similarity as a proxy; low similarity suggests hallucination
-        similarity = compute_semantic_similarity(original, paraphrased)
-        if similarity is None:
-            return 0.5  # Default if similarity fails
-        # Inverse similarity scaled to penalize low similarity
-        score = max(0.0, (1.0 - similarity) * 0.5)
-        return round(score, 2)
     except Exception as e:
-        print(f"Error computing hallucination score: {str(e)}")
-        return None
+        print(f"Error in reward computation: {str(e)}")
+        return {
+            "empathy": 0.0,
+            "toxicity": 1.0,
+            "bias": 1.0,
+            "hallucination": 1.0,
+            "reward": 0.0
+        }
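
Reviewer note: a minimal sketch of how the new compute_reward_scores entry point could be exercised, assuming model_loader is importable and already exposes a loaded classifier_model (with .tokenizer, .model, .device) and metrics_models.sentence_bert, as the diff implies. The example strings and printed values below are illustrative only and are not part of this commit.

# sketch: exercising compute_reward_scores (hypothetical inputs)
from metrics import compute_reward_scores

original = "Your code is a mess, start over."
paraphrase = "This code has some issues; a few changes could make it much cleaner."

scores = compute_reward_scores(original, paraphrase)
# Expected shape of the result (actual values depend on the loaded models):
# {"empathy": ..., "toxicity": ..., "bias": ..., "hallucination": ..., "reward": ...}
print(scores)

With the weights used in the diff, a paraphrase scoring empathy 0.9, toxicity 0.1, bias 0.1, and hallucination 0.2 would receive reward = 0.4*0.9 - 0.2*0.1 - 0.2*0.1 - 0.2*0.2 = 0.28 before clamping to [0, 1].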