Update metrics.py
metrics.py  CHANGED  (+71 -59)
@@ -1,68 +1,80 @@
 # metrics.py
-from model_loader import metrics_models
-import ...
-
-def compute_semantic_similarity(original, paraphrased):
-    """
-    Compute semantic similarity between the original and paraphrased text.
-    Returns a similarity score between 0 and 1.
-    """
-    try:
-        sentence_bert = metrics_models.load_sentence_bert()
-        embeddings = sentence_bert.encode([original, paraphrased])
-        similarity = float(embeddings[0] @ embeddings[1].T)
-        return round(similarity, 2)
-    except Exception as e:
-        print(f"Error computing semantic similarity: {str(e)}")
-        return None
-
-def compute_empathy_score(...):
-    """
-    Compute an empathy score based on empathy-related words in the text.
-    Returns a score between 0 and 1.
-    """
-    try:
-        ...
-        empathy_count = sum(1 for word in words if word in empathy_words)
-        # Normalize by length, cap at 1.0
-        score = min(empathy_count / max(len(words), 1) * 2, 1.0)  # Amplify for better sensitivity
-        return round(score, 2)
-    except Exception as e:
-        print(f"Error computing empathy score: {str(e)}")
-        return None
-
-...
-
-def compute_hallucination_score(original, paraphrased):
-    """
-    Compute a hallucination score by checking factual consistency.
-    High score indicates deviation from original meaning.
-    Returns a score between 0 and 1 (lower is better).
-    """
-    try:
-        # Use semantic similarity as a proxy; low similarity suggests hallucination
-        similarity = compute_semantic_similarity(original, paraphrased)
-        if similarity is None:
-            return 0.5  # Default if similarity fails
-        # Inverse similarity scaled to penalize low similarity
-        score = max(0.0, (1.0 - similarity) * 0.5)
-        return round(score, 2)
-    except Exception as e:
-        print(f"Error computing hallucination score: {str(e)}")
-        return None
+from model_loader import classifier_model, metrics_models
+import torch
+import numpy as np
+import time
+
+def softmax(logits):
+    exp_logits = np.exp(logits - np.max(logits))
+    return exp_logits / exp_logits.sum()
+
+def compute_reward_scores(original, paraphrase):
+    """
+    Compute reward scores for a paraphrased comment.
+    Returns a dictionary with empathy, toxicity, bias, hallucination, and reward scores.
+    """
+    try:
+        start_time = time.time()
+        print("Starting reward computation...")
+
+        # Check if paraphrase is valid
+        if not isinstance(paraphrase, str) or "Error: Unable to generate paraphrase" in paraphrase:
+            print(f"Invalid paraphrase: {paraphrase}. Returning default scores.")
+            return {
+                "empathy": 0.0,
+                "toxicity": 1.0,
+                "bias": 1.0,
+                "hallucination": 1.0,
+                "reward": 0.0
+            }
+
+        # Classify the paraphrased comment
+        print("Starting classification...")
+        inputs = classifier_model.tokenizer(
+            paraphrase,
+            return_tensors="pt",
+            truncation=True,
+            padding=True,
+            max_length=512
+        ).to(classifier_model.device)
+
+        with torch.no_grad():
+            outputs = classifier_model.model(**inputs)
+            logits = outputs.logits.cpu().numpy()[0]
+            probs = softmax(logits)
+
+        toxicity = probs[1]  # Assuming label 1 is toxic
+        empathy = 1.0 - toxicity  # Simplified empathy score
+        bias = probs[1]  # Placeholder for bias
+        print(f"Classification took {time.time() - start_time:.2f} seconds")
+
+        # Compute semantic similarity using Sentence-BERT
+        print("Computing semantic similarity...")
+        sentence_bert = metrics_models.sentence_bert
+        embeddings = sentence_bert.encode([original, paraphrase], convert_to_tensor=True)
+        similarity = torch.cosine_similarity(embeddings[0], embeddings[1], dim=0).item()
+        hallucination = 1.0 - similarity  # High difference means potential hallucination
+        print(f"Semantic similarity computed: {similarity}")
+
+        # Compute reward score (weighted combination)
+        reward = 0.4 * empathy - 0.2 * toxicity - 0.2 * bias - 0.2 * hallucination
+        reward = max(0.0, min(1.0, reward))
+
+        print(f"Total processing time: {time.time() - start_time:.2f} seconds")
+        return {
+            "empathy": empathy,
+            "toxicity": toxicity,
+            "bias": bias,
+            "hallucination": hallucination,
+            "reward": reward
+        }
+
+    except Exception as e:
+        print(f"Error in reward computation: {str(e)}")
+        return {
+            "empathy": 0.0,
+            "toxicity": 1.0,
+            "bias": 1.0,
+            "hallucination": 1.0,
+            "reward": 0.0
+        }
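
For context, a minimal usage sketch of the new entry point follows. It is an illustration rather than part of the commit: it assumes model_loader can load the classifier and Sentence-BERT models in the local environment, and the two comment strings are invented for the example.

    # Hypothetical usage sketch; not part of this commit.
    # Assumes model_loader's models load locally; the strings below are made up.
    from metrics import compute_reward_scores

    original = "This code is a mess, did you even test it?"
    paraphrase = "I think this code has some issues; could you double-check the tests?"

    scores = compute_reward_scores(original, paraphrase)
    print(scores)  # {'empathy': ..., 'toxicity': ..., 'bias': ..., 'hallucination': ..., 'reward': ...}

Note that with the placeholders currently in the code (empathy = 1.0 - toxicity and bias = toxicity), the weighted combination reduces to reward = 0.4 - 0.8 * toxicity - 0.2 * hallucination before clipping to [0, 1], so the classifier's toxicity probability is the dominant term in the reward.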