JanviMl's picture
Update metrics.py
581f008 verified
raw
history blame
2.76 kB
# metrics.py
from model_loader import metrics_models
import re
def compute_semantic_similarity(original, paraphrased):
    """
    Compute the cosine similarity between the original and paraphrased comment.

    Both texts are embedded with the model returned by
    ``metrics_models.load_sentence_bert()``. The dot product of the two
    embeddings is divided by the product of their norms, so the score stays
    bounded even if the model does not emit unit-length vectors. For
    embeddings that are already normalized the result equals the plain dot
    product.

    Args:
        original: The original comment text.
        paraphrased: The paraphrased comment text.

    Returns:
        The cosine similarity rounded to two decimals, 0.0 when either
        embedding has zero norm, or None if loading the model or computing
        the score raised an exception (the error is printed).
    """
    try:
        sentence_bert = metrics_models.load_sentence_bert()
        embeddings = sentence_bert.encode([original, paraphrased])
        # NOTE(review): assumes encode() yields one vector per input text,
        # in input order -- confirm against the model loader.
        original_vec, paraphrased_vec = embeddings[0], embeddings[1]

        dot = float(original_vec @ paraphrased_vec.T)
        # Norm product computed from self dot products so no extra import is
        # needed and any array type supporting ``@`` keeps working.
        norm_product = (
            float(original_vec @ original_vec.T)
            * float(paraphrased_vec @ paraphrased_vec.T)
        ) ** 0.5
        if norm_product == 0.0:
            # A zero vector has no direction; report "no similarity" instead
            # of dividing by zero.
            return 0.0
        return round(dot / norm_product, 2)
    except Exception as e:
        # Best-effort metric: callers treat None as "score unavailable".
        print(f"Error computing semantic similarity: {str(e)}")
        return None
def compute_empathy_score(paraphrased):
    """
    Compute a lexicon-based empathy score for the paraphrased comment.

    The text is lower-cased and split on whitespace. Each token has leading
    and trailing punctuation stripped before it is looked up in a small set
    of supportive words, so "sorry," and "help!" count as matches. The match
    ratio is doubled to make the metric more sensitive, then capped at 1.0.

    Args:
        paraphrased: The paraphrased comment text.

    Returns:
        A score between 0 and 1 rounded to two decimals (0.0 for empty text),
        or None if the input could not be processed (the error is printed).
    """
    try:
        empathy_words = {
            "sorry",
            "understand",
            "care",
            "help",
            "support",
            "appreciate",
            "encourage",
            "positive",
        }
        words = paraphrased.lower().split()
        # Strip non-word characters from token edges only; the token count
        # (the denominator) is still the whitespace-delimited word count.
        empathy_count = sum(
            1
            for word in words
            if re.sub(r"^\W+|\W+$", "", word) in empathy_words
        )
        # Normalize by length, amplify for better sensitivity, cap at 1.0.
        score = min(empathy_count / max(len(words), 1) * 2, 1.0)
        return round(score, 2)
    except Exception as e:
        # Best-effort metric: callers treat None as "score unavailable".
        print(f"Error computing empathy score: {str(e)}")
        return None
def compute_bias_score(paraphrased):
    """
    Compute a placeholder, lexicon-based bias score for the paraphrased comment.

    The text is lower-cased and split on whitespace. Each token has leading
    and trailing punctuation stripped before it is looked up in a small set
    of bias-indicator words, so "bias." and "race," count as matches. The
    score is the fraction of tokens that match.

    Args:
        paraphrased: The paraphrased comment text.

    Returns:
        A score between 0 and 1 rounded to two decimals (lower is less
        biased, 0.0 for empty text), or None if the input could not be
        processed (the error is printed).
    """
    try:
        bias_indicators = {
            "race",
            "gender",
            "religion",
            "stereotype",
            "discriminate",
            "bias",
        }
        words = paraphrased.lower().split()
        # Strip non-word characters from token edges only; the token count
        # (the denominator) is still the whitespace-delimited word count.
        bias_count = sum(
            1
            for word in words
            if re.sub(r"^\W+|\W+$", "", word) in bias_indicators
        )
        score = bias_count / max(len(words), 1)
        return round(score, 2)
    except Exception as e:
        # Best-effort metric: callers treat None as "score unavailable".
        print(f"Error computing bias score: {str(e)}")
        return None
def compute_hallucination_score(original, paraphrased):
    """
    Estimate how far the paraphrase drifts from the original meaning.

    Semantic similarity is used as a proxy for factual consistency: the score
    is half of (1 - similarity), floored at 0.0, so lower is better.

    Args:
        original: The original comment text.
        paraphrased: The paraphrased comment text.

    Returns:
        The score rounded to two decimals, 0.5 when the similarity is
        unavailable (None), or None if an exception was raised (the error is
        printed).
    """
    try:
        sim = compute_semantic_similarity(original, paraphrased)
        if sim is not None:
            # Invert and halve the similarity; never report a negative score.
            penalty = (1.0 - sim) * 0.5
            floored = penalty if penalty > 0.0 else 0.0
            return round(floored, 2)
        # Similarity could not be computed: fall back to a neutral midpoint.
        return 0.5
    except Exception as e:
        print(f"Error computing hallucination score: {str(e)}")
        return None