# Hugging Face Space: text analyzer for implicit bias and microaggressions,
# built on the BenjaminOcampo/peace_hatebert sequence-classification model.
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load the classifier and its tokenizer once at import time.
# The model id is factored into a constant so it is defined in one place.
MODEL_NAME = "BenjaminOcampo/peace_hatebert"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Map the model's class indices to human-readable labels.
# NOTE(review): assumes the checkpoint outputs exactly these 4 classes in this
# order — confirm against the model card.
nuanced_labels = {
    0: "Non-Hate Speech",
    1: "Explicit Hate",
    2: "Implicit Hate",
    3: "White Grievance",
}

# Rule-based microaggression detection: phrase -> why it can be harmful.
# Matching is case-insensitive substring containment (see analyze_text).
microaggressions = {
    "You're so articulate": "This phrase can imply surprise that the individual can speak well, often used in a way that suggests it is unexpected for someone of their background.",
    "Where are you really from": "This question implies that the individual does not belong or is not truly part of the community.",
    "I don't see color": "This statement can negate the experiences and identities of people of different races.",
    "You're a credit to your race": "This phrase implies that most people of the individual’s race are not successful or commendable.",
}

# Per-label advice shown to the user: a short suggestion plus a longer
# explanation. Keys must cover every label analyze_text can produce.
bias_suggestions = {
    "Explicit Hate": {
        "suggestion": "Consider using language that promotes inclusivity and respect.",
        "explanation": "The text contains explicit hate speech, which is overtly harmful and discriminatory. It is important to foster communication that is inclusive and respectful of all individuals.",
    },
    "Implicit Hate": {
        "suggestion": "Try rephrasing to avoid subtle bias and ensure clarity.",
        "explanation": "The text contains implicit hate speech, which can perpetuate stereotypes and bias in a less overt manner. Aim for language that is clear and free from insinuations.",
    },
    "White Grievance": {
        "suggestion": "Reconsider any generalized claims about racial groups.",
        "explanation": "The text appears to express grievances linked to racial identity, which can contribute to divisive narratives. Strive for dialogue that acknowledges diversity and avoids stereotyping.",
    },
    "Non-Hate Speech": {
        "suggestion": "No problematic content detected.",
        "explanation": "The text does not appear to contain hate speech or bias. It seems respectful and neutral.",
    },
    "Microaggression": {
        "suggestion": "Be mindful of how certain phrases can be interpreted by others.",
        "explanation": "The text includes phrases that may be considered microaggressions, which can subtly perpetuate stereotypes or biases.",
    },
}
def analyze_text(text):
    """Classify ``text`` for hate-speech categories and microaggressions.

    Runs the transformer classifier, maps the argmax class to a nuanced
    label, then applies rule-based microaggression detection which, when
    triggered, overrides the model's label.

    Args:
        text: The user-supplied text to analyze.

    Returns:
        A ``(label, suggestion, explanation)`` tuple of strings.
    """
    # Tokenize and run the model without tracking gradients (inference only).
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)

    # Softmax over logits, then take the most likely class index.
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    predicted_class = torch.argmax(probs, dim=-1).item()
    label = nuanced_labels.get(predicted_class, "Unknown")

    # Rule-based check: a matching phrase overrides the model's label.
    # Hoist the lowercase conversion out of the loop.
    lowered = text.lower()
    for phrase, phrase_explanation in microaggressions.items():
        if phrase.lower() in lowered:
            label = "Microaggression"
            suggestion = bias_suggestions[label]["suggestion"]
            # Bug fix: the original looked up the phrase-specific explanation
            # but immediately overwrote it with the generic one. Surface both.
            explanation = (
                f"{bias_suggestions[label]['explanation']} "
                f"Detected phrase: \"{phrase}\" — {phrase_explanation}"
            )
            return label, suggestion, explanation

    # Bug fix: guard against a predicted class outside nuanced_labels
    # ("Unknown"), which would raise KeyError in the original.
    info = bias_suggestions.get(
        label,
        {"suggestion": "No suggestion available.", "explanation": "The model returned an unrecognized class."},
    )
    return label, info["suggestion"], info["explanation"]
# Build the Gradio UI: one multi-line text input, three read-only text outputs
# matching analyze_text's (label, suggestion, explanation) return tuple.
interface = gr.Interface(
    fn=analyze_text,
    inputs=gr.Textbox(lines=5, placeholder="Enter text to analyze..."),
    outputs=[
        gr.Textbox(label="Classification"),
        gr.Textbox(label="Suggestion"),
        gr.Textbox(label="Explanation"),
    ],
    # Bug fix: corrected user-facing typos ("Microagressions", "inital").
    title="Proofreading for Implicit Bias, Microaggressions - initial model test",
    description=(
        "Analyze text for nuanced bias categories such as implicit hate, "
        "explicit hate, or white grievance, and detect microaggressions to "
        "provide suggestions for improvement - step 1: prompt testing. "
        "Credit to https://huggingface.co/BenjaminOcampo"
    ),
)

# Launch only when executed as a script, so importing this module for tests
# does not start a server. (HF Spaces runs the file directly, so behavior on
# the platform is unchanged.)
if __name__ == "__main__":
    interface.launch()