Spaces:

maximuspowers
/

bias-detection-ner

Sleeping

App Files Files Community

maximuspowers committed on Dec 14, 2024

Commit

7cd8165

verified ·

1 Parent(s): 2dab34c

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -9

app.py CHANGED Viewed

@@ -3,13 +3,13 @@ import torch
 from transformers import BertTokenizerFast, BertForTokenClassification
 import gradio as gr
-# init important things
 tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
 model = BertForTokenClassification.from_pretrained('maximuspowers/bias-detection-ner')
 model.eval()
 model.to('cuda' if torch.cuda.is_available() else 'cpu')
-# ids to labels we want to display
 id2label = {
     0: 'O',
     1: 'B-STEREO',
@@ -20,7 +20,18 @@ id2label = {
     6: 'I-UNFAIR'
 }
-# predict function you'll want to use if using in your own code
 def predict_ner_tags(sentence):
     inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=128)
     input_ids = inputs['input_ids'].to(model.device)
@@ -30,23 +41,27 @@ def predict_ner_tags(sentence):
         outputs = model(input_ids=input_ids, attention_mask=attention_mask)
         logits = outputs.logits
         probabilities = torch.sigmoid(logits)
-        predicted_labels = (probabilities > 0.5).int() # remember to try your own threshold
     result = []
     tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
     for i, token in enumerate(tokens):
         if token not in tokenizer.all_special_tokens:
             label_indices = (predicted_labels[0][i] == 1).nonzero(as_tuple=False).squeeze(-1)
             labels = [id2label[idx.item()] for idx in label_indices] if label_indices.numel() > 0 else ['O']
-            result.append({"token": token, "labels": labels})
-    return json.dumps(result, indent=4)
-# startup gradio
 iface = gr.Interface(
     fn=predict_ner_tags,
     inputs="text",
-    outputs="text",
     title="Social Bias Named Entity Recognition (with BERT) 🕵",
     description=("Enter a sentence to predict biased parts of speech tags. This model uses multi-label BertForTokenClassification, to label the entities: (GEN)eralizations, (UNFAIR)ness, and (STEREO)types. Labels follow BIO format. Try it out :)."
                  "<br><br>Read more about how this model was trained in this <a href='https://huggingface.co/blog/maximuspowers/bias-entity-recognition' target='_blank'>blog post</a>."
@@ -55,4 +70,4 @@ iface = gr.Interface(
 )
 if __name__ == "__main__":
-    iface.launch()

 from transformers import BertTokenizerFast, BertForTokenClassification
 import gradio as gr
+# Initialize important things
 tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
 model = BertForTokenClassification.from_pretrained('maximuspowers/bias-detection-ner')
 model.eval()
 model.to('cuda' if torch.cuda.is_available() else 'cpu')
+# IDs to labels we want to display
 id2label = {
     0: 'O',
     1: 'B-STEREO',
     6: 'I-UNFAIR'
 }
+# Color map for entities
+label_colors = {
+    "B-STEREO": "#FFCDD2",
+    "I-STEREO": "#E57373",
+    "B-GEN": "#C8E6C9",
+    "I-GEN": "#81C784",
+    "B-UNFAIR": "#BBDEFB",
+    "I-UNFAIR": "#64B5F6",
+    "O": "#FFFFFF"  # Default for no label
+}
+# Predict function
 def predict_ner_tags(sentence):
     inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=128)
     input_ids = inputs['input_ids'].to(model.device)
         outputs = model(input_ids=input_ids, attention_mask=attention_mask)
         logits = outputs.logits
         probabilities = torch.sigmoid(logits)
+        predicted_labels = (probabilities > 0.5).int()  # Threshold
     result = []
     tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
+    highlighted_sentence = ""
     for i, token in enumerate(tokens):
         if token not in tokenizer.all_special_tokens:
             label_indices = (predicted_labels[0][i] == 1).nonzero(as_tuple=False).squeeze(-1)
             labels = [id2label[idx.item()] for idx in label_indices] if label_indices.numel() > 0 else ['O']
+            # Color by the first predicted label (lowest label id above the threshold); when a token has several labels this choice is arbitrary
+            primary_label = labels[0] if labels else "O"
+            color = label_colors.get(primary_label, "#FFFFFF")
+            highlighted_sentence += f"<span style='background-color:{color}'>{token}</span> "
+    return highlighted_sentence.strip()
+# Gradio Interface
 iface = gr.Interface(
     fn=predict_ner_tags,
     inputs="text",
+    outputs=gr.outputs.HTML(label="Highlighted Sentence"),
     title="Social Bias Named Entity Recognition (with BERT) 🕵",
     description=("Enter a sentence to predict biased parts of speech tags. This model uses multi-label BertForTokenClassification, to label the entities: (GEN)eralizations, (UNFAIR)ness, and (STEREO)types. Labels follow BIO format. Try it out :)."
                  "<br><br>Read more about how this model was trained in this <a href='https://huggingface.co/blog/maximuspowers/bias-entity-recognition' target='_blank'>blog post</a>."
 )
 if __name__ == "__main__":
+    iface.launch()