Spaces:

maximuspowers
/

bias-detection-ner

Sleeping

App Files Community

maximuspowers committed on Aug 23, 2024

Commit

53a7262

verified ·

1 Parent(s): 7a0674a

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -23

app.py CHANGED Viewed

@@ -1,14 +1,15 @@
 import torch
 from transformers import BertTokenizerFast, BertForTokenClassification
 import gradio as gr
-# Initialize tokenizer and model
 tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
 model = BertForTokenClassification.from_pretrained('maximuspowers/bias-detection-ner')
 model.eval()
 model.to('cuda' if torch.cuda.is_available() else 'cpu')
-# Define label mappings
 id2label = {
     0: 'O',
     1: 'B-STEREO',
@@ -19,6 +20,7 @@ id2label = {
     6: 'I-UNFAIR'
 }
 def predict_ner_tags(sentence):
     inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=128)
     input_ids = inputs['input_ids'].to(model.device)
@@ -36,32 +38,17 @@ def predict_ner_tags(sentence):
         if token not in tokenizer.all_special_tokens:
             label_indices = (predicted_labels[0][i] == 1).nonzero(as_tuple=False).squeeze(-1)
             labels = [id2label[idx.item()] for idx in label_indices] if label_indices.numel() > 0 else ['O']
-            result.append((token, labels))
-    return result
-def format_output(result):
-    formatted_output = "<div style='font-family: Arial;'>"
-    for token, labels in result:
-        styles = []
-        if "B-STEREO" in labels or "I-STEREO" in labels:
-            styles.append("border-bottom: 2px solid blue;")
-        if "B-GEN" in labels or "I-GEN" in labels:
-            styles.append("background-color: green; color: white;")
-        if "B-UNFAIR" in labels or "I-UNFAIR" in labels:
-            styles.append("border: 2px dashed red;")
-        style_string = " ".join(styles) if styles else ""
-        formatted_output += f"<span style='{style_string} padding: 3px; margin: 2px;'>{token}</span> "
-    formatted_output += "</div>"
-    return formatted_output
 iface = gr.Interface(
     fn=predict_ner_tags,
     inputs="text",
-    outputs="html",  # Directly use "html" here
-    title="Named Entity Recognition with BERT",
-    description="Enter a sentence to predict NER tags using a BERT model trained for multi-label classification. Different styles represent different entity types.",
     examples=["Tall men are so clumsy."],
     allow_flagging="never"
 )

+import json
 import torch
 from transformers import BertTokenizerFast, BertForTokenClassification
 import gradio as gr
+# init important things
 tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
 model = BertForTokenClassification.from_pretrained('maximuspowers/bias-detection-ner')
 model.eval()
 model.to('cuda' if torch.cuda.is_available() else 'cpu')
+# ids to labels we want to display
 id2label = {
     0: 'O',
     1: 'B-STEREO',
     6: 'I-UNFAIR'
 }
+# predict function you'll want to use if using in your own code
 def predict_ner_tags(sentence):
     inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=128)
     input_ids = inputs['input_ids'].to(model.device)
         if token not in tokenizer.all_special_tokens:
             label_indices = (predicted_labels[0][i] == 1).nonzero(as_tuple=False).squeeze(-1)
             labels = [id2label[idx.item()] for idx in label_indices] if label_indices.numel() > 0 else ['O']
+            result.append({"token": token, "labels": labels})
+    return json.dumps(result, indent=4)
+# startup gradio
 iface = gr.Interface(
     fn=predict_ner_tags,
     inputs="text",
+    outputs="text",
+    title="Social Bias Named Entity Recognition (with BERT) 🕵",
+    description="Enter a sentence to predict biased parts of speech tags using a BERT model trained for multi-label token classification.",
     examples=["Tall men are so clumsy."],
     allow_flagging="never"
 )