maximuspowers committed on
Commit
53a7262
·
verified ·
1 Parent(s): 7a0674a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -23
app.py CHANGED
@@ -1,14 +1,15 @@
 
1
  import torch
2
  from transformers import BertTokenizerFast, BertForTokenClassification
3
  import gradio as gr
4
 
5
- # Initialize tokenizer and model
6
  tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
7
  model = BertForTokenClassification.from_pretrained('maximuspowers/bias-detection-ner')
8
  model.eval()
9
  model.to('cuda' if torch.cuda.is_available() else 'cpu')
10
 
11
- # Define label mappings
12
  id2label = {
13
  0: 'O',
14
  1: 'B-STEREO',
@@ -19,6 +20,7 @@ id2label = {
19
  6: 'I-UNFAIR'
20
  }
21
 
 
22
  def predict_ner_tags(sentence):
23
  inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=128)
24
  input_ids = inputs['input_ids'].to(model.device)
@@ -36,32 +38,17 @@ def predict_ner_tags(sentence):
36
  if token not in tokenizer.all_special_tokens:
37
  label_indices = (predicted_labels[0][i] == 1).nonzero(as_tuple=False).squeeze(-1)
38
  labels = [id2label[idx.item()] for idx in label_indices] if label_indices.numel() > 0 else ['O']
39
- result.append((token, labels))
40
 
41
- return result
42
-
43
- def format_output(result):
44
- formatted_output = "<div style='font-family: Arial;'>"
45
- for token, labels in result:
46
- styles = []
47
- if "B-STEREO" in labels or "I-STEREO" in labels:
48
- styles.append("border-bottom: 2px solid blue;")
49
- if "B-GEN" in labels or "I-GEN" in labels:
50
- styles.append("background-color: green; color: white;")
51
- if "B-UNFAIR" in labels or "I-UNFAIR" in labels:
52
- styles.append("border: 2px dashed red;")
53
-
54
- style_string = " ".join(styles) if styles else ""
55
- formatted_output += f"<span style='{style_string} padding: 3px; margin: 2px;'>{token}</span> "
56
- formatted_output += "</div>"
57
- return formatted_output
58
 
 
59
  iface = gr.Interface(
60
  fn=predict_ner_tags,
61
  inputs="text",
62
- outputs="html", # Directly use "html" here
63
- title="Named Entity Recognition with BERT",
64
- description="Enter a sentence to predict NER tags using a BERT model trained for multi-label classification. Different styles represent different entity types.",
65
  examples=["Tall men are so clumsy."],
66
  allow_flagging="never"
67
  )
 
1
+ import json
2
  import torch
3
  from transformers import BertTokenizerFast, BertForTokenClassification
4
  import gradio as gr
5
 
6
+ # Load the tokenizer and model, switch to eval mode, and move the model to GPU when available
7
  tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
8
  model = BertForTokenClassification.from_pretrained('maximuspowers/bias-detection-ner')
9
  model.eval()
10
  model.to('cuda' if torch.cuda.is_available() else 'cpu')
11
 
12
+ # ids to labels we want to display
13
  id2label = {
14
  0: 'O',
15
  1: 'B-STEREO',
 
20
  6: 'I-UNFAIR'
21
  }
22
 
23
+ # Prediction function; reuse this if you call the model from your own code
24
  def predict_ner_tags(sentence):
25
  inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=128)
26
  input_ids = inputs['input_ids'].to(model.device)
 
38
  if token not in tokenizer.all_special_tokens:
39
  label_indices = (predicted_labels[0][i] == 1).nonzero(as_tuple=False).squeeze(-1)
40
  labels = [id2label[idx.item()] for idx in label_indices] if label_indices.numel() > 0 else ['O']
41
+ result.append({"token": token, "labels": labels})
42
 
43
+ return json.dumps(result, indent=4)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
+ # startup gradio
46
  iface = gr.Interface(
47
  fn=predict_ner_tags,
48
  inputs="text",
49
+ outputs="text",
50
+ title="Social Bias Named Entity Recognition (with BERT) 🕵",
51
+ description="Enter a sentence to predict biased parts of speech tags using a BERT model trained for multi-label token classification.",
52
  examples=["Tall men are so clumsy."],
53
  allow_flagging="never"
54
  )