cindyangelira committed on
Commit
a9b9054
·
verified ·
1 Parent(s): 85e68e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -13
app.py CHANGED
@@ -3,7 +3,7 @@ from transformers import pipeline
3
  from spacy import displacy
4
 
5
  # load model pipeline globally
6
- ner_pipe = pipeline("token-classification", model="cindyangelira/ner-roberta-large-bahasa-indonesia-finetuned", aggregation_strategy = "average")
7
 
8
  # define colors for each tag
9
  def get_colors():
@@ -21,16 +21,33 @@ def get_colors():
21
 
22
  def process_prediction(text, pred):
23
  colors = get_colors()
 
24
 
25
- # Correctly refer to 'label' instead of 'entity'
26
- # for token in pred:
27
- # token['label'] = token['label'].replace('B-', '').replace('I-', '')
28
 
29
- ents = [{'start': token['start'], 'end': token['end'], 'label': token['label']} for token in pred]
 
 
 
30
 
31
- doc = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  "text": text,
33
- "ents": ents,
34
  "title": None
35
  }
36
 
@@ -38,18 +55,16 @@ def process_prediction(text, pred):
38
  html = displacy.render(doc, style="ent", manual=True, options=options)
39
  return html
40
 
41
-
42
  def ner_visualization(text):
43
  predictions = ner_pipe(text)
44
  return process_prediction(text, predictions)
45
 
46
-
47
  def build_interface():
48
  iface = gr.Interface(
49
- fn=ner_visualization, # Main function for NER visualization
50
- inputs=gr.Textbox(label="Input Text"),# Input textbox
51
- outputs="html", # Output is HTML with rendered NER
52
- title="NER Bahasa Indonesia", # Title of the app
53
  description="Enter text to see named entity recognition results highlighted."
54
  )
55
  return iface
 
3
  from spacy import displacy
4
 
5
  # load model pipeline globally
6
+ ner_pipe = pipeline("token-classification", model="cindyangelira/ner-roberta-large-bahasa-indonesia-finetuned", aggregation_strategy = "simple")
7
 
8
  # define colors for each tag
9
  def get_colors():
 
21
 
22
  def process_prediction(text, pred):
23
  colors = get_colors()
24
+ combined_ents = [] # initialize an empty list to store combined entities
25
 
26
+ current_ent = None # var to track current entity
 
 
27
 
28
+ for token in pred:
29
+ token_label = token['entity_group'] #.replace('B-', '').replace('I-', '')
30
+ token_start = token['start']
31
+ token_end = token['end']
32
 
33
+ if current_ent is None or current_ent['label'] != token_label:
34
+ if current_ent:
35
+ combined_ents.append(current_ent)
36
+ current_ent = {
37
+ 'start': token_start,
38
+ 'end': token_end,
39
+ 'label': token_label
40
+ }
41
+ else:
42
+ current_ent['end'] = token_end
43
+
44
+ if current_ent:
45
+ combined_ents.append(current_ent) # add the last entity after the loop finishes
46
+
47
+
48
+ doc = { # manual-mode input dict passed to displacy.render for visualization
49
  "text": text,
50
+ "ents": combined_ents,
51
  "title": None
52
  }
53
 
 
55
  html = displacy.render(doc, style="ent", manual=True, options=options)
56
  return html
57
 
 
58
  def ner_visualization(text):
59
  predictions = ner_pipe(text)
60
  return process_prediction(text, predictions)
61
 
 
62
  def build_interface():
63
  iface = gr.Interface(
64
+ fn=ner_visualization,
65
+ inputs=gr.Textbox(label="Input Text"),
66
+ outputs="html",
67
+ title="NER Bahasa Indonesia",
68
  description="Enter text to see named entity recognition results highlighted."
69
  )
70
  return iface