Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,7 @@ from transformers import pipeline
|
|
3 |
from spacy import displacy
|
4 |
|
5 |
# load model pipeline globally
|
6 |
-
ner_pipe = pipeline("token-classification", model="cindyangelira/ner-roberta-large-bahasa-indonesia-finetuned", aggregation_strategy = "
|
7 |
|
8 |
# define colors for each tag
|
9 |
def get_colors():
|
@@ -21,16 +21,33 @@ def get_colors():
|
|
21 |
|
22 |
def process_prediction(text, pred):
|
23 |
colors = get_colors()
|
|
|
24 |
|
25 |
-
#
|
26 |
-
# for token in pred:
|
27 |
-
# token['label'] = token['label'].replace('B-', '').replace('I-', '')
|
28 |
|
29 |
-
|
|
|
|
|
|
|
30 |
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
"text": text,
|
33 |
-
"ents":
|
34 |
"title": None
|
35 |
}
|
36 |
|
@@ -38,18 +55,16 @@ def process_prediction(text, pred):
|
|
38 |
html = displacy.render(doc, style="ent", manual=True, options=options)
|
39 |
return html
|
40 |
|
41 |
-
|
42 |
def ner_visualization(text):
|
43 |
predictions = ner_pipe(text)
|
44 |
return process_prediction(text, predictions)
|
45 |
|
46 |
-
|
47 |
def build_interface():
|
48 |
iface = gr.Interface(
|
49 |
-
fn=ner_visualization,
|
50 |
-
inputs=gr.Textbox(label="Input Text")
|
51 |
-
outputs="html",
|
52 |
-
title="NER Bahasa Indonesia",
|
53 |
description="Enter text to see named entity recognition results highlighted."
|
54 |
)
|
55 |
return iface
|
|
|
3 |
from spacy import displacy
|
4 |
|
5 |
# load model pipeline globally
|
6 |
+
ner_pipe = pipeline("token-classification", model="cindyangelira/ner-roberta-large-bahasa-indonesia-finetuned", aggregation_strategy = "simple")
|
7 |
|
8 |
# define colors for each tag
|
9 |
def get_colors():
|
|
|
21 |
|
22 |
def process_prediction(text, pred):
|
23 |
colors = get_colors()
|
24 |
+
combined_ents = [] # initialize an empty list to store combined entities
|
25 |
|
26 |
+
current_ent = None # var to track current entity
|
|
|
|
|
27 |
|
28 |
+
for token in pred:
|
29 |
+
token_label = token['entity_group'] #.replace('B-', '').replace('I-', '')
|
30 |
+
token_start = token['start']
|
31 |
+
token_end = token['end']
|
32 |
|
33 |
+
if current_ent is None or current_ent['label'] != token_label:
|
34 |
+
if current_ent:
|
35 |
+
combined_ents.append(current_ent)
|
36 |
+
current_ent = {
|
37 |
+
'start': token_start,
|
38 |
+
'end': token_end,
|
39 |
+
'label': token_label
|
40 |
+
}
|
41 |
+
else:
|
42 |
+
current_ent['end'] = token_end
|
43 |
+
|
44 |
+
if current_ent:
|
45 |
+
combined_ents.append(current_ent) # add the last entity after the loop finishes
|
46 |
+
|
47 |
+
|
48 |
+
doc = { # doc for viz
|
49 |
"text": text,
|
50 |
+
"ents": combined_ents,
|
51 |
"title": None
|
52 |
}
|
53 |
|
|
|
55 |
html = displacy.render(doc, style="ent", manual=True, options=options)
|
56 |
return html
|
57 |
|
|
|
58 |
def ner_visualization(text):
    """Run NER on ``text`` and return the rendered entity highlighting.

    The raw text is passed through the module-level ``ner_pipe`` pipeline and
    the resulting predictions are handed, together with the original text, to
    ``process_prediction``, whose return value is returned unchanged.
    """
    return process_prediction(text, ner_pipe(text))
|
61 |
|
|
|
62 |
def build_interface():
    """Create and return the Gradio interface for the NER demo.

    The interface wires a single text box to ``ner_visualization`` and shows
    the function's result through an ``"html"`` output component.
    """
    text_input = gr.Textbox(label="Input Text")
    return gr.Interface(
        fn=ner_visualization,
        inputs=text_input,
        outputs="html",
        title="NER Bahasa Indonesia",
        description="Enter text to see named entity recognition results highlighted.",
    )
|