Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,79 +1,111 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
from spacy import displacy
|
|
|
4 |
|
5 |
-
#
|
6 |
-
|
7 |
-
ner_pipe = pipeline(task = "ner",
|
8 |
-
model="cindyangelira/ner-roberta-large-bahasa-indonesia-finetuned",
|
9 |
-
aggregation_strategy = "simple",
|
10 |
-
device = "cpu")
|
11 |
|
12 |
-
#
|
13 |
-
|
14 |
-
|
15 |
-
"
|
16 |
-
"
|
17 |
-
"
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
|
|
|
|
25 |
|
26 |
def process_prediction(text, pred):
|
|
|
|
|
|
|
27 |
colors = get_colors()
|
28 |
-
combined_ents = []
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
combined_ents.append(current_ent) # add the last entity after the loop finishes
|
50 |
-
|
51 |
-
|
52 |
-
doc = { # doc for viz
|
53 |
-
"text": text,
|
54 |
-
"ents": combined_ents,
|
55 |
-
"title": None
|
56 |
-
}
|
57 |
-
|
58 |
-
options = {"ents": list(colors.keys()), "colors": colors}
|
59 |
-
html = displacy.render(doc, style="ent", manual=True, options=options)
|
60 |
-
return html
|
61 |
|
62 |
def ner_visualization(text):
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
title="NER Bahasa Indonesia",
|
72 |
-
description="Enter text to see named entity recognition results highlighted."
|
73 |
-
)
|
74 |
-
return iface
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
if __name__ == "__main__":
|
78 |
-
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
from spacy import displacy
|
4 |
+
import torch
|
5 |
|
6 |
+
# Run a trivial CPU tensor operation at import time; the original note labels
# this step "initialize ZeroGPU".
# NOTE(review): confirm the hosting runtime actually needs this warm-up.
torch.zeros(1).cpu()

# Build the token-classification pipeline once, at module import, so that every
# request handled later reuses the same loaded model.
try:
    ner_pipe = pipeline(
        "ner",
        model="cindyangelira/ner-roberta-large-bahasa-indonesia-finetuned",
        device=-1,
        aggregation_strategy="simple",
    )
except Exception as load_error:
    # Report the failure, then let it propagate: the app cannot serve
    # requests without the model.
    print(f"Error loading model: {load_error}")
    raise
|
20 |
+
|
21 |
+
# Highlight colour (hex) for every entity label the app knows about.
# Insertion order is preserved; callers that list the keys get this order.
ENTITY_COLORS = dict(
    O="#ffffff",          # white: the 'O' label
    PER="#ffadad",        # light red: PER
    LOC="#ffda83",        # light yellow: LOC
    DATE_TIME="#ffa500",  # orange: DATE_TIME
    EMAIL="#85e0e0",      # light cyan: EMAIL
    GENDER="#c3c3e0",     # light grey-lavender: GENDER
    SSN="#800080",        # purple: SSN
    PHONE="#d1ff85",      # light green: PHONE
)
|
32 |
|
33 |
+
def get_colors():
    """Return a fresh shallow copy of the label-to-colour mapping.

    A copy is handed out so that callers can modify the result without
    touching the module-level ``ENTITY_COLORS`` dictionary.
    """
    return dict(ENTITY_COLORS)
|
35 |
|
36 |
def _merge_adjacent_entities(text, pred):
    """Merge consecutive same-label predictions separated only by whitespace."""
    merged = []
    for token in pred:
        label = token['entity_group']
        start = token['start']
        end = token['end']

        previous = merged[-1] if merged else None
        if (
            previous is not None
            and previous['label'] == label
            # Only glue spans together when nothing but whitespace lies
            # between them; otherwise two distinct entities of the same type
            # would swallow the ordinary words that separate them.
            and not text[previous['end']:start].strip()
        ):
            previous['end'] = max(previous['end'], end)
        else:
            merged.append({'start': start, 'end': end, 'label': label})
    return merged


def process_prediction(text, pred):
    """Render NER predictions for ``text`` as displaCy entity HTML.

    Consecutive predictions that share a label are combined into a single
    highlighted span, but only when the characters between them in ``text``
    are whitespace. Same-label predictions separated by other words stay
    separate spans.

    Args:
        text: The string the predictions refer to.
        pred: Sequence of prediction dicts. Each one is read for the keys
            ``entity_group``, ``start`` and ``end``; ``start``/``end`` are
            used as character offsets into ``text``.

    Returns:
        The HTML produced by ``displacy.render`` in manual ``ent`` mode, or a
        short ``<p>`` message when there is nothing to render or when
        rendering fails.
    """
    if not text or not pred:
        return "<p>No text or predictions to process</p>"

    colors = get_colors()

    try:
        combined_ents = _merge_adjacent_entities(text, pred)

        # Pre-computed document in the dict form displaCy accepts when
        # ``manual=True`` is passed.
        doc = {
            "text": text,
            "ents": combined_ents,
            "title": None
        }

        options = {"ents": list(colors.keys()), "colors": colors}
        return displacy.render(doc, style="ent", manual=True, options=options)

    except Exception as e:
        # Surface the failure in the output panel instead of raising.
        return f"<p>Error processing predictions: {str(e)}</p>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
def ner_visualization(text):
    """Run NER on ``text`` and return the highlighted result as HTML.

    Args:
        text: Raw user input from the interface.

    Returns:
        The HTML returned by ``process_prediction``, a prompt asking for
        input when ``text`` is empty or whitespace-only, or an error
        paragraph when the pipeline or the rendering step raises.
    """
    has_content = bool(text) and bool(text.strip())
    if not has_content:
        return "<p>Please enter some text</p>"

    try:
        # Both the model call and the rendering stay inside the guard so that
        # any failure is reported in the output panel.
        return process_prediction(text, ner_pipe(text))
    except Exception as err:
        return f"<p>Error during NER processing: {err!s}</p>"
|
|
|
|
|
|
|
|
|
86 |
|
87 |
+
# Gradio interface: a single text box in, entity-highlight HTML out.
iface = gr.Interface(
    fn=ner_visualization,
    inputs=gr.Textbox(
        label="Input Text",
        placeholder="Enter text in Bahasa Indonesia..."
    ),
    outputs="html",
    title="NER Bahasa Indonesia",
    description="Enter text to see named entity recognition results highlighted.",
    examples=[
        ["Joko Widodo lahir di Surakarta pada tanggal 21 Juni 1961."],
        # This example previously contained the garbled placeholder
        # "[email protected]" instead of an address; a reserved example.com
        # address is used so the sentence actually contains an e-mail.
        ["Email saya adalah budi@example.com dan nomor HP 081234567890."]
    ]
)
|
102 |
|
103 |
if __name__ == "__main__":
    # Serve on all network interfaces at port 7860, without a public share link.
    try:
        iface.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False
        )
    except Exception as e:
        print(f"Error launching interface: {e}")
        # Re-raise, matching the model-loading step, so a failed launch ends
        # the process with a traceback and a non-zero exit status instead of
        # looking like a clean shutdown.
        raise
|