cindyangelira committed on
Commit
a334f2a
·
verified ·
1 Parent(s): 6e70afb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -64
app.py CHANGED
@@ -1,79 +1,111 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
  from spacy import displacy
 
4
 
5
- # load model pipeline globally
6
- # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
7
- ner_pipe = pipeline(task = "ner",
8
- model="cindyangelira/ner-roberta-large-bahasa-indonesia-finetuned",
9
- aggregation_strategy = "simple",
10
- device = "cpu")
11
 
12
- # define colors for each tag
13
- def get_colors():
14
- return {
15
- "O": "#ffffff", # White for 'O'
16
- "PER": "#ffadad", # Light red for 'PERSON'
17
- "LOC": "#ffda83", # Light yellow for 'LOCATION'
18
- "DATE_TIME": "#ffa500", # Light orange for 'DOB'
19
- "EMAIL": "#85e0e0", # Light cyan for 'EMAIL'
20
- "GENDER": "#c3c3e0", # Light gray for 'GENDER'
21
- "SSN": "#800080", # Purple for 'ID'
22
- "PHONE": "#d1ff85" # Light green for 'PHONE NUMBER'
23
- }
 
 
 
 
 
 
 
 
 
 
 
24
 
 
 
25
 
26
  def process_prediction(text, pred):
 
 
 
27
  colors = get_colors()
28
- combined_ents = [] # initialize an empty list to store combined entities
29
-
30
- current_ent = None # var to track current entity
31
-
32
- for token in pred:
33
- token_label = token['entity_group'] #.replace('B-', '').replace('I-', '')
34
- token_start = token['start']
35
- token_end = token['end']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- if current_ent is None or current_ent['label'] != token_label:
38
- if current_ent:
39
- combined_ents.append(current_ent)
40
- current_ent = {
41
- 'start': token_start,
42
- 'end': token_end,
43
- 'label': token_label
44
- }
45
- else:
46
- current_ent['end'] = token_end
47
-
48
- if current_ent:
49
- combined_ents.append(current_ent) # add the last entity after the loop finishes
50
-
51
-
52
- doc = { # doc for viz
53
- "text": text,
54
- "ents": combined_ents,
55
- "title": None
56
- }
57
-
58
- options = {"ents": list(colors.keys()), "colors": colors}
59
- html = displacy.render(doc, style="ent", manual=True, options=options)
60
- return html
61
 
62
  def ner_visualization(text):
63
- predictions = ner_pipe(text)
64
- return process_prediction(text, predictions)
65
-
66
- def build_interface():
67
- iface = gr.Interface(
68
- fn=ner_visualization,
69
- inputs=gr.Textbox(label="Input Text"),
70
- outputs="html",
71
- title="NER Bahasa Indonesia",
72
- description="Enter text to see named entity recognition results highlighted."
73
- )
74
- return iface
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  if __name__ == "__main__":
78
- app = build_interface()
79
- app.launch()
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  from spacy import displacy
4
+ import torch
5
 
6
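+ # warm up torch with a throwaway CPU tensor (NOTE(review): intended as ZeroGPU initialization; this only allocates on CPU, so confirm it has any effect)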
7
+ torch.zeros(1).cpu()
 
 
 
 
8
 
9
+ # load model pipeline globally
10
+ try:
11
+ ner_pipe = pipeline(
12
+ task="ner",
13
+ model="cindyangelira/ner-roberta-large-bahasa-indonesia-finetuned",
14
+ aggregation_strategy="simple",
15
+ device=-1
16
+ )
17
+ except Exception as e:
18
+ print(f"Error loading model: {e}")
19
+ raise
20
+
21
+ # Define colors for each tag
22
+ ENTITY_COLORS = {
23
+ "O": "#ffffff", # White for 'O'
24
+ "PER": "#ffadad", # Light red for 'PERSON'
25
+ "LOC": "#ffda83", # Light yellow for 'LOCATION'
26
+ "DATE_TIME": "#ffa500", # Light orange for 'DOB'
27
+ "EMAIL": "#85e0e0", # Light cyan for 'EMAIL'
28
+ "GENDER": "#c3c3e0", # Light gray for 'GENDER'
29
+ "SSN": "#800080", # Purple for 'ID'
30
+ "PHONE": "#d1ff85" # Light green for 'PHONE NUMBER'
31
+ }
32
 
33
+ def get_colors():
34
+ return ENTITY_COLORS.copy()
35
 
36
  def process_prediction(text, pred):
37
+ if not text or not pred:
38
+ return "<p>No text or predictions to process</p>"
39
+
40
  colors = get_colors()
41
+ combined_ents = []
42
+ current_ent = None
43
+
44
+ try:
45
+ for token in pred:
46
+ token_label = token['entity_group']
47
+ token_start = token['start']
48
+ token_end = token['end']
49
+
50
+ if current_ent is None or current_ent['label'] != token_label:
51
+ if current_ent:
52
+ combined_ents.append(current_ent)
53
+ current_ent = {
54
+ 'start': token_start,
55
+ 'end': token_end,
56
+ 'label': token_label
57
+ }
58
+ else:
59
+ current_ent['end'] = token_end
60
+
61
+ if current_ent:
62
+ combined_ents.append(current_ent)
63
 
64
+ doc = {
65
+ "text": text,
66
+ "ents": combined_ents,
67
+ "title": None
68
+ }
69
+
70
+ options = {"ents": list(colors.keys()), "colors": colors}
71
+ html = displacy.render(doc, style="ent", manual=True, options=options)
72
+ return html
73
+
74
+ except Exception as e:
75
+ return f"<p>Error processing predictions: {str(e)}</p>"
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  def ner_visualization(text):
78
+ if not text or not text.strip():
79
+ return "<p>Please enter some text</p>"
80
+
81
+ try:
82
+ predictions = ner_pipe(text)
83
+ return process_prediction(text, predictions)
84
+ except Exception as e:
85
+ return f"<p>Error during NER processing: {str(e)}</p>"
 
 
 
 
86
 
87
+ # create Gradio interface
88
+ iface = gr.Interface(
89
+ fn=ner_visualization,
90
+ inputs=gr.Textbox(
91
+ label="Input Text",
92
+ placeholder="Enter text in Bahasa Indonesia..."
93
+ ),
94
+ outputs="html",
95
+ title="NER Bahasa Indonesia",
96
+ description="Enter text to see named entity recognition results highlighted.",
97
+ examples=[
98
+ ["Joko Widodo lahir di Surakarta pada tanggal 21 Juni 1961."],
99
+ ["Email saya adalah [email protected] dan nomor HP 081234567890."]
100
+ ]
101
+ )
102
 
103
  if __name__ == "__main__":
104
+ try:
105
+ iface.launch(
106
+ server_name="0.0.0.0",
107
+ server_port=7860,
108
+ share=False
109
+ )
110
+ except Exception as e:
111
+ print(f"Error launching interface: {e}")