Spaces:
Running
Running
donbr
committed on
Commit
·
36b7b70
1
Parent(s):
b495719
simplify approach
Browse files- app.py +176 -99
- app2.py +0 -279
- examples-nuner.json +0 -38
- examples-pii.json +0 -92
- examples.json +129 -3
app.py
CHANGED
@@ -1,21 +1,20 @@
|
|
1 |
-
|
2 |
import json
|
3 |
-
|
4 |
-
with open("examples.json", "r") as f:
|
5 |
-
examples = json.load(f)
|
6 |
-
|
7 |
-
from typing import Dict, Union
|
8 |
-
from gliner import GLiNER
|
9 |
import gradio as gr
|
|
|
10 |
|
11 |
-
|
|
|
|
|
12 |
|
|
|
13 |
def merge_entities(entities):
|
14 |
if not entities:
|
15 |
return []
|
16 |
merged = []
|
17 |
current = entities[0]
|
18 |
for next_entity in entities[1:]:
|
|
|
19 |
if next_entity['entity'] == current['entity'] and (next_entity['start'] == current['end'] + 1 or next_entity['start'] == current['end']):
|
20 |
current['word'] += ' ' + next_entity['word']
|
21 |
current['end'] = next_entity['end']
|
@@ -25,103 +24,181 @@ def merge_entities(entities):
|
|
25 |
merged.append(current)
|
26 |
return merged
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
)
|
31 |
-
|
32 |
-
r = {
|
33 |
-
"text": text,
|
34 |
-
"entities": [
|
35 |
-
{
|
36 |
-
"entity": entity["label"],
|
37 |
-
"word": entity["text"],
|
38 |
-
"start": entity["start"],
|
39 |
-
"end": entity["end"],
|
40 |
-
"score": 0,
|
41 |
-
}
|
42 |
-
for entity in model.predict_entities(
|
43 |
-
text, labels, flat_ner=not nested_ner, threshold=threshold
|
44 |
-
)
|
45 |
-
],
|
46 |
-
}
|
47 |
-
# r["entities"] = merge_entities(r["entities"])
|
48 |
-
return r
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
* Paper: https://arxiv.org/abs/2311.08526
|
62 |
-
* Repository: https://github.com/urchade/GLiNER
|
63 |
-
"""
|
64 |
-
)
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
)
|
69 |
-
with gr.
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
label="Threshold",
|
82 |
-
info="Lower the threshold to increase how many entities get predicted.",
|
83 |
-
scale=1,
|
84 |
)
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
)
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
inputs=[input_text, labels, threshold, nested_ner],
|
105 |
-
outputs=output,
|
106 |
-
cache_examples=True,
|
107 |
-
)
|
108 |
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
-
|
127 |
-
demo.
|
|
|
|
1 |
+
import os
|
2 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import gradio as gr
|
4 |
+
from gliner import GLiNER
|
5 |
|
6 |
+
# Load the common examples from the JSON file
|
7 |
+
with open("examples.json", "r", encoding="utf-8") as f:
|
8 |
+
common_examples = json.load(f)
|
9 |
|
10 |
+
# Utility function to merge adjacent entities (used in NuNER Zero)
|
11 |
def merge_entities(entities):
|
12 |
if not entities:
|
13 |
return []
|
14 |
merged = []
|
15 |
current = entities[0]
|
16 |
for next_entity in entities[1:]:
|
17 |
+
# Merge if same label and adjacent
|
18 |
if next_entity['entity'] == current['entity'] and (next_entity['start'] == current['end'] + 1 or next_entity['start'] == current['end']):
|
19 |
current['word'] += ' ' + next_entity['word']
|
20 |
current['end'] = next_entity['end']
|
|
|
24 |
merged.append(current)
|
25 |
return merged
|
26 |
|
27 |
+
# Load the three models
|
28 |
+
model_nuner = GLiNER.from_pretrained("numind/NuZero_token")
|
29 |
+
model_pii = GLiNER.from_pretrained("urchade/gliner_multi_pii-v1")
|
30 |
+
model_med = GLiNER.from_pretrained("urchade/gliner_medium-v2.1")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
+
# Define NER functions for each model
|
33 |
+
def ner_nuner(text, labels, threshold, nested_ner):
    """Run the NuNER Zero model on *text* and build a highlighted-text payload.

    Args:
        text: Input text to annotate.
        labels: Comma-separated entity labels, e.g. ``"person, date"``.
        threshold: Confidence threshold forwarded to ``predict_entities``.
        nested_ner: When true, ``flat_ner`` is disabled in the model call.

    Returns:
        ``{"text": text, "entities": [...]}`` where every entity dict carries
        the keys ``entity``, ``word``, ``start``, ``end`` and ``score``.
        Adjacent entities with the same label are merged via
        ``merge_entities`` before being returned.
    """
    # Drop blank labels (empty textbox, "a,,b", trailing comma) so the model
    # is never queried with "" as an entity type.
    label_list = [lbl.strip() for lbl in labels.split(",") if lbl.strip()]

    # Nothing to annotate or nothing to look for: skip the model call.
    if not label_list or not (text and text.strip()):
        return {"text": text, "entities": []}

    pred_entities = model_nuner.predict_entities(
        text, label_list, flat_ner=not nested_ner, threshold=threshold
    )
    entities = [
        {
            "entity": entity["label"],
            "word": entity["text"],
            "start": entity["start"],
            "end": entity["end"],
            # The score is fixed at 0; the model's confidence is not surfaced.
            "score": 0,
        }
        for entity in pred_entities
    ]
    return {"text": text, "entities": merge_entities(entities)}
|
42 |
|
43 |
+
def ner_pii(text, labels, threshold, nested_ner):
    """Run the GLiNER PII model on *text* and build a highlighted-text payload.

    Args:
        text: Input text to annotate.
        labels: Comma-separated entity labels, e.g. ``"person, address"``.
        threshold: Confidence threshold forwarded to ``predict_entities``.
        nested_ner: When true, ``flat_ner`` is disabled in the model call.

    Returns:
        ``{"text": text, "entities": [...]}`` where every entity dict carries
        the keys ``entity``, ``word``, ``start``, ``end`` and ``score``.
    """
    # Drop blank labels (empty textbox, "a,,b", trailing comma) so the model
    # is never queried with "" as an entity type.
    label_list = [lbl.strip() for lbl in labels.split(",") if lbl.strip()]

    # Nothing to annotate or nothing to look for: skip the model call.
    if not label_list or not (text and text.strip()):
        return {"text": text, "entities": []}

    pred_entities = model_pii.predict_entities(
        text, label_list, flat_ner=not nested_ner, threshold=threshold
    )
    entities = [
        {
            "entity": entity["label"],
            "word": entity["text"],
            "start": entity["start"],
            "end": entity["end"],
            # The score is fixed at 0; the model's confidence is not surfaced.
            "score": 0,
        }
        for entity in pred_entities
    ]
    return {"text": text, "entities": entities}
|
51 |
|
52 |
+
def ner_med(text, labels, threshold, nested_ner):
    """Run the GLiNER medium model on *text* and build a highlighted-text payload.

    Args:
        text: Input text to annotate.
        labels: Comma-separated entity labels, e.g. ``"person, date"``.
        threshold: Confidence threshold forwarded to ``predict_entities``.
        nested_ner: When true, ``flat_ner`` is disabled in the model call.

    Returns:
        ``{"text": text, "entities": [...]}`` where every entity dict carries
        the keys ``entity``, ``word``, ``start``, ``end`` and ``score``.
    """
    # Drop blank labels (empty textbox, "a,,b", trailing comma) so the model
    # is never queried with "" as an entity type.
    label_list = [lbl.strip() for lbl in labels.split(",") if lbl.strip()]

    # Nothing to annotate or nothing to look for: skip the model call.
    if not label_list or not (text and text.strip()):
        return {"text": text, "entities": []}

    pred_entities = model_med.predict_entities(
        text, label_list, flat_ner=not nested_ner, threshold=threshold
    )
    entities = [
        {
            "entity": entity["label"],
            "word": entity["text"],
            "start": entity["start"],
            "end": entity["end"],
            # The score is fixed at 0; the model's confidence is not surfaced.
            "score": 0,
        }
        for entity in pred_entities
    ]
    return {"text": text, "entities": entities}
|
60 |
|
61 |
+
# Use the first example from the common examples for default values
|
62 |
+
default_text, default_labels, default_threshold, default_nested = common_examples[0]
|
|
|
|
|
|
|
|
|
63 |
|
64 |
+
# Build the combined Gradio app with three tabs
|
65 |
+
with gr.Blocks(title="GLiNER NER Testbed") as demo:
|
66 |
+
gr.Markdown("# GLiNER NER Testbed")
|
67 |
+
with gr.Accordion("Detailed information on the approach", open=True):
|
68 |
+
gr.Markdown(
|
69 |
+
"""
|
70 |
+
**GLiNER** is a state-of-the-art Named Entity Recognition (NER) system that leverages a BERT-like bidirectional transformer encoder to identify a wide range of entity types in text. Unlike conventional NER models that are restricted to fixed entity categories, GLiNER supports flexible, zero-shot extraction, making it ideal for diverse real-world applications. It also provides a resource-efficient alternative to large language models (LLMs) for scenarios where cost and speed are critical. Distributed under the Apache 2.0 license, GLiNER is commercially friendly and readily deployable.
|
71 |
+
|
72 |
+
**Useful Links**
|
73 |
+
|
74 |
+
- **Model:** [gliner_medium-v2.1](https://huggingface.co/urchade/gliner_medium-v2.1)
|
75 |
+
- **All GLiNER Models:** [Hugging Face GLiNER Models](https://huggingface.co/models?library=gliner)
|
76 |
+
- **Research Paper:** [arXiv:2311.08526](https://arxiv.org/abs/2311.08526)
|
77 |
+
- **Repository:** [GitHub - GLiNER](https://github.com/urchade/GLiNER)
|
78 |
+
"""
|
|
|
|
|
|
|
79 |
)
|
80 |
+
|
81 |
+
with gr.Tabs():
|
82 |
+
# Tab for GLiNER-medium
|
83 |
+
with gr.Tab("GLiNER-medium"):
|
84 |
+
gr.Markdown("## GLiNER-medium-v2.1")
|
85 |
+
with gr.Accordion("How to run this model locally", open=False):
|
86 |
+
gr.Markdown(
|
87 |
+
"""
|
88 |
+
**Installation:**
|
89 |
+
```
|
90 |
+
!pip install gliner
|
91 |
+
```
|
92 |
+
**Usage:**
|
93 |
+
Load the model with `GLiNER.from_pretrained("urchade/gliner_medium-v2.1")`
|
94 |
+
and call `predict_entities` to perform zero-shot NER.
|
95 |
+
"""
|
96 |
+
)
|
97 |
+
gr.Code(
|
98 |
+
'''from gliner import GLiNER
|
99 |
+
model = GLiNER.from_pretrained("urchade/gliner_medium-v2.1")''',
|
100 |
+
language="python",
|
101 |
+
)
|
102 |
+
input_text_med = gr.Textbox(value=default_text, label="Text input", placeholder="Enter your text here")
|
103 |
+
with gr.Row():
|
104 |
+
labels_med = gr.Textbox(value=default_labels, label="Labels", placeholder="Enter labels (comma separated)", scale=2)
|
105 |
+
threshold_med = gr.Slider(0, 1, value=default_threshold, step=0.01, label="Threshold", info="Lower threshold to increase predictions", scale=1)
|
106 |
+
nested_ner_med = gr.Checkbox(value=default_nested, label="Nested NER", info="Allow for nested NER?", scale=0)
|
107 |
+
output_med = gr.HighlightedText(label="Predicted Entities")
|
108 |
+
submit_btn_med = gr.Button("Submit")
|
109 |
+
gr.Examples(
|
110 |
+
common_examples,
|
111 |
+
fn=ner_med,
|
112 |
+
inputs=[input_text_med, labels_med, threshold_med, nested_ner_med],
|
113 |
+
outputs=output_med,
|
114 |
+
cache_examples=False,
|
115 |
)
|
116 |
+
input_text_med.submit(ner_med, inputs=[input_text_med, labels_med, threshold_med, nested_ner_med], outputs=output_med)
|
117 |
+
labels_med.submit(ner_med, inputs=[input_text_med, labels_med, threshold_med, nested_ner_med], outputs=output_med)
|
118 |
+
threshold_med.release(ner_med, inputs=[input_text_med, labels_med, threshold_med, nested_ner_med], outputs=output_med)
|
119 |
+
submit_btn_med.click(ner_med, inputs=[input_text_med, labels_med, threshold_med, nested_ner_med], outputs=output_med)
|
120 |
+
nested_ner_med.change(ner_med, inputs=[input_text_med, labels_med, threshold_med, nested_ner_med], outputs=output_med)
|
|
|
|
|
|
|
|
|
121 |
|
122 |
+
# Tab for GLiNER-PII
|
123 |
+
with gr.Tab("GLiNER-PII"):
|
124 |
+
gr.Markdown("## GLiNER-PII")
|
125 |
+
with gr.Accordion("How to run this model locally", open=False):
|
126 |
+
gr.Markdown(
|
127 |
+
"""
|
128 |
+
**Installation:**
|
129 |
+
```
|
130 |
+
!pip install gliner
|
131 |
+
```
|
132 |
+
**Usage:**
|
133 |
+
Load the model with `GLiNER.from_pretrained("urchade/gliner_multi_pii-v1")`
|
134 |
+
and call `predict_entities` to extract PII.
|
135 |
+
"""
|
136 |
+
)
|
137 |
+
gr.Code(
|
138 |
+
'''from gliner import GLiNER
|
139 |
+
model = GLiNER.from_pretrained("urchade/gliner_multi_pii-v1")''',
|
140 |
+
language="python",
|
141 |
+
)
|
142 |
+
input_text_pii = gr.Textbox(value=default_text, label="Text input", placeholder="Enter your text here")
|
143 |
+
with gr.Row():
|
144 |
+
labels_pii = gr.Textbox(value=default_labels, label="Labels", placeholder="Enter labels (comma separated)", scale=2)
|
145 |
+
threshold_pii = gr.Slider(0, 1, value=default_threshold, step=0.01, label="Threshold", info="Lower threshold to increase predictions", scale=1)
|
146 |
+
nested_ner_pii = gr.Checkbox(value=default_nested, label="Nested NER", info="Allow for nested NER?", scale=0)
|
147 |
+
output_pii = gr.HighlightedText(label="Predicted Entities")
|
148 |
+
submit_btn_pii = gr.Button("Submit")
|
149 |
+
gr.Examples(
|
150 |
+
common_examples,
|
151 |
+
fn=ner_pii,
|
152 |
+
inputs=[input_text_pii, labels_pii, threshold_pii, nested_ner_pii],
|
153 |
+
outputs=output_pii,
|
154 |
+
cache_examples=False,
|
155 |
+
)
|
156 |
+
input_text_pii.submit(ner_pii, inputs=[input_text_pii, labels_pii, threshold_pii, nested_ner_pii], outputs=output_pii)
|
157 |
+
labels_pii.submit(ner_pii, inputs=[input_text_pii, labels_pii, threshold_pii, nested_ner_pii], outputs=output_pii)
|
158 |
+
threshold_pii.release(ner_pii, inputs=[input_text_pii, labels_pii, threshold_pii, nested_ner_pii], outputs=output_pii)
|
159 |
+
submit_btn_pii.click(ner_pii, inputs=[input_text_pii, labels_pii, threshold_pii, nested_ner_pii], outputs=output_pii)
|
160 |
+
nested_ner_pii.change(ner_pii, inputs=[input_text_pii, labels_pii, threshold_pii, nested_ner_pii], outputs=output_pii)
|
161 |
+
|
162 |
+
# Tab for NuNER Zero
|
163 |
+
with gr.Tab("NuNER Zero"):
|
164 |
+
gr.Markdown("## NuNER Zero")
|
165 |
+
with gr.Accordion("How to run this model locally", open=False):
|
166 |
+
gr.Markdown(
|
167 |
+
"""
|
168 |
+
**Installation:**
|
169 |
+
```
|
170 |
+
!pip install gliner
|
171 |
+
```
|
172 |
+
**Usage:**
|
173 |
+
Load the model with `GLiNER.from_pretrained("numind/NuZero_token")`
|
174 |
+
and call `predict_entities` to perform zero-shot NER.
|
175 |
+
"""
|
176 |
+
)
|
177 |
+
gr.Code(
|
178 |
+
'''from gliner import GLiNER
|
179 |
+
model = GLiNER.from_pretrained("numind/NuZero_token")''',
|
180 |
+
language="python",
|
181 |
+
)
|
182 |
+
input_text_nuner = gr.Textbox(value=default_text, label="Text input", placeholder="Enter your text here")
|
183 |
+
with gr.Row():
|
184 |
+
labels_nuner = gr.Textbox(value=default_labels, label="Labels", placeholder="Enter labels (comma separated)", scale=2)
|
185 |
+
threshold_nuner = gr.Slider(0, 1, value=default_threshold, step=0.01, label="Threshold", info="Lower threshold to increase predictions", scale=1)
|
186 |
+
nested_ner_nuner = gr.Checkbox(value=default_nested, label="Nested NER", info="Allow for nested NER?", scale=0)
|
187 |
+
output_nuner = gr.HighlightedText(label="Predicted Entities")
|
188 |
+
submit_btn_nuner = gr.Button("Submit")
|
189 |
+
gr.Examples(
|
190 |
+
common_examples,
|
191 |
+
fn=ner_nuner,
|
192 |
+
inputs=[input_text_nuner, labels_nuner, threshold_nuner, nested_ner_nuner],
|
193 |
+
outputs=output_nuner,
|
194 |
+
cache_examples=False,
|
195 |
+
)
|
196 |
+
input_text_nuner.submit(ner_nuner, inputs=[input_text_nuner, labels_nuner, threshold_nuner, nested_ner_nuner], outputs=output_nuner)
|
197 |
+
labels_nuner.submit(ner_nuner, inputs=[input_text_nuner, labels_nuner, threshold_nuner, nested_ner_nuner], outputs=output_nuner)
|
198 |
+
threshold_nuner.release(ner_nuner, inputs=[input_text_nuner, labels_nuner, threshold_nuner, nested_ner_nuner], outputs=output_nuner)
|
199 |
+
submit_btn_nuner.click(ner_nuner, inputs=[input_text_nuner, labels_nuner, threshold_nuner, nested_ner_nuner], outputs=output_nuner)
|
200 |
+
nested_ner_nuner.change(ner_nuner, inputs=[input_text_nuner, labels_nuner, threshold_nuner, nested_ner_nuner], outputs=output_nuner)
|
201 |
|
202 |
+
# Enable queuing and launch the app
|
203 |
+
demo.queue()
|
204 |
+
demo.launch(debug=True)
|
app2.py
DELETED
@@ -1,279 +0,0 @@
|
|
1 |
-
import json
|
2 |
-
from typing import Dict, Union, List
|
3 |
-
from gliner import GLiNER
|
4 |
-
import gradio as gr
|
5 |
-
import os
|
6 |
-
|
7 |
-
# Load available models
|
8 |
-
MODELS = {
|
9 |
-
"GLiNER Medium v2.1": "urchade/gliner_medium-v2.1",
|
10 |
-
"NuNER Zero": "numind/NuZero_token",
|
11 |
-
"GLiNER Multi PII": "urchade/gliner_multi_pii-v1"
|
12 |
-
}
|
13 |
-
|
14 |
-
# Example datasets with descriptions
|
15 |
-
EXAMPLE_SETS = {
|
16 |
-
"General NER": "examples.json",
|
17 |
-
"NuNER Zero": "examples-nuner.json",
|
18 |
-
"PII Detection": "examples-pii.json"
|
19 |
-
}
|
20 |
-
|
21 |
-
# Initialize models (will be loaded on demand)
|
22 |
-
loaded_models = {}
|
23 |
-
|
24 |
-
# Current examples
|
25 |
-
current_examples = []
|
26 |
-
|
27 |
-
def load_example_set(example_set_name):
    """Return the examples stored in the JSON file registered for a set name.

    The file path is looked up in ``EXAMPLE_SETS`` and the file is parsed as
    UTF-8 JSON. An unknown set name, a missing file or malformed JSON is
    reported on stdout and yields an empty list instead of an exception.
    """
    try:
        with open(EXAMPLE_SETS[example_set_name], "r", encoding="utf-8") as handle:
            return json.load(handle)
    except (KeyError, FileNotFoundError, json.JSONDecodeError) as e:
        print(f"Error loading example set {example_set_name}: {e}")
    return []
|
37 |
-
|
38 |
-
# Load default example set
|
39 |
-
current_examples = load_example_set("General NER")
|
40 |
-
|
41 |
-
def get_model(model_name):
    """Return the model registered under *model_name*, loading it on first use.

    Loaded models are kept in ``loaded_models`` so each checkpoint listed in
    ``MODELS`` is fetched through ``GLiNER.from_pretrained`` at most once.
    """
    # Fast path: already loaded by an earlier call.
    if model_name in loaded_models:
        return loaded_models[model_name]

    loaded_models[model_name] = GLiNER.from_pretrained(MODELS[model_name])
    return loaded_models[model_name]
|
47 |
-
|
48 |
-
def merge_entities(entities):
    """Merge adjacent entities of the same type.

    Two consecutive entities are merged when they share the same ``entity``
    label and the second one starts exactly at, or one character after, the
    ``end`` of the first. The merged entity keeps the first entity's fields,
    joins both ``word`` values with a single space and takes the second
    entity's ``end``.

    The input list and the dicts inside it are left untouched: the result is
    built from shallow copies, so callers can keep using the unmerged
    predictions.

    Args:
        entities: Sequence of dicts with at least the keys ``entity``,
            ``word``, ``start`` and ``end``, in scan order.

    Returns:
        A new list of entity dicts with adjacent same-label runs collapsed.
        An empty or missing input yields ``[]``.
    """
    if not entities:
        return []
    merged = []
    # Copy before accumulating so the caller's first dict is not rewritten.
    current = dict(entities[0])
    for next_entity in entities[1:]:
        if (next_entity['entity'] == current['entity'] and
                next_entity['start'] in (current['end'], current['end'] + 1)):
            current['word'] += ' ' + next_entity['word']
            current['end'] = next_entity['end']
        else:
            merged.append(current)
            current = dict(next_entity)
    merged.append(current)
    return merged
|
64 |
-
|
65 |
-
def ner(
    text: str,
    labels: str,
    model_name: str,
    threshold: float,
    nested_ner: bool,
    merge_entities_toggle: bool
) -> Dict[str, Union[str, List]]:
    """Run named entity recognition with the selected model and parameters.

    Args:
        text: Input text to annotate.
        labels: Comma-separated entity labels, e.g. ``"person, date"``.
        model_name: Key identifying the model, resolved through ``get_model``.
        threshold: Confidence threshold forwarded to ``predict_entities``.
        nested_ner: When true, ``flat_ner`` is disabled in the model call.
        merge_entities_toggle: When true, adjacent entities with the same
            label are collapsed with ``merge_entities``.

    Returns:
        ``{"text": text, "entities": [...]}`` where every entity dict carries
        the keys ``entity``, ``word``, ``start``, ``end`` and ``score``
        (``score`` falls back to 0 when the prediction has none).
    """
    # Drop blank labels (empty textbox, "a,,b", trailing comma) so the model
    # is never queried with "" as an entity type.
    label_list = [label.strip() for label in labels.split(",") if label.strip()]

    # Nothing to annotate or nothing to look for: return before get_model so
    # no checkpoint is loaded for a request that cannot produce entities.
    if not label_list or not (text and text.strip()):
        return {"text": text, "entities": []}

    model = get_model(model_name)

    entities = [
        {
            "entity": entity["label"],
            "word": entity["text"],
            "start": entity["start"],
            "end": entity["end"],
            "score": entity.get("score", 0),
        }
        for entity in model.predict_entities(
            text, label_list, flat_ner=not nested_ner, threshold=threshold
        )
    ]

    if merge_entities_toggle:
        entities = merge_entities(entities)

    return {
        "text": text,
        "entities": entities,
    }
|
104 |
-
|
105 |
-
def load_example(example_idx):
    """Return the UI field values for one example of the current example set.

    The result is a 5-tuple ``(text, labels, threshold, nested_ner, merge)``.
    The first four values come from ``current_examples[example_idx]``; the
    merge flag is always ``False``. When no examples are loaded, or the index
    is at or past the end of the list, blank defaults with a threshold of
    0.3 are returned instead.
    """
    if current_examples and example_idx < len(current_examples):
        row = current_examples[example_idx]
        return row[0], row[1], row[2], row[3], False

    # Fallback for an empty example set or an index beyond the last example.
    return "", "", 0.3, False, False
|
112 |
-
|
113 |
-
def switch_example_set(example_set_name):
    """Make *example_set_name* the active example set and return new UI values.

    Rebinds the module-level ``current_examples`` to the freshly loaded set
    and returns a 6-tuple: text, labels, threshold, nested-NER flag, merge
    flag (always ``False``) and a dropdown update listing ``"Example N"``
    choices. If the set is empty, blank defaults and an empty dropdown are
    returned.
    """
    global current_examples
    current_examples = load_example_set(example_set_name)

    # Guard clause: nothing loaded, so reset every field to its default.
    if not current_examples:
        return "", "", 0.3, False, False, gr.Dropdown.update(choices=[], value=None)

    first = current_examples[0]
    example_names = [f"Example {i+1}" for i, _ in enumerate(current_examples)]
    return (
        first[0],
        first[1],
        first[2],
        first[3],
        False,
        gr.Dropdown.update(choices=example_names, value="Example 1"),
    )
|
126 |
-
|
127 |
-
with gr.Blocks(title="Unified NER Interface") as demo:
|
128 |
-
gr.Markdown(
|
129 |
-
"""
|
130 |
-
# Unified Zero-shot Named Entity Recognition Interface
|
131 |
-
|
132 |
-
This interface allows you to compare different zero-shot Named Entity Recognition models.
|
133 |
-
|
134 |
-
## Models Available:
|
135 |
-
- **GLiNER Medium v2.1**: The original GLiNER medium model
|
136 |
-
- **NuNER Zero**: A specialized token-based NER model
|
137 |
-
- **GLiNER Multi PII**: Fine-tuned for detecting personally identifiable information across multiple languages
|
138 |
-
|
139 |
-
## Features:
|
140 |
-
- Select different models
|
141 |
-
- Switch between example sets for different use cases
|
142 |
-
- Toggle nested entity recognition
|
143 |
-
- Toggle entity merging (combining adjacent entities of the same type)
|
144 |
-
- Select from various examples within each set
|
145 |
-
"""
|
146 |
-
)
|
147 |
-
|
148 |
-
with gr.Row():
|
149 |
-
model_dropdown = gr.Dropdown(
|
150 |
-
choices=list(MODELS.keys()),
|
151 |
-
value=list(MODELS.keys())[0],
|
152 |
-
label="Model",
|
153 |
-
info="Select the NER model to use"
|
154 |
-
)
|
155 |
-
example_set_dropdown = gr.Dropdown(
|
156 |
-
choices=list(EXAMPLE_SETS.keys()),
|
157 |
-
value="General NER",
|
158 |
-
label="Example Set",
|
159 |
-
info="Select a set of example texts"
|
160 |
-
)
|
161 |
-
|
162 |
-
with gr.Row():
|
163 |
-
example_dropdown = gr.Dropdown(
|
164 |
-
choices=[f"Example {i+1}" for i in range(len(current_examples))],
|
165 |
-
value="Example 1",
|
166 |
-
label="Example",
|
167 |
-
info="Select a specific example text"
|
168 |
-
)
|
169 |
-
|
170 |
-
input_text = gr.Textbox(
|
171 |
-
value=current_examples[0][0] if current_examples else "",
|
172 |
-
label="Text input",
|
173 |
-
placeholder="Enter your text here",
|
174 |
-
lines=5
|
175 |
-
)
|
176 |
-
|
177 |
-
with gr.Row():
|
178 |
-
labels = gr.Textbox(
|
179 |
-
value=current_examples[0][1] if current_examples else "",
|
180 |
-
label="Entity Labels",
|
181 |
-
placeholder="Enter your labels here (comma separated)",
|
182 |
-
scale=2,
|
183 |
-
)
|
184 |
-
threshold = gr.Slider(
|
185 |
-
0,
|
186 |
-
1,
|
187 |
-
value=current_examples[0][2] if current_examples else 0.3,
|
188 |
-
step=0.01,
|
189 |
-
label="Confidence Threshold",
|
190 |
-
info="Lower the threshold to increase how many entities get predicted.",
|
191 |
-
scale=1,
|
192 |
-
)
|
193 |
-
|
194 |
-
with gr.Row():
|
195 |
-
nested_ner = gr.Checkbox(
|
196 |
-
value=current_examples[0][3] if current_examples else False,
|
197 |
-
label="Nested NER",
|
198 |
-
info="Allow entities to be contained within other entities",
|
199 |
-
)
|
200 |
-
merge_entities_toggle = gr.Checkbox(
|
201 |
-
value=False,
|
202 |
-
label="Merge Adjacent Entities",
|
203 |
-
info="Combine adjacent entities of the same type into a single entity",
|
204 |
-
)
|
205 |
-
|
206 |
-
output = gr.HighlightedText(label="Predicted Entities")
|
207 |
-
submit_btn = gr.Button("Submit")
|
208 |
-
|
209 |
-
# Handling example set selection
|
210 |
-
example_set_dropdown.change(
|
211 |
-
fn=switch_example_set,
|
212 |
-
inputs=[example_set_dropdown],
|
213 |
-
outputs=[input_text, labels, threshold, nested_ner, merge_entities_toggle, example_dropdown]
|
214 |
-
)
|
215 |
-
|
216 |
-
# Handling example selection within a set
|
217 |
-
example_dropdown.change(
|
218 |
-
fn=lambda idx: load_example(int(idx.split()[1]) - 1),
|
219 |
-
inputs=[example_dropdown],
|
220 |
-
outputs=[input_text, labels, threshold, nested_ner, merge_entities_toggle]
|
221 |
-
)
|
222 |
-
|
223 |
-
# Add a model recommendation for the example set
|
224 |
-
def recommend_model(example_set_name):
    """Return a dropdown update selecting the model suited to an example set.

    ``"PII Detection"`` maps to ``"GLiNER Multi PII"``, ``"NuNER Zero"`` maps
    to ``"NuNER Zero"``, and any other set name falls back to
    ``"GLiNER Medium v2.1"``.
    """
    suggested_by_set = {
        "PII Detection": "GLiNER Multi PII",
        "NuNER Zero": "NuNER Zero",
    }
    choice = suggested_by_set.get(example_set_name, "GLiNER Medium v2.1")
    return gr.Dropdown.update(value=choice)
|
232 |
-
|
233 |
-
# Auto-suggest model when changing example set
|
234 |
-
example_set_dropdown.change(
|
235 |
-
fn=recommend_model,
|
236 |
-
inputs=[example_set_dropdown],
|
237 |
-
outputs=[model_dropdown]
|
238 |
-
)
|
239 |
-
|
240 |
-
# Submitting
|
241 |
-
submit_btn.click(
|
242 |
-
fn=ner,
|
243 |
-
inputs=[input_text, labels, model_dropdown, threshold, nested_ner, merge_entities_toggle],
|
244 |
-
outputs=output
|
245 |
-
)
|
246 |
-
input_text.submit(
|
247 |
-
fn=ner,
|
248 |
-
inputs=[input_text, labels, model_dropdown, threshold, nested_ner, merge_entities_toggle],
|
249 |
-
outputs=output
|
250 |
-
)
|
251 |
-
|
252 |
-
# Other interactions
|
253 |
-
model_dropdown.change(
|
254 |
-
fn=ner,
|
255 |
-
inputs=[input_text, labels, model_dropdown, threshold, nested_ner, merge_entities_toggle],
|
256 |
-
outputs=output
|
257 |
-
)
|
258 |
-
|
259 |
-
threshold.release(
|
260 |
-
fn=ner,
|
261 |
-
inputs=[input_text, labels, model_dropdown, threshold, nested_ner, merge_entities_toggle],
|
262 |
-
outputs=output
|
263 |
-
)
|
264 |
-
|
265 |
-
nested_ner.change(
|
266 |
-
fn=ner,
|
267 |
-
inputs=[input_text, labels, model_dropdown, threshold, nested_ner, merge_entities_toggle],
|
268 |
-
outputs=output
|
269 |
-
)
|
270 |
-
|
271 |
-
merge_entities_toggle.change(
|
272 |
-
fn=ner,
|
273 |
-
inputs=[input_text, labels, model_dropdown, threshold, nested_ner, merge_entities_toggle],
|
274 |
-
outputs=output
|
275 |
-
)
|
276 |
-
|
277 |
-
if __name__ == "__main__":
|
278 |
-
demo.queue()
|
279 |
-
demo.launch(debug=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples-nuner.json
DELETED
@@ -1,38 +0,0 @@
|
|
1 |
-
[
|
2 |
-
[
|
3 |
-
"The Moon is Earth's only natural satellite. It orbits at an average distance of 384,400 km (238,900 mi), about 30 times the diameter of Earth. Over time Earth's gravity has caused tidal locking, causing the same side of the Moon to always face Earth. Because of this, the lunar day and the lunar month are the same length, at 29.5 Earth days. The Moon's gravitational pull – and to a lesser extent, the Sun's – are the main drivers of Earth's tides.",
|
4 |
-
"celestial body,quantity,physical concept",
|
5 |
-
0.3,
|
6 |
-
false
|
7 |
-
],
|
8 |
-
[
|
9 |
-
"Their creation was inspired by the ancient Olympic Games, held in Olympia, Greece from the 8th century BC to the 4th century AD. Baron Pierre de Coubertin founded the International Olympic Committee (IOC) in 1894, leading to the first modern Games in Athens in 1896. The IOC is the governing body of the Olympic Movement, which encompasses all entities and individuals involved in the Olympic Games. The Olympic Charter defines their structure and authority.",
|
10 |
-
"location,date,person,event",
|
11 |
-
0.3,
|
12 |
-
true
|
13 |
-
],
|
14 |
-
[
|
15 |
-
"Cells were discovered by Robert Hooke in 1665, who named them after their resemblance to cells inhabited by Christian monks in a monastery. Cell theory, developed in 1839 by Matthias Jakob Schleiden and Theodor Schwann, states that all organisms are composed of one or more cells, that cells are the fundamental unit of structure and function in all living organisms, and that all cells come from pre-existing cells.",
|
16 |
-
"biological concept,person,date",
|
17 |
-
0.3,
|
18 |
-
true
|
19 |
-
],
|
20 |
-
[
|
21 |
-
"During the quarterly review, the CEO emphasized that the International Conference on 'Climate Change and Sustainable Practices for Emerging Economies in Southeast Asia', scheduled to be held next month in Paris, is a crucial platform for our team to present their groundbreaking research on renewable energy advancements. This conference, a collaboration between the United Nations Environmental Programme and various national governments, is recognized globally for facilitating critical discussions among world leaders, environmental scientists, and policymakers. The event aims to forge new partnerships and launch initiatives like 'Renewable Energy Deployment in Developing Regions: Challenges and Opportunities', a multi-year program seeking to address the unique energy needs of underdeveloped areas.",
|
22 |
-
"event,program",
|
23 |
-
0.3,
|
24 |
-
false
|
25 |
-
],
|
26 |
-
[
|
27 |
-
"During the city council's strategic planning session, extensive references were made to the document titled 'Guidelines for Comprehensive Environmental Strategies in Urban Areas for the 21st Century: A Blueprint for Sustainable Urban Development'. This document serves as a foundational text for urban planners and local governments seeking to implement cutting-edge strategies for managing environmental impacts in rapidly growing metropolitan areas. It is complemented by the 'Metropolitan Environmental and Infrastructure Coordination Framework', which outlines specific policies and practices designed to enhance infrastructure resilience and sustainability in urban settings",
|
28 |
-
"document,framework",
|
29 |
-
0.3,
|
30 |
-
false
|
31 |
-
],
|
32 |
-
[
|
33 |
-
"While preparing his thesis on the evolution of scientific thought, John delved into numerous sources, one of which was 'The Impact of Early Exploration on Modern Scientific Developments and Their Influence on Contemporary Scientific Thought: A Comprehensive Study of Geographical Discoveries and Their Lasting Impact on Modern Physics, Biology, and Sociopolitical Structures'. He found this text particularly enlightening, not only for its detailed analysis on how geographical discoveries influenced modern physics and biology but also for its exploration into the sociopolitical impacts these discoveries had on the scientific communities of the 17th and 18th centuries. In addition to this monumental work, he referenced 'Global Shifts in Technological Innovation During the Industrial Revolution', a book that examines the intersection of technology and industrial growth, and 'Philosophical Underpinnings of Modern Science', which offers insights into how Enlightenment philosophies molded scientific methods and inquiries.",
|
34 |
-
"book",
|
35 |
-
0.3,
|
36 |
-
false
|
37 |
-
]
|
38 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples-pii.json
DELETED
@@ -1,92 +0,0 @@
|
|
1 |
-
[
|
2 |
-
[
|
3 |
-
"Pierre Dubois, résident de Paris, a fondé sa propre entreprise, Le Petit Café, située au 15 Rue de la Paix. Son numéro d'entreprise est FR-987654321-1, et il utilise le compte bancaire 9876543210 pour les transactions.",
|
4 |
-
"person, organization, address, company registration number, bank account number",
|
5 |
-
0.5,
|
6 |
-
false
|
7 |
-
],
|
8 |
-
[
|
9 |
-
"Leticia Ramírez, una habitante de Barcelona, tiene una cita médica programada en el Hospital General de Cataluña, situado en 10 Calle de los Ángeles. Su número de la seguridad social es ES-123456789-A y su grupo sanguíneo es AB+.",
|
10 |
-
"person, location, address, social security number, blood type",
|
11 |
-
0.5,
|
12 |
-
false
|
13 |
-
],
|
14 |
-
[
|
15 |
-
"John Smith, from London, teaches mathematics at Royal Academy located at 25 King’s Road. His employee ID is UK-987654-321 and he has been working there since 2015.",
|
16 |
-
"person, profession, organization, address, employee ID number",
|
17 |
-
0.5,
|
18 |
-
false
|
19 |
-
],
|
20 |
-
[
|
21 |
-
"In Frankfurt, Claudia Weber frequently visits her local bank branch, Deutsche Bank, at 48 Hauptstraße. Her account number is DE-1234567890123456, used primarily for her mortgage payments.",
|
22 |
-
"person, location, address, bank account number",
|
23 |
-
0.5,
|
24 |
-
false
|
25 |
-
],
|
26 |
-
[
|
27 |
-
"Marta Rossi, residente a Roma, ha acquistato un appartamento al 123 Via Condotti. Il numero di registrazione della proprietà è IT-654321-2018 e il mutuo è gestito tramite la Banca d'Italia con numero di conto 3216549870.",
|
28 |
-
"person, address, property registration number, bank account number",
|
29 |
-
0.5,
|
30 |
-
false
|
31 |
-
],
|
32 |
-
[
|
33 |
-
"Paulo Coelho, um turista do Brasil, fez um seguro de viagem com a empresa Seguros PT antes de sua viagem para Lisboa. O número da apólice é BR-987654321-123 e inclui cobertura médica.",
|
34 |
-
"person, nationality, company, insurance policy number, coverage",
|
35 |
-
0.5,
|
36 |
-
false
|
37 |
-
],
|
38 |
-
[
|
39 |
-
"Julia Fischer, eine Kundin aus München, hat bei der BayWa AG, einem großen Anbieter von Baustoffen mit Sitz am 77 Industriestraße, einen Kredit aufgenommen. Die Kreditnummer lautet DE-12345678.",
|
40 |
-
"person, city, organization, address, loan number",
|
41 |
-
0.5,
|
42 |
-
false
|
43 |
-
],
|
44 |
-
[
|
45 |
-
"Carlos Sánchez, profesor en la Universidad de Madrid, reside en el 5 Calle de Alcalá. Su número de identificación de profesor es ES-192837465 y tiene un doctorado en filosofía.",
|
46 |
-
"person, profession, address, teacher ID number, degree",
|
47 |
-
0.5,
|
48 |
-
false
|
49 |
-
],
|
50 |
-
[
|
51 |
-
"Sophie Dupont, une journaliste française, travaille pour Le Monde, basé au 33 rue des Écoles à Paris. Son numéro d'identification de presse est FR-75649023.",
|
52 |
-
"person, profession, organization, address, press ID number",
|
53 |
-
0.5,
|
54 |
-
false
|
55 |
-
],
|
56 |
-
[
|
57 |
-
"Manuel Oliveira, um agricultor em Porto, possui uma grande plantação de vinhas na Rua da Estrada, 120. O número de registro agrícola é PT-5678912345.",
|
58 |
-
"person, profession, address, agricultural registration number",
|
59 |
-
0.5,
|
60 |
-
false
|
61 |
-
],
|
62 |
-
[
|
63 |
-
"Elisa Müller, eine Künstlerin aus Berlin, hat ihre neueste Skulptur im öffentlichen Park am Alexanderplatz ausgestellt. Ihre Künstlernummer lautet DE-112233445.",
|
64 |
-
"person, profession, location, artist ID number",
|
65 |
-
0.5,
|
66 |
-
false
|
67 |
-
],
|
68 |
-
[
|
69 |
-
"Federico García, un jugador de fútbol de Sevilla, ha firmado un contrato de tres años con el club Real Betis. Su número de licencia deportiva es ES-9876543210.",
|
70 |
-
"person, profession, organization, sports license number",
|
71 |
-
0.5,
|
72 |
-
false
|
73 |
-
],
|
74 |
-
[
|
75 |
-
"Sarah White, a London-based actress, will be performing in 'Hamlet' at the Globe Theatre located at 21 New Globe Walk. Her Equity membership number is UK-1234567.",
|
76 |
-
"person, profession, location, address, membership number",
|
77 |
-
0.5,
|
78 |
-
false
|
79 |
-
],
|
80 |
-
[
|
81 |
-
"Ricardo Mello, engenheiro civil, trabalha na construção da nova barragem no Rio Douro, Portugal. Seu número de registro profissional é PT-987654321.",
|
82 |
-
"person, profession, project location, professional registration number",
|
83 |
-
0.5,
|
84 |
-
false
|
85 |
-
],
|
86 |
-
[
|
87 |
-
"Giuseppe Conti, un cliente di Milano, ha fatto un acquisto presso il negozio La Rinascente situato in Piazza Duomo. Il numero della sua carta di credito è IT-4567891234567891.",
|
88 |
-
"person, location, address, credit card number",
|
89 |
-
0.5,
|
90 |
-
false
|
91 |
-
]
|
92 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples.json
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
[
|
2 |
[
|
3 |
-
"Libretto by Marius Petipa, based on the 1822 novella ``Trilby, ou Le Lutin d'Argail`` by Charles Nodier, first presented by the Ballet of the Moscow Imperial Bolshoi Theatre on January 25/February 6 (Julian/Gregorian calendar dates), 1870, in Moscow with Polina Karpakova as Trilby and Ludiia Geiten as Miranda and restaged by Petipa for the Imperial Ballet at the Imperial Bolshoi Kamenny Theatre on January 17
|
4 |
"person, book, location, date, actor, character",
|
5 |
0.3,
|
6 |
true
|
7 |
],
|
8 |
[
|
9 |
-
"
|
10 |
"software package, programing language, software tool, degree, job title",
|
11 |
0.3,
|
12 |
false
|
@@ -66,7 +66,7 @@
|
|
66 |
false
|
67 |
],
|
68 |
[
|
69 |
-
"From November 29, 2011 to March 31, 2012, Karimloo returned to ``Les
|
70 |
"person, actor, award, date, location",
|
71 |
0.3,
|
72 |
false
|
@@ -82,5 +82,131 @@
|
|
82 |
"date, person, location, organization, event, flag",
|
83 |
0.3,
|
84 |
false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
]
|
86 |
]
|
|
|
1 |
[
|
2 |
[
|
3 |
+
"Libretto by Marius Petipa, based on the 1822 novella ``Trilby, ou Le Lutin d'Argail`` by Charles Nodier, first presented by the Ballet of the Moscow Imperial Bolshoi Theatre on January 25/February 6 (Julian/Gregorian calendar dates), 1870, in Moscow with Polina Karpakova as Trilby and Ludiia Geiten as Miranda and restaged by Petipa for the Imperial Ballet at the Imperial Bolshoi Kamenny Theatre on January 17–29, 1871 in St. Petersburg with Adèle Grantzow as Trilby and Lev Ivanov as Count Leopold.",
|
4 |
"person, book, location, date, actor, character",
|
5 |
0.3,
|
6 |
true
|
7 |
],
|
8 |
[
|
9 |
+
"* Data Scientist, Data Analyst, or Data Engineer with 1+ years of experience.\n* Experience with technologies such as Docker, Kubernetes, or Kubeflow\n* Machine Learning experience preferred\n* Experience with programming languages such as Python, C++, or SQL preferred\n* Experience with technologies such as Databricks, Qlik, TensorFlow, PyTorch, Python, Dash, Pandas, or NumPy preferred\n* BA or BS degree\n* Active Secret OR Active Top Secret or Active TS/SCI clearance",
|
10 |
"software package, programing language, software tool, degree, job title",
|
11 |
0.3,
|
12 |
false
|
|
|
66 |
false
|
67 |
],
|
68 |
[
|
69 |
+
"From November 29, 2011 to March 31, 2012, Karimloo returned to ``Les Misérables`` to play the lead role of Jean Valjean at The Queen's Theatre, London, for which he won the 2013 Theatregoers' Choice Award for Best Takeover in a Role.",
|
70 |
"person, actor, award, date, location",
|
71 |
0.3,
|
72 |
false
|
|
|
82 |
"date, person, location, organization, event, flag",
|
83 |
0.3,
|
84 |
false
|
85 |
+
],
|
86 |
+
[
|
87 |
+
"Pierre Dubois, résident de Paris, a fondé sa propre entreprise, Le Petit Café, située au 15 Rue de la Paix. Son numéro d'entreprise est FR-987654321-1, et il utilise le compte bancaire 9876543210 pour les transactions.",
|
88 |
+
"person, organization, address, company registration number, bank account number",
|
89 |
+
0.5,
|
90 |
+
false
|
91 |
+
],
|
92 |
+
[
|
93 |
+
"Leticia Ramírez, una habitante de Barcelona, tiene una cita médica programada en el Hospital General de Cataluña, situado en 10 Calle de los Ángeles. Su número de la seguridad social es ES-123456789-A y su grupo sanguíneo es AB+.",
|
94 |
+
"person, location, address, social security number, blood type",
|
95 |
+
0.5,
|
96 |
+
false
|
97 |
+
],
|
98 |
+
[
|
99 |
+
"John Smith, from London, teaches mathematics at Royal Academy located at 25 King’s Road. His employee ID is UK-987654-321 and he has been working there since 2015.",
|
100 |
+
"person, profession, organization, address, employee ID number",
|
101 |
+
0.5,
|
102 |
+
false
|
103 |
+
],
|
104 |
+
[
|
105 |
+
"In Frankfurt, Claudia Weber frequently visits her local bank branch, Deutsche Bank, at 48 Hauptstraße. Her account number is DE-1234567890123456, used primarily for her mortgage payments.",
|
106 |
+
"person, location, address, bank account number",
|
107 |
+
0.5,
|
108 |
+
false
|
109 |
+
],
|
110 |
+
[
|
111 |
+
"Marta Rossi, residente a Roma, ha acquistato un appartamento al 123 Via Condotti. Il numero di registrazione della proprietà è IT-654321-2018 e il mutuo è gestito tramite la Banca d'Italia con numero di conto 3216549870.",
|
112 |
+
"person, address, property registration number, bank account number",
|
113 |
+
0.5,
|
114 |
+
false
|
115 |
+
],
|
116 |
+
[
|
117 |
+
"Paulo Coelho, um turista do Brasil, fez um seguro de viagem com a empresa Seguros PT antes de sua viagem para Lisboa. O número da apólice é BR-987654321-123 e inclui cobertura médica.",
|
118 |
+
"person, nationality, company, insurance policy number, coverage",
|
119 |
+
0.5,
|
120 |
+
false
|
121 |
+
],
|
122 |
+
[
|
123 |
+
"Julia Fischer, eine Kundin aus München, hat bei der BayWa AG, einem großen Anbieter von Baustoffen mit Sitz am 77 Industriestraße, einen Kredit aufgenommen. Die Kreditnummer lautet DE-12345678.",
|
124 |
+
"person, city, organization, address, loan number",
|
125 |
+
0.5,
|
126 |
+
false
|
127 |
+
],
|
128 |
+
[
|
129 |
+
"Carlos Sánchez, profesor en la Universidad de Madrid, reside en el 5 Calle de Alcalá. Su número de identificación de profesor es ES-192837465 y tiene un doctorado en filosofía.",
|
130 |
+
"person, profession, address, teacher ID number, degree",
|
131 |
+
0.5,
|
132 |
+
false
|
133 |
+
],
|
134 |
+
[
|
135 |
+
"Sophie Dupont, une journaliste française, travaille pour Le Monde, basé au 33 rue des Écoles à Paris. Son numéro d'identification de presse est FR-75649023.",
|
136 |
+
"person, profession, organization, address, press ID number",
|
137 |
+
0.5,
|
138 |
+
false
|
139 |
+
],
|
140 |
+
[
|
141 |
+
"Manuel Oliveira, um agricultor em Porto, possui uma grande plantação de vinhas na Rua da Estrada, 120. O número de registro agrícola é PT-5678912345.",
|
142 |
+
"person, profession, address, agricultural registration number",
|
143 |
+
0.5,
|
144 |
+
false
|
145 |
+
],
|
146 |
+
[
|
147 |
+
"Elisa Müller, eine Künstlerin aus Berlin, hat ihre neueste Skulptur im öffentlichen Park am Alexanderplatz ausgestellt. Ihre Künstlernummer lautet DE-112233445.",
|
148 |
+
"person, profession, location, artist ID number",
|
149 |
+
0.5,
|
150 |
+
false
|
151 |
+
],
|
152 |
+
[
|
153 |
+
"Federico García, un jugador de fútbol de Sevilla, ha firmado un contrato de tres años con el club Real Betis. Su número de licencia deportiva es ES-9876543210.",
|
154 |
+
"person, profession, organization, sports license number",
|
155 |
+
0.5,
|
156 |
+
false
|
157 |
+
],
|
158 |
+
[
|
159 |
+
"Sarah White, a London-based actress, will be performing in 'Hamlet' at the Globe Theatre located at 21 New Globe Walk. Her Equity membership number is UK-1234567.",
|
160 |
+
"person, profession, location, address, membership number",
|
161 |
+
0.5,
|
162 |
+
false
|
163 |
+
],
|
164 |
+
[
|
165 |
+
"Ricardo Mello, engenheiro civil, trabalha na construção da nova barragem no Rio Douro, Portugal. Seu número de registro profissional é PT-987654321.",
|
166 |
+
"person, profession, project location, professional registration number",
|
167 |
+
0.5,
|
168 |
+
false
|
169 |
+
],
|
170 |
+
[
|
171 |
+
"Giuseppe Conti, un cliente di Milano, ha fatto un acquisto presso il negozio La Rinascente situato in Piazza Duomo. Il numero della sua carta di credito è IT-4567891234567891.",
|
172 |
+
"person, location, address, credit card number",
|
173 |
+
0.5,
|
174 |
+
false
|
175 |
+
],
|
176 |
+
[
|
177 |
+
"The Moon is Earth's only natural satellite. It orbits at an average distance of 384,400 km (238,900 mi), about 30 times the diameter of Earth. Over time Earth's gravity has caused tidal locking, causing the same side of the Moon to always face Earth. Because of this, the lunar day and the lunar month are the same length, at 29.5 Earth days. The Moon's gravitational pull – and to a lesser extent, the Sun's – are the main drivers of Earth's tides.",
|
178 |
+
"celestial body,quantity,physical concept",
|
179 |
+
0.3,
|
180 |
+
false
|
181 |
+
],
|
182 |
+
[
|
183 |
+
"Their creation was inspired by the ancient Olympic Games, held in Olympia, Greece from the 8th century BC to the 4th century AD. Baron Pierre de Coubertin founded the International Olympic Committee (IOC) in 1894, leading to the first modern Games in Athens in 1896. The IOC is the governing body of the Olympic Movement, which encompasses all entities and individuals involved in the Olympic Games. The Olympic Charter defines their structure and authority.",
|
184 |
+
"location,date,person,event",
|
185 |
+
0.3,
|
186 |
+
true
|
187 |
+
],
|
188 |
+
[
|
189 |
+
"Cells were discovered by Robert Hooke in 1665, who named them after their resemblance to cells inhabited by Christian monks in a monastery. Cell theory, developed in 1839 by Matthias Jakob Schleiden and Theodor Schwann, states that all organisms are composed of one or more cells, that cells are the fundamental unit of structure and function in all living organisms, and that all cells come from pre-existing cells.",
|
190 |
+
"biological concept,person,date",
|
191 |
+
0.3,
|
192 |
+
true
|
193 |
+
],
|
194 |
+
[
|
195 |
+
"During the quarterly review, the CEO emphasized that the International Conference on 'Climate Change and Sustainable Practices for Emerging Economies in Southeast Asia', scheduled to be held next month in Paris, is a crucial platform for our team to present their groundbreaking research on renewable energy advancements. This conference, a collaboration between the United Nations Environmental Programme and various national governments, is recognized globally for facilitating critical discussions among world leaders, environmental scientists, and policymakers. The event aims to forge new partnerships and launch initiatives like 'Renewable Energy Deployment in Developing Regions: Challenges and Opportunities', a multi-year program seeking to address the unique energy needs of underdeveloped areas.",
|
196 |
+
"event,program",
|
197 |
+
0.3,
|
198 |
+
false
|
199 |
+
],
|
200 |
+
[
|
201 |
+
"During the city council's strategic planning session, extensive references were made to the document titled 'Guidelines for Comprehensive Environmental Strategies in Urban Areas for the 21st Century: A Blueprint for Sustainable Urban Development'. This document serves as a foundational text for urban planners and local governments seeking to implement cutting-edge strategies for managing environmental impacts in rapidly growing metropolitan areas. It is complemented by the 'Metropolitan Environmental and Infrastructure Coordination Framework', which outlines specific policies and practices designed to enhance infrastructure resilience and sustainability in urban settings",
|
202 |
+
"document,framework",
|
203 |
+
0.3,
|
204 |
+
false
|
205 |
+
],
|
206 |
+
[
|
207 |
+
"While preparing his thesis on the evolution of scientific thought, John delved into numerous sources, one of which was 'The Impact of Early Exploration on Modern Scientific Developments and Their Influence on Contemporary Scientific Thought: A Comprehensive Study of Geographical Discoveries and Their Lasting Impact on Modern Physics, Biology, and Sociopolitical Structures'. He found this text particularly enlightening, not only for its detailed analysis on how geographical discoveries influenced modern physics and biology but also for its exploration into the sociopolitical impacts these discoveries had on the scientific communities of the 17th and 18th centuries. In addition to this monumental work, he referenced 'Global Shifts in Technological Innovation During the Industrial Revolution', a book that examines the intersection of technology and industrial growth, and 'Philosophical Underpinnings of Modern Science', which offers insights into how Enlightenment philosophies molded scientific methods and inquiries.",
|
208 |
+
"book",
|
209 |
+
0.3,
|
210 |
+
false
|
211 |
]
|
212 |
]
|