Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from transformers import pipeline
|
3 |
+
import time
|
4 |
+
import torch
|
5 |
+
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)


@st.cache_resource
def _load_pipeline():
    """Build the token-classification (NER) pipeline once per server process.

    Streamlit re-executes this script on every widget interaction; caching the
    pipeline as a resource avoids re-instantiating the model on each rerun.

    Returns:
        The ``transformers`` pipeline object created for the "ner" task.
    """
    print('Preparing pipeline ...\n')
    ner_pipeline = pipeline(
        "ner",
        model="seddiktrk/xlm-roberta-base-finetuned-panx-en",
        device=device,
    )
    print('\nPipe Ready !!!')
    return ner_pipeline


# Load the NER pipeline
pipe = _load_pipeline()
|
13 |
+
# Example texts
|
14 |
+
# Sample sentences offered in the example dropdown, keyed by the short
# language tag that is shown to the user.
examples = {
    "en": "My name is Clara and I live in Berkeley, California.",
    "fr": "Je m'appelle Marie et je travaille dans un café à Lyon.",
    "ar": "اسمي أحمد وأدرس في جامعة القاهرة.",
    "de": "Mein Name ist Hans und ich komme aus München.",
    "es": "Mi nombre es Lucía y vivo en una pequeña ciudad en México.",
    "it": "Mi chiamo Giulia e faccio il medico a Roma.",
    "pt": "Chamo-me Ana e moro em uma fazenda no Brasil.",
    "ru": "Меня зовут Ольга, и я живу в Санкт-Петербурге.",
    "jp": "私の名前は佐藤です。東京でITエンジニアとして働いています",
    "zh": "我叫李华,在北京的一家公司上班",
}
|
27 |
+
|
28 |
+
# Define colors for each entity type
|
29 |
+
# Maps an entity label to (highlight background colour, label badge colour).
ENTITY_COLORS = {
    "PER": ("#F7D4DA", "#E31A1C"),  # Light pink background, red text
    "ORG": ("#D4E2F4", "#2171B5"),  # Light blue background, blue text
    "LOC": ("#E8DAEF", "#6A51A3"),  # Light purple background, purple text
    # "MISC": ("#FFE5B4", "#FF8C00"),  # Light orange background, dark orange text
}

# Colours used for any label that has no entry in ENTITY_COLORS.
_DEFAULT_ENTITY_COLORS = ("#FFD700", "#FF4500")


def get_colored_text(text, entities):
    """Return ``text`` as an HTML fragment with each entity highlighted.

    Args:
        text: The original input string the entities refer to.
        entities: Iterable of dicts, each providing ``'start'`` and ``'end'``
            character offsets into ``text`` and an ``'entity_group'`` label.

    Returns:
        A string in which every entity span is wrapped in a coloured
        ``<span>`` followed by a small badge showing its label. All text taken
        from ``text`` and every label is HTML-escaped, so the result can be
        rendered with ``unsafe_allow_html=True`` without the input being
        interpreted as markup.

    Entities are processed in order of their start offset; an entity that
    starts before the previous one ended is skipped.
    """
    import html  # local import: the file's import header is not touched

    pieces = []
    cursor = 0  # index in ``text`` of the first character not yet emitted
    for entity in sorted(entities, key=lambda item: item['start']):
        start, end = entity['start'], entity['end']
        if start < cursor:
            # Overlaps a span that was already emitted; skip instead of
            # producing nested / corrupted markup.
            continue

        label = entity['entity_group']
        background_color, text_color = ENTITY_COLORS.get(
            label, _DEFAULT_ENTITY_COLORS
        )

        # Plain text between the previous entity and this one.
        pieces.append(html.escape(text[cursor:start], quote=False))

        # Kept on a single line on purpose: leading newlines / indentation in
        # the fragment can make Markdown treat it as a code block.
        pieces.append(
            '<span style="'
            f'background-color:{background_color}; '
            'padding: 3px 5px; '
            'border-radius: 5px; '
            'margin: 0 2px; '
            'display: inline-block;'
            '">'
            f'{html.escape(text[start:end], quote=False)}'
            '<span style="'
            f'background-color:{text_color}; '
            'color: white; '
            'padding: 1px 5px; '
            'border-radius: 5px; '
            'margin-left: 5px; '
            'font-size: 0.85em; '
            'vertical-align: middle;'
            '">'
            f'{html.escape(str(label))}'
            '</span>'
            '</span>'
        )
        cursor = max(cursor, end)

    # Whatever follows the last entity.
    pieces.append(html.escape(text[cursor:], quote=False))
    return "".join(pieces)
|
74 |
+
# Streamlit interface
|
75 |
+
|
76 |
+
# Streamlit app
|
77 |
+
# Page heading followed by a short, greyed-out description of the app.
st.title('Multilingual NER')
st.markdown(
    """
<p style='color: grey; font-size: 0.85em;'>
This application performs Named Entity Recognition (NER) across 100+ languages.
The model excels in cross-lingual transfer and capable of processing text that contains multiple languages simultaneously.
</p>
""",
    unsafe_allow_html=True
)
st.write("### 🔠 Token Classification")
|
88 |
+
|
89 |
+
|
90 |
+
# Create a two-column layout
|
91 |
+
# Wide left column for the text area, narrow right column for the dropdown.
col1, col2 = st.columns([4, 1])  # Adjust column widths as needed

# Dropdown in the right column
with col2:
    selected_example = st.selectbox(
        'Select an example:',
        list(examples),
    )

# Text area in the left column, pre-filled with the selected example sentence
with col1:
    user_input = st.text_area(
        'Enter your text here:',
        value=examples[selected_example],
    )
|
103 |
+
|
104 |
+
|
105 |
+
# Button to compute
|
106 |
+
if st.button("Compute"):
    if not user_input.strip():
        # Nothing to analyse; avoid sending blank text to the model.
        st.warning("Please enter some text first.")
    else:
        with st.spinner():
            # Time only the model call so the figure shown below reflects
            # inference and not HTML rendering.
            start_time = time.perf_counter()
            # Get NER results (sub-word tokens merged into entity groups)
            ner_results = pipe(user_input, aggregation_strategy="simple")
            elapsed = time.perf_counter() - start_time

            # Build the highlighted HTML for the recognised entities
            colored_text = get_colored_text(user_input, ner_results)

        # Display the results
        st.markdown(colored_text, unsafe_allow_html=True)
        st.write(f"Inference time: {elapsed:.2f} seconds")

        with st.expander("Show raw output"):
            # Second call without aggregation to show the pipeline's
            # un-grouped output.
            raw_results = pipe(user_input)
            st.json(raw_results)
|