Spaces:
Sleeping
Sleeping
import html
import time

import streamlit as st
import torch
from transformers import pipeline
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)


@st.cache_resource
def load_pipeline():
    """Build the token-classification (NER) pipeline once per server process.

    Streamlit re-executes this script on every widget interaction. Without
    caching, the model would be re-instantiated on each rerun, so the loader
    is wrapped in ``st.cache_resource`` and the resulting pipeline object is
    shared across reruns and sessions.

    Returns:
        The ``transformers`` NER pipeline placed on the module-level ``device``.
    """
    print('Preparing pipeline ...\n')
    ner_pipeline = pipeline(
        "ner",
        model="seddiktrk/xlm-roberta-base-finetuned-panx-all",
        device=device,
    )
    print('\nPipe Ready !!!')
    return ner_pipeline


# Load the NER pipeline (served from the cache after the first run).
pipe = load_pipeline()
# Sample sentences for the example picker. Each key is the short language
# tag displayed in the dropdown; each value pre-fills the text area.
examples = {
    # English
    "en": "My name is Clara and I live in Berkeley, California.",
    # French
    "fr": "Je m'appelle Marie et je travaille dans un café à Lyon.",
    # Arabic
    "ar": "اسمي أحمد وأدرس في جامعة القاهرة.",
    # German
    "de": "Mein Name ist Hans und ich komme aus München.",
    # Spanish
    "es": "Mi nombre es Lucía y vivo en una pequeña ciudad en México.",
    # Italian
    "it": "Mi chiamo Giulia e faccio il medico a Roma.",
    # Portuguese
    "pt": "Chamo-me Ana e moro em uma fazenda no Brasil.",
    # Russian
    "ru": "Меня зовут Ольга, и я живу в Санкт-Петербурге.",
    # Japanese (key kept as "jp" because it is the label users see)
    "jp": "私の名前は佐藤です。東京でITエンジニアとして働いています",
    # Chinese
    "zh": "我叫李华,在北京的一家公司上班",
}
# Colors per entity label as (highlight background, label-badge background).
# The badge text itself is always rendered in white.
ENTITY_COLORS = {
    "PER": ("#F7D4DA", "#E31A1C"),  # light pink highlight, red badge
    "ORG": ("#D4E2F4", "#2171B5"),  # light blue highlight, blue badge
    "LOC": ("#E8DAEF", "#6A51A3"),  # light purple highlight, purple badge
    # "MISC": ("#FFE5B4", "#FF8C00"),  # light orange highlight, dark orange badge (disabled)
}

# Colors used for any label that has no entry in ENTITY_COLORS.
_FALLBACK_COLORS = ("#FFD700", "#FF4500")


def get_colored_text(text, entities):
    """Return ``text`` as an HTML fragment with every entity highlighted.

    Each entity span is wrapped in a colored ``<span>`` followed by a small
    badge showing its label. Everything copied from ``text`` and every label
    is HTML-escaped, because the result is meant to be rendered as raw HTML
    and ``text`` comes straight from the user.

    Args:
        text: The string the entities were extracted from.
        entities: Iterable of mappings carrying ``start`` and ``end``
            character offsets into ``text`` and an ``entity_group`` label.
            They may be given in any order; a span that overlaps an earlier
            one is skipped so the markup never nests or duplicates text.

    Returns:
        The HTML string. With no entities this is just the escaped ``text``.
    """
    fragments = []
    cursor = 0  # index in ``text`` of the first character not yet emitted

    # Walk the spans left to right so the output can be assembled in a single
    # pass instead of splicing into a growing string with a running offset.
    for entity in sorted(entities, key=lambda item: item["start"]):
        start, end = entity["start"], entity["end"]
        if start < cursor:
            # Overlaps a span that was already rendered.
            continue

        label = entity["entity_group"]
        background_color, badge_color = ENTITY_COLORS.get(label, _FALLBACK_COLORS)

        # Plain text between the previous entity and this one.
        fragments.append(html.escape(text[cursor:start], quote=False))
        # Markup is kept on a single line so a Markdown renderer cannot
        # mistake indented or blank-separated lines for other constructs.
        fragments.append(
            '<span style="'
            f"background-color:{background_color}; "
            "padding: 3px 5px; border-radius: 5px; margin: 0 2px; "
            'display: inline-block;">'
            f"{html.escape(text[start:end], quote=False)}"
            '<span style="'
            f"background-color:{badge_color}; color: white; "
            "padding: 1px 5px; border-radius: 5px; margin-left: 5px; "
            'font-size: 0.85em; vertical-align: middle;">'
            f"{html.escape(str(label), quote=False)}"
            "</span></span>"
        )
        cursor = end

    # Trailing text after the last entity (or the whole text if none).
    fragments.append(html.escape(text[cursor:], quote=False))
    return "".join(fragments)
# Streamlit app: page header and short description.
st.title('Multilingual NER')
st.markdown(
    """
<p style='color: grey; font-size: 0.85em;'>
This application performs Named Entity Recognition (NER) across 100+ languages.
The model excels in cross-lingual transfer and capable of processing text that contains multiple languages simultaneously.
</p>
""",
    unsafe_allow_html=True
)
st.write("### 🔠 Token Classification")

# Two-column layout: wide text area on the left, example picker on the right.
col1, col2 = st.columns([4, 1])

# The dropdown is created first because its selection pre-fills the text area.
with col2:
    selected_example = st.selectbox(
        'Select an example:',
        list(examples.keys()),
    )

with col1:
    user_input = st.text_area('Enter your text here:', value=examples[selected_example])

if st.button("Compute"):
    if not user_input.strip():
        # Nothing to tag; avoid sending blank input to the model.
        st.warning("Please enter some text to analyze.")
    else:
        with st.spinner():
            # Time only the model call, using a monotonic clock, so the
            # reported figure is not inflated by highlighting and rendering.
            inference_start = time.perf_counter()
            ner_results = pipe(user_input, aggregation_strategy="simple")
            inference_seconds = time.perf_counter() - inference_start

        # get_colored_text returns HTML markup, so raw HTML must be enabled.
        colored_text = get_colored_text(user_input, ner_results)
        st.markdown(colored_text, unsafe_allow_html=True)
        st.write(f"Inference time: {inference_seconds:.2f} seconds")

        # Second pass without an aggregation strategy for the raw output view.
        with st.expander("Show raw output"):
            raw_results = pipe(user_input)
            st.json(raw_results)