# seddiktrk's picture
# Update app.py
# f3ab87f verified
import streamlit as st
from transformers import pipeline
import time
import torch
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)


@st.cache_resource
def _load_ner_pipeline():
    """Build the token-classification pipeline once per server process.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, the model would be re-instantiated on each
    rerun; ``st.cache_resource`` keeps a single shared instance alive instead.
    """
    print('Preparing pipeline ...\n')
    ner_pipeline = pipeline(
        "ner",
        model="seddiktrk/xlm-roberta-base-finetuned-panx-all",
        device=device,
    )
    print('\nPipe Ready !!!')
    return ner_pipeline


# Module-level handle used by the rest of the app.
pipe = _load_ner_pipeline()
# Sample sentences offered in the example picker, keyed by ISO 639-1 language
# code. Japanese is "ja" ("jp" is the country code for Japan, not a language
# code), which keeps every key in the same scheme.
examples = {
    "en": "My name is Clara and I live in Berkeley, California.",
    "fr": "Je m'appelle Marie et je travaille dans un café à Lyon.",
    "ar": "اسمي أحمد وأدرس في جامعة القاهرة.",
    "de": "Mein Name ist Hans und ich komme aus München.",
    "es": "Mi nombre es Lucía y vivo en una pequeña ciudad en México.",
    "it": "Mi chiamo Giulia e faccio il medico a Roma.",
    "pt": "Chamo-me Ana e moro em uma fazenda no Brasil.",
    "ru": "Меня зовут Ольга, и я живу в Санкт-Петербурге.",
    "ja": "私の名前は佐藤です。東京でITエンジニアとして働いています",
    "zh": "我叫李华,在北京的一家公司上班",
}
# Colour pair per entity group: (badge background, label background).
# Groups without an entry here (e.g. MISC) are not given a dedicated pair.
ENTITY_COLORS = dict(
    PER=("#F7D4DA", "#E31A1C"),  # light pink / red
    ORG=("#D4E2F4", "#2171B5"),  # light blue / blue
    LOC=("#E8DAEF", "#6A51A3"),  # light purple / purple
)
def get_colored_text(text, entities, colors=None):
    """Return *text* as an HTML string with each entity shown as a coloured badge.

    Args:
        text: The raw input string the entities were extracted from.
        entities: Iterable of dicts, each providing ``'start'`` and ``'end'``
            character offsets into *text* and an ``'entity_group'`` label.
        colors: Optional mapping of label -> ``(background, label_background)``
            colour pair. Defaults to the module-level ``ENTITY_COLORS``.

    Returns:
        An HTML fragment. All text taken from *text* and every label is
        HTML-escaped, so markup typed by the user is displayed literally
        instead of being interpreted by the browser.

    Entities are processed in order of their start offset; an entity that
    begins before the previous one ended is skipped, so spans are never nested.
    """
    import html  # local import keeps the function self-contained

    palette = ENTITY_COLORS if colors is None else colors
    fallback_colors = ("#FFD700", "#FF4500")  # used for labels missing from the palette

    pieces = []
    cursor = 0  # index in `text` of the first character not yet emitted
    for entity in sorted(entities, key=lambda item: item['start']):
        start, end = entity['start'], entity['end']
        if start < cursor:
            # Overlaps the previously rendered span; nesting badges would
            # duplicate text, so this entity is skipped.
            continue

        label = entity['entity_group']
        background_color, text_color = palette.get(label, fallback_colors)

        # Plain text between the previous entity and this one.
        pieces.append(html.escape(text[cursor:start], quote=False))
        # Outer span highlights the entity text; inner span is the label badge.
        pieces.append(
            f'<span style="background-color:{background_color}; padding: 3px 5px; '
            f'border-radius: 5px; margin: 0 2px; display: inline-block;">'
            f'{html.escape(text[start:end], quote=False)}'
            f'<span style="background-color:{text_color}; color: white; '
            f'padding: 1px 5px; border-radius: 5px; margin-left: 5px; '
            f'font-size: 0.85em; vertical-align: middle;">'
            f'{html.escape(label, quote=False)}</span></span>'
        )
        cursor = end

    # Whatever follows the last entity (or the whole text when there are none).
    pieces.append(html.escape(text[cursor:], quote=False))
    return "".join(pieces)
# ---- Page header -----------------------------------------------------------
st.title('Multilingual NER')

# Small grey blurb under the title; rendered as raw HTML for the inline styling.
intro_html = """
<p style='color: grey; font-size: 0.85em;'>
This application performs Named Entity Recognition (NER) across 100+ languages.
The model excels in cross-lingual transfer and capable of processing text that contains multiple languages simultaneously.
</p>
"""
st.markdown(intro_html, unsafe_allow_html=True)

st.write("### 🔠 Token Classification")

# ---- Input widgets ---------------------------------------------------------
# Wide column for the text box, narrow column for the example picker.
text_column, picker_column = st.columns([4, 1])

# The picker is created first because its selection seeds the text area below.
with picker_column:
    selected_example = st.selectbox('Select an example:', list(examples))

with text_column:
    user_input = st.text_area(
        'Enter your text here:',
        value=examples[selected_example],
    )
# Run the model only when the user presses the button.
if st.button("Compute"):
    with st.spinner():
        # Time the model call alone so the figure shown really is inference
        # time; perf_counter is monotonic and intended for measuring durations.
        start_time = time.perf_counter()
        ner_results = pipe(user_input, aggregation_strategy="simple")
        elapsed = time.perf_counter() - start_time

        # Build the highlighted HTML from the aggregated entities.
        colored_text = get_colored_text(user_input, ner_results)
        # Display the results
        st.markdown(colored_text, unsafe_allow_html=True)

    st.write(f"Inference time: {elapsed:.2f} seconds")

    # Second call without an aggregation strategy, shown as JSON in a
    # collapsible section.
    with st.expander("Show raw output"):
        raw_results = pipe(user_input)
        st.json(raw_results)