# Streamlit demo: multilingual Named Entity Recognition (NER).
#
# Flow: load a token-classification pipeline once, let the user pick or type
# a sentence, run the model, and render the text with every detected entity
# highlighted and tagged with its label.

import html
import time

import streamlit as st
import torch
from transformers import pipeline

MODEL_NAME = "seddiktrk/xlm-roberta-base-finetuned-panx-all"

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)


@st.cache_resource
def load_pipeline():
    """Build the NER pipeline once and reuse it across script reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the model would be re-instantiated each time the user clicks.

    Returns:
        The ``transformers`` "ner" pipeline for ``MODEL_NAME`` placed on the
        module-level ``device``.
    """
    print('Preparing pipeline ...\n')
    ner_pipeline = pipeline("ner", model=MODEL_NAME, device=device)
    print('\nPipe Ready !!!')
    return ner_pipeline


pipe = load_pipeline()

# Example texts, keyed by the language tag shown in the dropdown.
examples = {
    "en": "My name is Clara and I live in Berkeley, California.",
    "fr": "Je m'appelle Marie et je travaille dans un café à Lyon.",
    "ar": "اسمي أحمد وأدرس في جامعة القاهرة.",
    "de": "Mein Name ist Hans und ich komme aus München.",
    "es": "Mi nombre es Lucía y vivo en una pequeña ciudad en México.",
    "it": "Mi chiamo Giulia e faccio il medico a Roma.",
    "pt": "Chamo-me Ana e moro em uma fazenda no Brasil.",
    "ru": "Меня зовут Ольга, и я живу в Санкт-Петербурге.",
    "jp": "私の名前は佐藤です。東京でITエンジニアとして働いています",
    "zh": "我叫李华,在北京的一家公司上班",
}

# (background, text) colour pair for each entity label.
ENTITY_COLORS = {
    "PER": ("#F7D4DA", "#E31A1C"),  # Light pink background, red text
    "ORG": ("#D4E2F4", "#2171B5"),  # Light blue background, blue text
    "LOC": ("#E8DAEF", "#6A51A3"),  # Light purple background, purple text
}

# Used for any label without an entry in ENTITY_COLORS (e.g. "MISC").
DEFAULT_ENTITY_COLORS = ("#FFD700", "#FF4500")


def get_colored_text(text, entities):
    """Return ``text`` as an HTML fragment with each entity highlighted.

    Args:
        text: The raw string that was sent to the NER pipeline.
        entities: Iterable of dicts with integer ``start``/``end`` character
            offsets into ``text`` and an ``entity_group`` label (the shape
            the pipeline call below produces with
            ``aggregation_strategy="simple"``).

    Returns:
        A string in which every entity span is wrapped in a coloured
        ``<span>`` followed by its label. All text taken from ``text`` and
        every label is HTML-escaped, because the result is rendered with
        ``unsafe_allow_html=True``.
    """
    pieces = []
    cursor = 0  # index in ``text`` of the first character not yet emitted

    # Sort so the output is correct even if the spans arrive out of order.
    for entity in sorted(entities, key=lambda item: item['start']):
        start, end = entity['start'], entity['end']
        if start < cursor:
            # Overlaps a span that was already emitted; wrapping it as well
            # would duplicate text, so it is skipped.
            continue

        label = entity['entity_group']
        background_color, text_color = ENTITY_COLORS.get(
            label, DEFAULT_ENTITY_COLORS
        )

        pieces.append(html.escape(text[cursor:start], quote=False))
        # Kept on a single line: Markdown could treat indented or multi-line
        # HTML as a code block.
        pieces.append(
            f'<span style="background-color: {background_color}; '
            f'color: {text_color}; padding: 0.1em 0.3em; '
            f'border-radius: 0.3em;">'
            f'{html.escape(text[start:end], quote=False)}'
            f'<span style="font-size: 0.75em; font-weight: bold; '
            f'margin-left: 0.3em;">{html.escape(str(label), quote=False)}'
            f'</span></span>'
        )
        cursor = end

    pieces.append(html.escape(text[cursor:], quote=False))
    return "".join(pieces)


# Streamlit app
st.title('Multilingual NER')
st.markdown(
    """
This application performs Named Entity Recognition (NER) across 100+ languages. The model excels in cross-lingual transfer and capable of processing text that contains multiple languages simultaneously.
""",
    unsafe_allow_html=True,
)
st.write("### 🔠 Token Classification")

# Two-column layout: wide text area on the left, example picker on the right.
col1, col2 = st.columns([4, 1])

# The dropdown is created first because its selection seeds the text area.
with col2:
    selected_example = st.selectbox(
        'Select an example:',
        list(examples.keys()),
    )

with col1:
    user_input = st.text_area(
        'Enter your text here:', value=examples[selected_example]
    )

if st.button("Compute"):
    if not user_input.strip():
        # Nothing to analyse; do not send blank input to the model.
        st.warning("Please enter some text to analyze.")
    else:
        with st.spinner():
            # Time only the model call so the figure matches its label.
            start_time = time.perf_counter()
            ner_results = pipe(user_input, aggregation_strategy="simple")
            elapsed = time.perf_counter() - start_time

            colored_text = get_colored_text(user_input, ner_results)
            st.markdown(colored_text, unsafe_allow_html=True)

        st.write(f"Inference time: {elapsed:.2f} seconds")

        with st.expander("Show raw output"):
            # Second call without an aggregation strategy, so the JSON
            # shows the pipeline's un-aggregated output.
            raw_results = pipe(user_input)
            st.json(raw_results)