Spaces:

seddiktrk
/

Multilingual-NER

Sleeping

App Files Files Community

seddiktrk committed on Sep 4, 2024

Commit

8da3546

verified ·

1 Parent(s): d2ef8ea

Create app.py

Browse files

Files changed (1) hide show

app.py +121 -0

app.py ADDED Viewed

	@@ -0,0 +1,121 @@

+import streamlit as st
+from transformers import pipeline
+import time
+import torch
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print(device)
+# Load the NER pipeline
+print('Preparing pipeline ...\n')
+pipe = pipeline("ner",
+                model="seddiktrk/xlm-roberta-base-finetuned-panx-en",
+                device=device)
+print('\nPipe Ready !!!')
+# Example texts
+examples = {
+    "en": "My name is Clara and I live in Berkeley, California.",
+    "fr": "Je m'appelle Marie et je travaille dans un café à Lyon.",
+    "ar": "اسمي أحمد وأدرس في جامعة القاهرة.",
+    "de": "Mein Name ist Hans und ich komme aus München.",
+    "es": "Mi nombre es Lucía y vivo en una pequeña ciudad en México.",
+    "it": "Mi chiamo Giulia e faccio il medico a Roma.",
+    "pt": "Chamo-me Ana e moro em uma fazenda no Brasil.",
+    "ru": "Меня зовут Ольга, и я живу в Санкт-Петербурге.",
+    "jp": "私の名前は佐藤です。東京でITエンジニアとして働いています",
+    "zh": "我叫李华，在北京的一家公司上班"
+}
+# Colors for each entity type, stored as (highlight color, badge color).
+# get_colored_text paints the first color behind the entity text and the second
+# behind the label badge (whose text is white); labels with no entry here get
+# that function's fallback pair.
+ENTITY_COLORS = {
+    "PER": ("#F7D4DA", "#E31A1C"),  # Light pink highlight, red label badge
+    "ORG": ("#D4E2F4", "#2171B5"),  # Light blue highlight, blue label badge
+    "LOC": ("#E8DAEF", "#6A51A3"),  # Light purple highlight, purple label badge
+    # Disabled entry: "MISC": ("#FFE5B4", "#FF8C00")  (light orange highlight, dark orange label badge)
+}
+def get_colored_text(text, entities):
+    offset = 0
+    for entity in entities:
+        start = entity['start'] + offset
+        end = entity['end'] + offset
+        label = entity['entity_group']
+        background_color, text_color = ENTITY_COLORS.get(label, ("#FFD700", "#FF4500"))
+        # HTML structure for styled entity display
+        entity_text = f'''
+        <span style="
+            background-color:{background_color};
+            padding: 3px 5px;
+            border-radius: 5px;
+            margin: 0 2px;
+            display: inline-block;
+            ">
+            {text[start:end]}
+            <span style="
+                background-color:{text_color};
+                color: white;
+                padding: 1px 5px;
+                border-radius: 5px;
+                margin-left: 5px;
+                font-size: 0.85em;
+                vertical-align: middle;
+                ">
+                {label}
+            </span>
+        </span>
+        '''
+        # Replace the original text with the colored entity text
+        text = text[:start] + entity_text + text[end:]
+        # Update offset to adjust for the added characters in entity_text
+        offset += len(entity_text) - (end - start)
+    return text
+# Streamlit interface
+# Streamlit app
+st.title('Multilingual NER')
+st.markdown(
+    """
+    <p style='color: grey; font-size: 0.85em;'>
+    This application performs Named Entity Recognition (NER) across 100+ languages.
+    The model excels in cross-lingual transfer and capable of processing text that contains multiple languages simultaneously.
+    </p>
+    """,
+    unsafe_allow_html=True
+)
+st.write("### 🔠 Token Classification")
+# Create a two-column layout
+col1, col2 = st.columns([4, 1])  # Adjust column widths as needed
+# Dropdown in the right column
+with col2:
+    selected_example = st.selectbox(
+        'Select an example:',
+        list(examples.keys()),
+    )
+# Text area in the left column
+with col1:
+    user_input = st.text_area('Enter your text here:', value=examples[selected_example])
+# Button to compute
+if st.button("Compute"):
+    with st.spinner():
+        start_time = time.time()
+        # Get NER results
+        ner_results = pipe(user_input,aggregation_strategy="simple")
+        # Display the results
+        colored_text = get_colored_text(user_input, ner_results)
+        # Display the results
+        st.markdown(colored_text, unsafe_allow_html=True)
+        end_time = time.time()
+        st.write(f"Inference time: {end_time - start_time:.2f} seconds")
+        with st.expander("Show raw output"):
+            raw_results = pipe(user_input)
+            st.json(raw_results)