seddiktrk committed on
Commit
8da3546
·
verified ·
1 Parent(s): d2ef8ea

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -0
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ import time
4
+ import torch
5
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)


@st.cache_resource
def load_pipeline():
    """Build the token-classification (NER) pipeline once per server process.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, the model would be re-instantiated on each
    rerun; ``st.cache_resource`` keeps a single shared pipeline object alive
    across reruns and sessions.

    Returns:
        The ``transformers`` NER pipeline placed on ``device``.
    """
    print('Preparing pipeline ...\n')
    ner_pipeline = pipeline(
        "ner",
        model="seddiktrk/xlm-roberta-base-finetuned-panx-en",
        device=device,
    )
    print('\nPipe Ready !!!')
    return ner_pipeline


# Load the NER pipeline
pipe = load_pipeline()
13
# Example texts offered in the "Select an example" dropdown, keyed by the
# ISO 639-1 code of the language each sentence is written in.
examples = {
    "en": "My name is Clara and I live in Berkeley, California.",
    "fr": "Je m'appelle Marie et je travaille dans un café à Lyon.",
    "ar": "اسمي أحمد وأدرس في جامعة القاهرة.",
    "de": "Mein Name ist Hans und ich komme aus München.",
    "es": "Mi nombre es Lucía y vivo en una pequeña ciudad en México.",
    "it": "Mi chiamo Giulia e faccio il medico a Roma.",
    "pt": "Chamo-me Ana e moro em uma fazenda no Brasil.",
    "ru": "Меня зовут Ольга, и я живу в Санкт-Петербурге.",
    # "ja" is the language code for Japanese ("jp" is the country code).
    "ja": "私の名前は佐藤です。東京でITエンジニアとして働いています",
    "zh": "我叫李华,在北京的一家公司上班",
}
27
+
28
# Colours for each entity type: (highlight background, label-badge background).
ENTITY_COLORS = {
    "PER": ("#F7D4DA", "#E31A1C"),  # Light pink highlight, red badge
    "ORG": ("#D4E2F4", "#2171B5"),  # Light blue highlight, blue badge
    "LOC": ("#E8DAEF", "#6A51A3"),  # Light purple highlight, purple badge
}

# Colours used for any label that has no entry in ENTITY_COLORS.
_DEFAULT_ENTITY_COLORS = ("#FFD700", "#FF4500")

# Static CSS shared by every highlighted entity and by its label badge.
_ENTITY_STYLE = (
    "padding: 3px 5px; border-radius: 5px; margin: 0 2px; "
    "display: inline-block;"
)
_LABEL_STYLE = (
    "color: white; padding: 1px 5px; border-radius: 5px; "
    "margin-left: 5px; font-size: 0.85em; vertical-align: middle;"
)


def get_colored_text(text, entities):
    """Return ``text`` as an HTML fragment with each entity highlighted.

    Args:
        text: The raw input string the entities were extracted from.
        entities: Iterable of dicts carrying ``start`` and ``end`` character
            offsets into ``text`` and an ``entity_group`` label.

    Returns:
        An HTML string in which every entity span is wrapped in a coloured
        ``<span>`` followed by a small badge showing its label. All text taken
        from ``text`` and every label is HTML-escaped, so the result can be
        rendered as raw HTML without the input injecting markup.
    """
    import html  # local import keeps this block self-contained

    parts = []
    cursor = 0  # index in ``text`` up to which output has been emitted

    # Process spans left to right regardless of the order they arrive in.
    for entity in sorted(entities, key=lambda item: item["start"]):
        start, end = entity["start"], entity["end"]
        if start < cursor or end <= start:
            # Overlaps an already-emitted span, or is empty: skip it rather
            # than duplicate or corrupt the surrounding text.
            continue

        label = entity["entity_group"]
        background_color, badge_color = ENTITY_COLORS.get(
            label, _DEFAULT_ENTITY_COLORS
        )

        # Plain text between the previous entity and this one.
        parts.append(html.escape(text[cursor:start]))
        # Highlighted entity followed by its label badge.
        parts.append(
            f'<span style="background-color:{background_color}; {_ENTITY_STYLE}">'
            f"{html.escape(text[start:end])}"
            f'<span style="background-color:{badge_color}; {_LABEL_STYLE}">'
            f"{html.escape(str(label))}"
            "</span></span>"
        )
        cursor = end

    # Whatever follows the last entity.
    parts.append(html.escape(text[cursor:]))
    return "".join(parts)
74
# Streamlit app: page header, input widgets and the compute action.
st.title('Multilingual NER')
st.markdown(
    """
    <p style='color: grey; font-size: 0.85em;'>
    This application performs Named Entity Recognition (NER) across 100+ languages.
    The model excels in cross-lingual transfer and capable of processing text that contains multiple languages simultaneously.
    </p>
    """,
    unsafe_allow_html=True
)
st.write("### 🔠 Token Classification")

# Create a two-column layout: wide text area on the left, narrow dropdown on
# the right.
col1, col2 = st.columns([4, 1])

# Dropdown in the right column. It is created first because the text area's
# default value depends on the selected example.
with col2:
    selected_example = st.selectbox(
        'Select an example:',
        list(examples.keys()),
    )

# Text area in the left column, pre-filled with the chosen example.
with col1:
    user_input = st.text_area('Enter your text here:', value=examples[selected_example])

# Button to compute
if st.button("Compute"):
    if not user_input.strip():
        # Nothing to analyse; skip the model call entirely.
        st.warning("Please enter some text to analyze.")
    else:
        with st.spinner():
            # Time only the model call so the reported figure really is the
            # inference time, not inference plus HTML rendering.
            start_time = time.perf_counter()
            ner_results = pipe(user_input, aggregation_strategy="simple")
            end_time = time.perf_counter()

            # Display the highlighted text.
            colored_text = get_colored_text(user_input, ner_results)
            st.markdown(colored_text, unsafe_allow_html=True)
            st.write(f"Inference time: {end_time - start_time:.2f} seconds")

            # Second call without an aggregation strategy, shown as the raw
            # output.
            with st.expander("Show raw output"):
                raw_results = pipe(user_input)
                st.json(raw_results)