Spaces:

seddiktrk
/

Multilingual-NER

Sleeping

App Files Files Community

Multilingual-NER / app.py

seddiktrk

Update app.py

f3ab87f verified 6 months ago

raw

history blame contribute delete

4.17 kB

	import streamlit as st
	from transformers import pipeline
	import time
	import torch
	# Run on the GPU when one is available, otherwise fall back to the CPU.
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	print(device)


	@st.cache_resource
	def load_ner_pipeline():
	    """Build the token-classification pipeline once and reuse it.

	    Streamlit re-executes this script from top to bottom on every widget
	    interaction. Without caching, the model would be loaded again on each
	    rerun; ``st.cache_resource`` keeps a single shared instance alive
	    across reruns and sessions.

	    Returns:
	        The ``transformers`` NER pipeline placed on ``device``.
	    """
	    print('Preparing pipeline ...\n')
	    ner_pipeline = pipeline(
	        "ner",
	        model="seddiktrk/xlm-roberta-base-finetuned-panx-all",
	        device=device,
	    )
	    print('\nPipe Ready !!!')
	    return ner_pipeline


	# Load the NER pipeline (served from the cache after the first run)
	pipe = load_ner_pipeline()
	# Example texts
	# Sample sentences keyed by ISO 639-1 language code. The keys are shown
	# to the user as the options of the example picker.
	examples = {
	    "en": "My name is Clara and I live in Berkeley, California.",
	    "fr": "Je m'appelle Marie et je travaille dans un café à Lyon.",
	    "ar": "اسمي أحمد وأدرس في جامعة القاهرة.",
	    "de": "Mein Name ist Hans und ich komme aus München.",
	    "es": "Mi nombre es Lucía y vivo en una pequeña ciudad en México.",
	    "it": "Mi chiamo Giulia e faccio il medico a Roma.",
	    "pt": "Chamo-me Ana e moro em uma fazenda no Brasil.",
	    "ru": "Меня зовут Ольга, и я живу в Санкт-Петербурге.",
	    # "ja" is the language code for Japanese ("jp" is the country code).
	    "ja": "私の名前は佐藤です。東京でITエンジニアとして働いています",
	    "zh": "我叫李华，在北京的一家公司上班",
	}

	# Define colors for each entity type
	# Maps an entity label to (highlight background color, label badge color).
	ENTITY_COLORS = {
	    "PER": ("#F7D4DA", "#E31A1C"),  # Light pink background, red badge
	    "ORG": ("#D4E2F4", "#2171B5"),  # Light blue background, blue badge
	    "LOC": ("#E8DAEF", "#6A51A3"),  # Light purple background, purple badge
	    #"MISC": ("#FFE5B4", "#FF8C00"), # Light orange background, dark orange badge
	}

	# Colors used for any label that has no entry in ENTITY_COLORS.
	_FALLBACK_ENTITY_COLORS = ("#FFD700", "#FF4500")

	# Markup for one highlighted entity: the entity text on a tinted background,
	# followed by a small badge holding the label. It is kept on a single line
	# so that no newlines or indentation are injected into the surrounding text.
	_ENTITY_HTML = (
	    '<span style="background-color:{background}; padding: 3px 5px; '
	    'border-radius: 5px; margin: 0 2px; display: inline-block;">'
	    '{text}'
	    '<span style="background-color:{badge}; color: white; padding: 1px 5px; '
	    'border-radius: 5px; margin-left: 5px; font-size: 0.85em; '
	    'vertical-align: middle;">{label}</span>'
	    '</span>'
	)


	def get_colored_text(text, entities):
	    """Return ``text`` as HTML with every entity span highlighted.

	    Args:
	        text: The raw input string the entities were extracted from.
	        entities: Iterable of dicts, each providing ``'start'`` and ``'end'``
	            character offsets into ``text`` and an ``'entity_group'`` label.

	    Returns:
	        An HTML string. Text outside and inside the entity spans is
	        HTML-escaped, so markup typed by the user is displayed literally
	        instead of being interpreted by the browser.
	    """
	    # Local import keeps this function self-contained.
	    import html

	    pieces = []
	    cursor = 0  # Index in ``text`` up to which output has been emitted.

	    # Process spans left to right regardless of the order they arrive in.
	    for entity in sorted(entities, key=lambda item: item['start']):
	        start, end = entity['start'], entity['end']
	        if start < cursor:
	            # Overlaps a span that was already emitted; nesting highlights
	            # would duplicate text, so the later span is skipped.
	            continue

	        label = entity['entity_group']
	        background, badge = ENTITY_COLORS.get(label, _FALLBACK_ENTITY_COLORS)

	        # Plain text between the previous entity and this one.
	        pieces.append(html.escape(text[cursor:start], quote=False))
	        pieces.append(
	            _ENTITY_HTML.format(
	                background=background,
	                badge=badge,
	                text=html.escape(text[start:end], quote=False),
	                label=html.escape(str(label), quote=False),
	            )
	        )
	        cursor = end

	    # Whatever follows the last entity.
	    pieces.append(html.escape(text[cursor:], quote=False))
	    return "".join(pieces)
	# Streamlit interface

	# Streamlit app
	st.title('Multilingual NER')
	st.markdown(
	    """
	<p style='color: grey; font-size: 0.85em;'>
	This application performs Named Entity Recognition (NER) across 100+ languages.
	The model excels in cross-lingual transfer and is capable of processing text that contains multiple languages simultaneously.
	</p>
	""",
	    unsafe_allow_html=True
	)
	st.write("### 🔠 Token Classification")

	# Two-column layout: wide text area on the left, example picker on the right.
	col1, col2 = st.columns([4, 1])

	# The picker is created first because its selection seeds the text area.
	with col2:
	    selected_example = st.selectbox(
	        'Select an example:',
	        list(examples),
	    )

	with col1:
	    user_input = st.text_area('Enter your text here:', value=examples[selected_example])

	# Run the model only when the user asks for it.
	if st.button("Compute"):
	    if not user_input.strip():
	        # Nothing to analyse; skip the model call entirely.
	        st.warning("Please enter some text to analyze.")
	    else:
	        with st.spinner():
	            # Time the model call only, so that the reported figure does not
	            # include highlighting or rendering. perf_counter is a monotonic
	            # clock intended for measuring short durations.
	            start_time = time.perf_counter()
	            ner_results = pipe(user_input, aggregation_strategy="simple")
	            elapsed = time.perf_counter() - start_time

	            # Highlight the grouped entities inside the original text.
	            colored_text = get_colored_text(user_input, ner_results)
	            st.markdown(colored_text, unsafe_allow_html=True)

	        st.write(f"Inference time: {elapsed:.2f} seconds")

	        with st.expander("Show raw output"):
	            # Second pass without an aggregation strategy, shown as JSON.
	            raw_results = pipe(user_input)
	            st.json(raw_results)