Spaces:

hamdah926
/

NER_model_with_gradio

Sleeping

App Files Files Community

NER_model_with_gradio / app.py

hamdah926

Update app.py

265bcac verified 6 months ago

raw

history blame contribute delete

3.67 kB

	#The libraries used
	import gradio as gr
	import pandas as pd
	from transformers import pipeline


	# Load the Hugging Face NER pipeline once, at import time, so every request
	# reuses the same model instance.
	# aggregation_strategy='simple' is the current spelling of the deprecated
	# grouped_entities=True flag: sub-word tokens are merged into whole entities,
	# which is what produces the 'entity_group' key read further down the file.
	ner = pipeline(
	    'ner',
	    model='FacebookAI/xlm-roberta-large-finetuned-conll03-english',
	    aggregation_strategy='simple',
	)

	def split_sentences(text, start, end):
	    """Return the comma-delimited clause of ``text`` that surrounds an entity.

	    The clause begins right after the closest comma located before ``start``
	    (or at the beginning of ``text`` when there is none) and stops at the
	    first comma found at or after ``end`` (or at the end of ``text`` when
	    there is none). Leading and trailing whitespace is stripped.

	    Args:
	        text: The full input text.
	        start: Character offset where the entity begins.
	        end: Character offset where the entity ends.

	    Returns:
	        The stripped clause as a string.
	    """
	    # rfind yields -1 when no comma precedes the entity, so adding 1 gives 0
	    # (start of text); otherwise it gives the character just after the comma.
	    clause_begin = text.rfind(',', 0, start) + 1

	    # No comma after the entity means the clause runs to the end of the text.
	    following_comma = text.find(',', end)
	    clause_end = len(text) if following_comma == -1 else following_comma

	    return text[clause_begin:clause_end].strip()


	def entities_to_df(text):
	    """Run the NER pipeline on ``text`` and tabulate the detected entities.

	    Args:
	        text: The input text to analyse.

	    Returns:
	        A pandas DataFrame with one row per entity and the columns
	        ``Entity``, ``Type``, ``Score`` (rounded to 4 decimals), ``Start``,
	        ``End`` and ``Sentence`` (the comma-delimited clause containing the
	        entity). When the model finds no entities, an empty DataFrame with
	        those same columns is returned.
	    """
	    columns = ["Entity", "Type", "Score", "Start", "End", "Sentence"]

	    # One record per entity returned by the module-level ``ner`` pipeline.
	    rows = [
	        {
	            "Entity": entity['word'],
	            "Type": entity['entity_group'],  # loc, org, per, misc
	            "Score": float(entity['score']),
	            "Start": entity['start'],
	            "End": entity['end'],
	            "Sentence": split_sentences(text, entity['start'], entity['end']),
	        }
	        for entity in ner(text)
	    ]

	    # Passing ``columns`` keeps the schema stable even with zero rows; without
	    # it an entity-free text produced a column-less frame and the 'Score'
	    # lookup below raised KeyError.
	    df = pd.DataFrame(rows, columns=columns)

	    # Round after building the frame so the displayed scores are short.
	    if not df.empty:
	        df["Score"] = df["Score"].round(4)

	    return df


	def highlight_entities(text, df=None):
	    """Render ``text`` as HTML with every detected entity highlighted in red.

	    Args:
	        text: The original input text.
	        df: Optional entity table as produced by ``entities_to_df(text)``
	            (needs the ``Start``, ``End``, ``Entity`` and ``Type`` columns,
	            with rows in text order). When omitted, the table is computed
	            here, which keeps the original one-argument call working.

	    Returns:
	        An HTML string: the text wrapped in a ``<div>``, each entity replaced
	        by an inline red ``<div>`` showing the entity and its type.
	    """
	    # Imported locally so this block does not depend on the file's import list.
	    from html import escape

	    if df is None:
	        df = entities_to_df(text)

	    # The text comes straight from the user and is rendered by gr.HTML, so
	    # every fragment is escaped to stop typed markup from being interpreted.
	    if df.empty:
	        return f"<div>{escape(text)}</div>"

	    pieces = []
	    cursor = 0  # offset in ``text`` just past the previously emitted entity
	    for start, end, word, label in zip(df["Start"], df["End"], df["Entity"], df["Type"]):
	        # Plain text between the previous entity and this one.
	        pieces.append(escape(text[cursor:start]))
	        # The entity itself, highlighted, followed by its type
	        # (per, org, loc or misc).
	        pieces.append(
	            "<div style='background-color: red; display: inline;'>"
	            f"{escape(str(word))} ({escape(str(label))})</div>"
	        )
	        cursor = end

	    # Whatever follows the last entity.
	    pieces.append(escape(text[cursor:]))

	    highlighted_text = "".join(pieces)
	    return f"<div>{highlighted_text}</div>"


	def NER_output(text):
	    """Return the highlighted HTML and the entity DataFrame for ``text``.

	    This is the function wired into the Gradio interface. The entity table is
	    computed once and shared with ``highlight_entities`` so the model runs a
	    single time per request instead of twice.
	    """
	    df = entities_to_df(text)
	    return highlight_entities(text, df), df

	# Example sentence pre-filled in the input box of the Gradio interface.
	default_value = "J.K. Rowling wrote the Harry Potter series, which was published by Bloomsbury Publishing."

	# Gradio interface.
	# NER_output returns two values (the HTML and the DataFrame), so the interface
	# declares two outputs: first a gr.HTML component, then a gr.Dataframe.
	text_input = gr.Textbox(label="Enter text:", lines=6, value=default_value)
	highlighted_output = gr.HTML(label="Entities Highlighted")
	table_output = gr.Dataframe(label="Entities in DataFrame format")

	demo = gr.Interface(
	    fn=NER_output,
	    inputs=text_input,
	    outputs=[highlighted_output, table_output],
	    title="NER model with highlighted entities",
	)


	demo.launch()