Spaces:
Sleeping
Sleeping
# The libraries used
import html

import gradio as gr
import pandas as pd
from transformers import pipeline
# Hugging Face NER model (XLM-RoBERTa large, fine-tuned on English CoNLL-03).
# `grouped_entities=True` is deprecated in the token-classification pipeline;
# `aggregation_strategy="simple"` is its documented replacement and still
# yields one dict per grouped entity with the keys used further down
# ('word', 'entity_group', 'score', 'start', 'end').
ner = pipeline(
    "ner",
    model="FacebookAI/xlm-roberta-large-finetuned-conll03-english",
    aggregation_strategy="simple",
)
# Extract the delimiter-bounded clause of the text that contains an entity:
# start of text to first comma, comma to comma, or last comma to end of text.
def split_sentences(text: str, start: int, end: int, *, delimiter: str = ",") -> str:
    """Return the clause of *text* surrounding the span ``[start, end)``.

    The clause runs from just after the last *delimiter* found before
    ``start`` (or from the beginning of *text* when there is none) up to the
    first *delimiter* found at or after ``end`` (or to the end of *text* when
    there is none). Surrounding whitespace is stripped.

    Args:
        text: The full input text.
        start: Index of the first character of the entity.
        end: Index one past the last character of the entity.
        delimiter: Separator that bounds a clause; defaults to a comma.

    Returns:
        The stripped clause containing the entity.
    """
    # Delimiter before the entity: skip past it, or fall back to the start.
    before = text.rfind(delimiter, 0, start)
    left = 0 if before == -1 else before + len(delimiter)

    # Delimiter after the entity: stop at it, or fall back to the end.
    after = text.find(delimiter, end)
    right = len(text) if after == -1 else after

    return text[left:right].strip()
# Convert the NER output into a DataFrame.
def entities_to_df(text: str) -> pd.DataFrame:
    """Run the NER model on *text* and return one DataFrame row per entity.

    Columns are ``Entity``, ``Type``, ``Score`` (rounded to 4 decimals),
    ``Start``, ``End`` and ``Sentence`` (the comma-delimited clause that
    contains the entity, as returned by ``split_sentences``).

    When the model finds no entities, an empty DataFrame with the same
    columns is returned instead of raising ``KeyError`` on the missing
    ``Score`` column.
    """
    columns = ["Entity", "Type", "Score", "Start", "End", "Sentence"]
    rows = [
        {
            "Entity": entity["word"],
            "Type": entity["entity_group"],  # loc, org, per, misc
            "Score": float(entity["score"]),
            "Start": entity["start"],
            "End": entity["end"],
            "Sentence": split_sentences(text, entity["start"], entity["end"]),
        }
        for entity in ner(text)  # the NER model is applied to the input text
    ]
    # Passing `columns` guarantees the expected schema even with zero rows.
    df = pd.DataFrame(rows, columns=columns)
    # Rounding is done on the column (not per value) so the displayed score
    # is actually shortened; skipped for an empty frame, where there is
    # nothing to round.
    if not df.empty:
        df["Score"] = df["Score"].round(4)
    return df
# Highlight the entities of the DataFrame inside the text using HTML.
def highlight_entities(text, df=None):
    """Return *text* as an HTML snippet with every entity highlighted in red.

    Args:
        text: The original input text.
        df: Optional entity DataFrame with ``Entity``, ``Type``, ``Start`` and
            ``End`` columns, rows ordered by position. When omitted it is
            computed with ``entities_to_df(text)``; passing it lets a caller
            that already has the frame avoid a second model run.

    Returns:
        A ``<div>`` wrapping the text, where each entity is replaced by an
        inline red ``<div>`` showing the entity and its type. All text taken
        from the user input or the model output is HTML-escaped so it cannot
        inject markup into the page.
    """
    if df is None:
        df = entities_to_df(text)

    pieces = []
    cursor = 0  # index in `text` just past the last entity emitted
    # An empty frame may lack the expected columns, so skip the loop entirely.
    if not df.empty:
        for word, label, start, end in zip(
            df["Entity"], df["Type"], df["Start"], df["End"]
        ):
            start, end = int(start), int(end)
            # Plain text between the previous entity and this one.
            pieces.append(html.escape(text[cursor:start]))
            # The entity itself, in red, followed by its type
            # (per, org, loc or misc).
            pieces.append(
                "<div style='background-color: red; display: inline;'>"
                f"{html.escape(str(word))} ({html.escape(str(label))})</div>"
            )
            cursor = end

    # Remaining text after the last entity.
    pieces.append(html.escape(text[cursor:]))
    # Outer div block keeps the output as a single HTML element.
    return f"<div>{''.join(pieces)}</div>"


# Combines the two previous functions; this is the function used by the interface.
def NER_output(text):
    """Return ``(highlighted_html, entities_df)`` for *text*.

    The entity DataFrame is computed once and shared with
    ``highlight_entities`` so the NER model runs a single time per request.
    """
    df = entities_to_df(text)
    return highlight_entities(text, df), df
# Default value shown in the Gradio interface input.
default_value = "J.K. Rowling wrote the Harry Potter series, which was published by Bloomsbury Publishing."

# Gradio interface.
# NER_output returns both the HTML and the DataFrame, so there are two
# outputs: the first is gr.HTML and the second is gr.Dataframe.
demo = gr.Interface(
    fn=NER_output,
    inputs=gr.Textbox(label="Enter text:", lines=6, value=default_value),
    outputs=[
        gr.HTML(label="Entities Highlighted"),
        gr.Dataframe(label="Entities in DataFrame format"),
    ],
    title="NER model with highlighted entities",
)

# Launch only when run as a script, so importing this module (e.g. to reuse
# the helper functions) does not start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()