# The libraries used
import gradio as gr
import pandas as pd
from transformers import pipeline

# Loading the Hugging Face NER model
# grouped_entities=True merges sub-word tokens back into whole entities
ner = pipeline('ner', model='FacebookAI/xlm-roberta-large-finetuned-conll03-english', grouped_entities=True)
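# With grouping enabled, ner(...) returns a list of dicts, one per detected entity, of the form
#   {'entity_group': 'PER', 'score': 0.99, 'word': 'J.K. Rowling', 'start': 0, 'end': 12}
# (the score and offsets shown here are illustrative)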

# A function that extracts the comma-delimited clause containing an entity:
# from the comma before the entity (or the start of the text) to the comma after it (or the end of the text)
def split_sentences(text, start, end):
    # Comma before the entity
    start_comma = text.rfind(',', 0, start)
    if start_comma == -1:  # rfind did not find a comma before the entity:
        start_comma = 0  # start from the beginning of the text
    else:
        start_comma += 1  # a comma was found, so start from the character after it
    # Comma after the entity
    end_comma = text.find(',', end)
    if end_comma == -1:
        return text[start_comma:].strip()  # no comma after the entity, so return everything up to the end of the text
    else:
        return text[start_comma:end_comma].strip()  # stop at the comma that follows the entity
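# Illustrative usage (here "Paris" spans characters 10-15 of the text):
#   split_sentences("I flew to Paris, then to Rome.", 10, 15)  ->  "I flew to Paris"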

# Converting the NER output into a DataFrame:
def entities_to_df(text):
    all_entities = []
    entities = ner(text)  # run the NER model on the input text
    # Put the entities into a data frame with the needed keys, calling split_sentences for each entity
    for entity in entities:
        sentence = split_sentences(text, entity['start'], entity['end'])
        all_entities.append({
            "Entity": entity['word'],
            "Type": entity['entity_group'],  # LOC, ORG, PER or MISC
            "Score": float(entity['score']),
            "Start": entity['start'],
            "End": entity['end'],
            "Sentence": sentence,
        })
    df = pd.DataFrame(all_entities)
    # The score above is not rounded in the displayed output, so round it after creating the DataFrame
    df['Score'] = df['Score'].round(4)
    return df
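# Illustrative row for the default sentence below (the score and exact word boundaries depend on the model):
#   Entity        Type  Score   Start  End  Sentence
#   J.K. Rowling  PER   0.9999  0      12   J.K. Rowling wrote the Harry Potter series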

# A function that highlights the entities of the DataFrame using HTML
def highlight_entities(text):
    df = entities_to_df(text)
    highlighted_text = ""
    last_idx = 0
    # Iterate over the DataFrame rows in order (iterrows yields the rows one at a time)
    for i, entity in df.iterrows():
        # Add the plain text before the entity
        highlighted_text += text[last_idx:entity['Start']]
        # Highlight the entity in red using an inline HTML div, and append its type (PER, ORG, LOC or MISC)
        highlighted_text += f"<div style='background-color: red; display: inline;'>{entity['Entity']} ({entity['Type']})</div>"
        # Move the index past the current entity
        last_idx = entity['End']
    # Add the text after the last entity
    highlighted_text += text[last_idx:]
    # Again, wrap everything in an HTML div block so the output looks better :)
    return f"<div>{highlighted_text}</div>"

# The last function, which combines the two previous functions and is used in the interface
def NER_output(text):
    html = highlight_entities(text)
    df = entities_to_df(text)
    return html, df

# A default value that will be used as the Gradio interface input
default_value = "J.K. Rowling wrote the Harry Potter series, which was published by Bloomsbury Publishing."

# Gradio Interface
# Since NER_output returns both the HTML and the DataFrame, there are two outputs:
# the first is gr.HTML and the second is gr.Dataframe
demo = gr.Interface(
    fn=NER_output,
    inputs=gr.Textbox(label="Enter text:", lines=6, value=default_value),
    outputs=[gr.HTML(label="Entities Highlighted"), gr.Dataframe(label="Entities in DataFrame format")],
    title="NER model with highlighted entities",
)

demo.launch()