# The libraries used
import gradio as gr
import pandas as pd
from transformers import pipeline

# Loading the Hugging Face NER model
# grouped_entities=True merges sub-word tokens back into whole entities
ner = pipeline('ner', model='FacebookAI/xlm-roberta-large-finetuned-conll03-english', grouped_entities=True)
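# With grouping enabled, ner(...) returns a list of dicts, one per detected entity, of the form
#   {'entity_group': 'PER', 'score': 0.99, 'word': 'J.K. Rowling', 'start': 0, 'end': 12}
# (the score and offsets shown here are illustrative)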

# A function that extracts the comma-delimited clause containing an entity:
# from the comma before the entity (or the start of the text) to the comma after it (or the end of the text)
def split_sentences(text, start, end):
    # Comma before the entity
    start_comma = text.rfind(',', 0, start)
    if start_comma == -1:  # rfind did not find a comma before the entity:
        start_comma = 0  # start from the beginning of the text
    else:
        start_comma += 1  # a comma was found, so start from the character after it
    # Comma after the entity
    end_comma = text.find(',', end)
    if end_comma == -1:
        return text[start_comma:].strip()  # no comma after the entity, so return everything up to the end of the text
    else:
        return text[start_comma:end_comma].strip()  # stop at the comma that follows the entity
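# Illustrative usage (here "Paris" spans characters 10-15 of the text):
#   split_sentences("I flew to Paris, then to Rome.", 10, 15)  ->  "I flew to Paris"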

# Converting the NER output into a DataFrame:
def entities_to_df(text):
    all_entities = []
    entities = ner(text)  # run the NER model on the input text
    # Put the entities into a data frame with the needed keys, calling split_sentences for each entity
    for entity in entities:
        sentence = split_sentences(text, entity['start'], entity['end'])
        all_entities.append({
            "Entity": entity['word'],
            "Type": entity['entity_group'],  # LOC, ORG, PER or MISC
            "Score": float(entity['score']),
            "Start": entity['start'],
            "End": entity['end'],
            "Sentence": sentence,
        })
    df = pd.DataFrame(all_entities)
    # The score above is not rounded in the displayed output, so round it after creating the DataFrame
    df['Score'] = df['Score'].round(4)
    return df
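# Illustrative row for the default sentence below (the score and exact word boundaries depend on the model):
#   Entity        Type  Score   Start  End  Sentence
#   J.K. Rowling  PER   0.9999  0      12   J.K. Rowling wrote the Harry Potter series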

# A function that highlights the entities of the DataFrame using HTML
def highlight_entities(text):
    df = entities_to_df(text)
    highlighted_text = ""
    last_idx = 0
    # Iterate over the DataFrame rows in order (iterrows yields the rows one at a time)
    for i, entity in df.iterrows():
        # Add the plain text before the entity
        highlighted_text += text[last_idx:entity['Start']]
        # Highlight the entity in red using an inline HTML div, and append its type (PER, ORG, LOC or MISC)
        highlighted_text += f"<div style='background-color: red; display: inline;'>{entity['Entity']} ({entity['Type']})</div>"
        # Move the index past the current entity
        last_idx = entity['End']
    # Add the text after the last entity
    highlighted_text += text[last_idx:]
    # Again, wrap everything in an HTML div block so the output looks better :)
    return f"<div>{highlighted_text}</div>"

# The last function, which combines the two previous functions and is used in the interface
def NER_output(text):
    html = highlight_entities(text)
    df = entities_to_df(text)
    return html, df

# A default value that will be used as the Gradio interface input
default_value = "J.K. Rowling wrote the Harry Potter series, which was published by Bloomsbury Publishing."

# Gradio Interface
# Since NER_output returns both the HTML and the DataFrame, there are two outputs:
# the first is gr.HTML and the second is gr.Dataframe
demo = gr.Interface(
    fn=NER_output,
    inputs=gr.Textbox(label="Enter text:", lines=6, value=default_value),
    outputs=[gr.HTML(label="Entities Highlighted"), gr.Dataframe(label="Entities in DataFrame format")],
    title="NER model with highlighted entities",
)

demo.launch()