"""Gradio demo: run a Hugging Face NER model and show the entities it finds.

The app returns two views of the same result: the input text with every
entity highlighted in HTML, and a DataFrame with one row per entity.
"""

# The libraries used
from html import escape

import gradio as gr
import pandas as pd
from transformers import pipeline

# Implementing the Hugging Face NER model.
# NOTE(review): newer transformers releases document `aggregation_strategy`
# as the replacement for `grouped_entities` -- confirm against the installed
# version before switching.
ner = pipeline(
    'ner',
    model='FacebookAI/xlm-roberta-large-finetuned-conll03-english',
    grouped_entities=True,
)

# Column order of the entity table; also used to build an empty table when
# there are no entities to report.
ENTITY_COLUMNS = ["Entity", "Type", "Score", "Start", "End", "Sentence"]


def split_sentences(text: str, start: int, end: int) -> str:
    """Return the comma-delimited clause of *text* that contains an entity.

    The clause runs from the character after the last comma before ``start``
    (or the beginning of the text) up to the first comma at or after ``end``
    (or the end of the text). Surrounding whitespace is stripped.

    Args:
        text: The full input text.
        start: Start offset of the entity in ``text``.
        end: End offset of the entity in ``text``.
    """
    # Comma before the entity: rfind returns -1 when there is none, in which
    # case the clause starts at the beginning of the text.
    start_comma = text.rfind(',', 0, start)
    clause_start = 0 if start_comma == -1 else start_comma + 1

    # Comma after the entity: when there is none, the clause runs to the end.
    end_comma = text.find(',', end)
    if end_comma == -1:
        return text[clause_start:].strip()
    return text[clause_start:end_comma].strip()


def entities_to_df(text: str) -> pd.DataFrame:
    """Run the NER model on *text* and return its entities as a DataFrame.

    The frame has the columns listed in ``ENTITY_COLUMNS``. When the text is
    blank or the model finds nothing, an empty frame with those columns is
    returned instead of raising.
    """
    # Nothing to analyse: skip the model call entirely.
    if not text or not text.strip():
        return pd.DataFrame(columns=ENTITY_COLUMNS)

    rows = [
        {
            "Entity": entity['word'],
            "Type": entity['entity_group'],  # loc, org, per, misc
            "Score": float(entity['score']),
            "Start": entity['start'],
            "End": entity['end'],
            "Sentence": split_sentences(text, entity['start'], entity['end']),
        }
        for entity in ner(text)
    ]

    # Without this guard an empty result would build a frame with no columns
    # and the 'Score' lookup below would raise KeyError.
    if not rows:
        return pd.DataFrame(columns=ENTITY_COLUMNS)

    df = pd.DataFrame(rows, columns=ENTITY_COLUMNS)
    # Round on the frame so the displayed score is limited to 4 decimals.
    df['Score'] = df['Score'].round(4)
    return df


def highlight_entities(text: str, df: pd.DataFrame | None = None) -> str:
    """Return *text* as HTML with every entity highlighted in red.

    Args:
        text: The original input text.
        df: Optional entity table as produced by ``entities_to_df``. When
            omitted, it is computed from ``text``; passing it avoids running
            the model a second time.

    Returns:
        An HTML string wrapped in a ``<div>``. The text and the entity labels
        are HTML-escaped so markup typed by the user is shown literally
        rather than interpreted by the browser.
    """
    if df is None:
        df = entities_to_df(text)

    parts = []
    last_idx = 0
    # Walk the entities in table order, copying the plain text between them.
    for start, end, word, label in zip(
        df['Start'], df['End'], df['Entity'], df['Type']
    ):
        start, end = int(start), int(end)
        # Text before the entity.
        parts.append(escape(text[last_idx:start]))
        # The entity itself, with its type (per, org, loc or misc).
        parts.append(
            "<div style='background-color: red; display: inline;'>"
            f"{escape(str(word))} ({escape(str(label))})</div>"
        )
        # Continue after the current entity.
        last_idx = end

    # Text after the last entity.
    parts.append(escape(text[last_idx:]))
    return f"<div>{''.join(parts)}</div>"


def NER_output(text: str):
    """Return ``(html, df)`` for the Gradio interface.

    The entity table is computed once and reused for the highlighted HTML.
    """
    df = entities_to_df(text)
    return highlight_entities(text, df), df


# A default value that will be used in the Gradio interface input
default_value = "J.K. Rowling wrote the Harry Potter series, which was published by Bloomsbury Publishing."

# Gradio Interface.
# NER_output returns the HTML and the DataFrame, so there are two outputs:
# the first is gr.HTML and the second is gr.Dataframe.
demo = gr.Interface(
    fn=NER_output,
    inputs=gr.Textbox(label="Enter text:", lines=6, value=default_value),
    outputs=[
        gr.HTML(label="Entities Highlighted"),
        gr.Dataframe(label="Entities in DataFrame format"),
    ],
    title="NER model with highlighted entities",
)

# Only start the web server when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()