ner_spanish / app.py
Dabs's picture
initial commit
acebe59
import os
import subprocess
import sys

import gradio as gr
import spacy
from spacy import displacy
from spacy.tokens import Span
from transformers import AutoModelForTokenClassification
from transformers import AutoTokenizer, PreTrainedTokenizerFast
from transformers import pipeline
# ============ INPUT =================
spacy_model = "es_core_news_sm"

# Download the spaCy model only when it is not installed yet, and do it with
# the interpreter that is running this app: a bare "python" on PATH may belong
# to a different environment. check=True surfaces a failed download here
# instead of as a less obvious error from spacy.load() below.
if not spacy.util.is_package(spacy_model):
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", spacy_model],
        check=True,
    )

# Background colour used for each entity label, both in the rendered text and
# in the legend shown under the interface.
colors = {
    "LOC": "#ff5e5e",
    "MISC": "#ff9999",
    "ORG": "#ffd699",
    "PER": "#80c5c5",
}

# Hugging Face checkpoint that performs the actual entity recognition.
model_name = "mrm8488/bert-spanish-cased-finetuned-ner"

# spaCy is used only to tokenize the text and let displacy render the
# entities, so its own "ner" component is switched off.
nlp = spacy.load(spacy_model)
nlp.disable_pipes("ner")
# ============ Footer, title, descriptions and examples ===============
# HTML legend shown below the interface: one coloured badge per entity label.
article = (
    "<div> Entidades nombradas: "
    + "".join(
        f"<span style='color:#000;background: {color}; font-size: 0.8em; font-weight: bold; line-height: 2.5; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5em;;padding:0.5em;'>{label}</span>"
        for label, color in colors.items()
    )
    + "</div>"
)

# User-facing texts. The accented characters had been corrupted by a wrong
# decoding (UTF-8 bytes read as GBK); they are restored to the intended Spanish.
title = "NER en español"
description = "Esta aplicación es para detección de entidades nombradas en Español"
examples = ["Hola me llamo David Betancur y vivo en Madrid"]
# =============== Model ===============
# Load the tokenizer and the token-classification weights published under
# `model_name`, then wrap both in a "ner" pipeline that is called once per request.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)
ner_pipe = pipeline(task="ner", model=model, tokenizer=tokenizer)
# =============== Function ===============
def _merge_adjacent_entities(entities):
    """Merge runs of same-label entities whose character ranges touch.

    Args:
        entities: Sequence of dicts with "start", "end" and "entity_group"
            keys, in the order the pipeline produced them.

    Returns:
        A list of ``(start, end, label)`` tuples. Consecutive entities are
        merged into one tuple when they share the label and the next one
        starts exactly where the previous one ends.
    """
    merged = []
    for entity in entities:
        start, end, label = entity["start"], entity["end"], entity["entity_group"]
        if merged and merged[-1][2] == label and merged[-1][1] == start:
            # Contiguous piece of the same entity: extend the previous range.
            merged[-1] = (merged[-1][0], end, label)
        else:
            merged.append((start, end, label))
    return merged


def ner(input_text):
    """Detect named entities in *input_text* and render them as HTML.

    The transformers pipeline finds the entities; the spaCy ``Doc`` is only
    the container displacy needs in order to draw them.

    Args:
        input_text: Text to analyse.

    Returns:
        The markup produced by ``displacy.render`` with the entities
        highlighted using the module-level ``colors`` mapping.
    """
    entities = ner_pipe(input_text, aggregation_strategy="first")
    doc = nlp(input_text)

    spans = []
    misaligned = []
    for entity in entities:
        # char_span() returns None when the character offsets do not fall on
        # spaCy token boundaries.
        span = doc.char_span(entity["start"], entity["end"], label=entity["entity_group"])
        if span is not None:
            spans.append(span)
        else:
            misaligned.append(entity)

    # Second chance for the misaligned pieces: glued together they may cover
    # whole spaCy tokens. Ranges that still do not align are skipped, because
    # putting None into doc.ents would make the request fail.
    for start, end, label in _merge_adjacent_entities(misaligned):
        span = doc.char_span(start, end, label=label)
        if span is not None:
            spans.append(span)

    doc.ents = list(doc.ents) + spans

    options = {"ents": list(colors), "colors": colors}
    return displacy.render(doc, style="ent", options=options)
# =============== Interface ===============
# Input: a ten-line text box. Output: the HTML produced by `ner`.
text_input = gr.inputs.Textbox(placeholder="Insertar el texto para analizar", lines=10)
html_output = gr.outputs.HTML()

interface = gr.Interface(
    fn=ner,
    inputs=text_input,
    outputs=html_output,
    examples=examples,
    title=title,
    description=description,
    article=article,
    allow_screenshot=False,
    allow_flagging=False,
)

# Start serving the app.
interface.launch()