# Scrape residue kept as a comment (presumably the hosting page's status banner):
# Spaces: Runtime error / Runtime error
import os
import subprocess
import sys

import gradio as gr
import spacy
from spacy import displacy
from spacy.tokens import Span
from transformers import AutoModelForTokenClassification
from transformers import AutoTokenizer, PreTrainedTokenizerFast
from transformers import pipeline
# ============ INPUT =================
# Fetch the Spanish spaCy model at startup. The download runs through the
# interpreter executing this script (sys.executable) and is passed as an
# argument list, so no shell is involved and the model lands in the same
# environment that spacy.load() reads from. As before, a failed download is
# not fatal here: spacy.load() below reports a missing model on its own.
subprocess.run([sys.executable, "-m", "spacy", "download", "es_core_news_sm"])

# Background color used for each entity label, both in the legend and in the
# displaCy rendering.
colors = {
    "LOC": "#ff5e5e",
    "MISC": "#ff9999",
    "ORG": "#ffd699",
    "PER": "#80c5c5",
}

model_name = "mrm8488/bert-spanish-cased-finetuned-ner"

# The spaCy pipeline is only used to build the Doc that displaCy renders; its
# own "ner" component is disabled because entities come from the transformers
# model instead.
nlp = spacy.load("es_core_news_sm")
nlp.disable_pipes("ner")
# ============ Footer, title, description and examples ===============
# Legend shown under the demo: one colored badge per entity label, in the
# order the labels appear in `colors`.
article = (
    "<div> Entidades nombradas: "
    + "".join(
        f"<span style='color:#000;background: {color}; font-size: 0.8em; "
        "font-weight: bold; line-height: 2.5; border-radius: 0.35em; "
        "text-transform: uppercase; vertical-align: middle; "
        f"margin-left: 0.5em;;padding:0.5em;'>{label}</span>"
        for label, color in colors.items()
    )
    + "</div>"
)

# The accented characters below were mis-decoded (UTF-8 bytes read as GBK,
# e.g. "ñ" shown as "帽" and "ó" as "贸"); they are restored here.
title = "NER en español"
description = "Esta aplicación es para detección de entidades nombradas en Español"
examples = ["Hola me llamo David Betancur y vivo en Madrid"]
# =============== Model ===============
# Load the tokenizer and the token-classification weights identified by
# `model_name`, then wrap both in a transformers "ner" pipeline.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)
ner_pipe = pipeline("ner", tokenizer=tokenizer, model=model)
# =============== Function ===============
def _merge_adjacent_entities(entities):
    """Fuse consecutive same-label entities whose character ranges touch.

    Args:
        entities: Entity dicts with "start", "end" and "entity_group" keys,
            in the order produced by the NER pipeline.

    Returns:
        A list of ``(start, end, label)`` tuples, one per merged run.
    """
    runs = []
    for entity in entities:
        start, end = entity["start"], entity["end"]
        label = entity["entity_group"]
        # Extend the current run only when this piece starts exactly where
        # the previous one ended and carries the same label.
        if runs and runs[-1][2] == label and runs[-1][1] == start:
            runs[-1][1] = end
        else:
            runs.append([start, end, label])
    return [tuple(run) for run in runs]


def ner(input_text):
    """Render the named entities found in ``input_text`` as displaCy markup.

    Entities come from the transformers pipeline (``ner_pipe``) and are
    projected onto a spaCy ``Doc`` by character offsets. Entities whose
    offsets do not map onto a span are collected, merged with their adjacent
    same-label neighbours, and tried once more as a single range.

    Args:
        input_text: Text to analyse.

    Returns:
        The result of ``displacy.render`` for the annotated document, using
        the label colors defined in ``colors``.
    """
    entities = ner_pipe(input_text, aggregation_strategy="first")
    doc = nlp(input_text)

    spans = []
    unaligned = []
    for entity in entities:
        span = doc.char_span(
            entity["start"], entity["end"], label=entity["entity_group"]
        )
        if span is not None:
            spans.append(span)
        else:
            # char_span yielded no span for these offsets; retry after merging.
            unaligned.append(entity)

    for start, end, label in _merge_adjacent_entities(unaligned):
        span = doc.char_span(start, end, label=label)
        # A merged range can still fail to map onto a span. Skip it instead
        # of handing None to doc.ents, which would abort the whole request.
        if span is not None:
            spans.append(span)

    # Assign once, keeping any entities already present on the doc.
    doc.ents = tuple(doc.ents) + tuple(spans)

    options = {"ents": list(colors), "colors": colors}
    return displacy.render(doc, style="ent", options=options)
# =============== Interface ===============
# Components are taken from the top-level gradio namespace (gr.Textbox,
# gr.HTML) instead of the legacy gr.inputs / gr.outputs modules, the obsolete
# allow_screenshot option is dropped, and flagging is disabled with the string
# form "never" rather than a boolean.
# NOTE(review): this targets Gradio 3 or newer — confirm the version the
# deployment pins before merging.
interface = gr.Interface(
    fn=ner,
    inputs=gr.Textbox(placeholder="Insertar el texto para analizar", lines=10),
    outputs=gr.HTML(),
    title=title,
    description=description,
    article=article,
    examples=examples,
    allow_flagging="never",
)
interface.launch()