# Hosting page status: Spaces - Runtime error
# File size: 2,925 Bytes
# Revision shown by the hosting page: acebe59
import os
import gradio as gr
from transformers import AutoTokenizer, PreTrainedTokenizerFast
from transformers import AutoModelForTokenClassification
from transformers import pipeline
import spacy
from spacy import displacy
from spacy.tokens import Span
# ============ INPUT =================
# Name of the small Spanish spaCy pipeline used for tokenization and rendering.
spacy_model_name = "es_core_news_sm"

# Install the spaCy pipeline only when it is missing. spaCy's own downloader is
# used instead of shelling out to a bare "python" command, which may resolve to
# a different interpreter than the one running this script and whose failure
# would go unnoticed until spacy.load() raised.
if not spacy.util.is_package(spacy_model_name):
    spacy.cli.download(spacy_model_name)

# Background colour used in the rendered HTML for each entity label.
colors = {
    "LOC": "#ff5e5e",
    "MISC": "#ff9999",
    "ORG": "#ffd699",
    "PER": "#80c5c5",
}

# Hugging Face checkpoint that performs the actual entity recognition.
model_name = "mrm8488/bert-spanish-cased-finetuned-ner"

# spaCy is used to build the Doc that displaCy renders; its own "ner"
# component is disabled because entities come from the transformers model.
nlp = spacy.load(spacy_model_name)
nlp.disable_pipes("ner")
# ============ Footer, title, descriptions and examples ===============
# One coloured badge per entity label, shown below the demo as a legend.
legend_badges = "".join(
    f"<span style='color:#000;background: {color}; font-size: 0.8em; font-weight: bold; line-height: 2.5; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5em;;padding:0.5em;'>{label}</span>"
    for label, color in colors.items()
)
article = "<div> Entidades nombradas: " + legend_badges + "</div>"

# Texts displayed by the interface (kept in Spanish: they are user-facing).
title = "NER en español"
description = "Esta aplicación es para detección de entidades nombradas en Español"
examples = ["Hola me llamo David Betancur y vivo en Madrid"]
# =============== Model ===============
# Load the tokenizer and the token-classification weights of the checkpoint
# named by `model_name`, then wrap both in a "ner" pipeline.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)
ner_pipe = pipeline(task="ner", model=model, tokenizer=tokenizer)
# =============== Function ===============
def ner(input_text):
    """Return displaCy HTML highlighting the named entities in ``input_text``.

    The text is tagged with ``ner_pipe`` and tokenized with ``nlp``. Each
    predicted entity is mapped onto the spaCy ``Doc`` through its character
    offsets. Predictions whose offsets do not coincide with spaCy token
    boundaries are merged with directly adjacent predictions of the same
    label and mapped again; a merged run that still does not align is
    skipped instead of being assigned to ``doc.ents`` as ``None``.

    Args:
        input_text: The text to analyse.

    Returns:
        The markup produced by ``displacy.render`` in ``"ent"`` style,
        restricted to the labels listed in ``colors``.
    """
    entities = ner_pipe(input_text, aggregation_strategy="first")
    doc = nlp(input_text)

    spans = list(doc.ents)  # spans that map cleanly onto spaCy tokens
    unaligned = []          # predictions that char_span() could not map
    for entity in entities:
        span = doc.char_span(
            entity["start"], entity["end"], label=entity["entity_group"]
        )
        if span is not None:
            spans.append(span)
        else:
            unaligned.append(entity)

    # Fuse consecutive unaligned fragments that share a label and touch each
    # other (one ends exactly where the next starts) into [label, start, end].
    runs = []
    for entity in unaligned:
        label = entity["entity_group"]
        start = entity["start"]
        end = entity["end"]
        if runs and runs[-1][0] == label and runs[-1][2] == start:
            runs[-1][2] = end
        else:
            runs.append([label, start, end])

    for label, start, end in runs:
        span = doc.char_span(start, end, label=label)
        # char_span() returns None when the offsets still fall inside a token;
        # such a run cannot be represented as a Span, so it is left out.
        if span is not None:
            spans.append(span)

    doc.ents = spans

    options = {"ents": list(colors), "colors": colors}
    return displacy.render(doc, style="ent", options=options)
# =============== Interface ===============
# Gradio front end: a multi-line text box feeds `ner`, whose HTML output is
# displayed as-is. Screenshot and flagging controls are switched off.
interface = gr.Interface(
    fn=ner,
    inputs=gr.inputs.Textbox(placeholder="Insertar el texto para analizar", lines=10),
    outputs=gr.outputs.HTML(),
    examples=examples,
    title=title,
    description=description,
    article=article,
    allow_screenshot=False,
    allow_flagging=False,
)
interface.launch()