"""Gradio demo: Spanish named-entity recognition rendered with displaCy.

A Hugging Face token-classification pipeline predicts entity character
offsets; a spaCy ``Doc`` (with spaCy's own NER disabled) is used only as a
container so that ``displacy`` can render the predicted entities as HTML.
"""
import importlib
import os
import subprocess
import sys

import gradio as gr
import spacy
from spacy import displacy
from spacy.tokens import Span
from transformers import AutoModelForTokenClassification
from transformers import AutoTokenizer, PreTrainedTokenizerFast
from transformers import pipeline

# ============ INPUT =================
SPACY_MODEL = "es_core_news_sm"

# Highlight colour used by displaCy for each entity label.
colors = {
    "LOC": "#ff5e5e",
    "MISC": "#ff9999",
    "ORG": "#ffd699",
    "PER": "#80c5c5",
}

model_name = "mrm8488/bert-spanish-cased-finetuned-ner"


def _load_spacy_model(name):
    """Return the spaCy pipeline *name*, downloading it first if it is missing."""
    try:
        return spacy.load(name)
    except OSError:
        # spacy.load raises OSError when the package is not installed.
        # Download with the interpreter that is running this script (not
        # whatever "python" resolves to on PATH) and without going through
        # a shell; check=True surfaces a failed download immediately.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", name],
            check=True,
        )
        # Make sure the freshly installed package is visible to this process.
        importlib.invalidate_caches()
        return spacy.load(name)


# The spaCy pipeline is only needed so displaCy can render the entities;
# its own NER component is disabled because entities come from the
# transformers model instead.
nlp = _load_spacy_model(SPACY_MODEL)
nlp.disable_pipes("ner")

# ============ Footer, title, descriptions and examples ===============
# The text shown below the interface: a heading followed by every label name.
# The leading and trailing line breaks are reproduced exactly as they appear
# in the source.
# NOTE(review): they look like the remains of HTML wrapper tags that were
# lost when this file was copied - confirm against the deployed app.
article = (
    "\nEntidades nombradas: "
    + "".join(f"{clase}" for clase in colors)
    + "\n"
)

title = "NER en español"
description = "Esta aplicación es para detección de entidades nombradas en Español"
examples = ["Hola me llamo David Betancur y vivo en Madrid"]

# =============== Model ===============
model = AutoModelForTokenClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
ner_pipe = pipeline("ner", model=model, tokenizer=tokenizer)


# =============== Function ===============
def _merge_adjacent(entities):
    """Fuse consecutive predictions that touch and share the same label.

    Two neighbouring items are merged when they have the same
    ``entity_group`` and the first one's ``end`` equals the next one's
    ``start``. The input dicts are not modified.

    Returns:
        A new list of dicts with keys ``entity_group``, ``start`` and ``end``.
    """
    merged = []
    for entity in entities:
        previous = merged[-1] if merged else None
        if (
            previous is not None
            and previous["entity_group"] == entity["entity_group"]
            and previous["end"] == entity["start"]
        ):
            previous["end"] = entity["end"]
        else:
            merged.append(
                {
                    "entity_group": entity["entity_group"],
                    "start": entity["start"],
                    "end": entity["end"],
                }
            )
    return merged


def ner(input_text):
    """Detect named entities in *input_text* and return displaCy HTML.

    Args:
        input_text: The text to analyse.

    Returns:
        The markup produced by ``displacy.render`` with ``style="ent"``,
        restricted to the labels listed in ``colors``.
    """
    entities = ner_pipe(input_text, aggregation_strategy="first")
    doc = nlp(input_text)

    spans = []
    misaligned = []
    for entity in entities:
        span = doc.char_span(
            entity["start"], entity["end"], label=entity["entity_group"]
        )
        if span is not None:
            spans.append(span)
        else:
            # The predicted offsets do not fall on spaCy token boundaries.
            misaligned.append(entity)

    # Second chance for misaligned predictions: glue touching pieces back
    # together and snap the result outwards to whole tokens. char_span can
    # still return None, and None must never reach doc.ents.
    for entity in _merge_adjacent(misaligned):
        span = doc.char_span(
            entity["start"],
            entity["end"],
            label=entity["entity_group"],
            alignment_mode="expand",
        )
        if span is not None:
            spans.append(span)

    # doc.ents rejects overlapping spans; expanding to token boundaries can
    # create overlaps, so keep only a non-overlapping selection.
    doc.ents = spacy.util.filter_spans(spans)

    options = {"ents": list(colors), "colors": colors}
    return displacy.render(doc, style="ent", options=options)


# =============== Interface ===============
# NOTE(review): gr.inputs / gr.outputs, allow_screenshot and a boolean
# allow_flagging are legacy Gradio names - confirm the pinned Gradio version
# still provides them before upgrading.
interface = gr.Interface(
    title=title,
    description=description,
    article=article,
    allow_screenshot=False,
    allow_flagging=False,
    fn=ner,
    inputs=gr.inputs.Textbox(
        placeholder="Insertar el texto para analizar", lines=10
    ),
    outputs=gr.outputs.HTML(),
    examples=examples,
)

interface.launch()