Spaces:

dt
/

ner_spanish

Runtime error

File size: 2,925 Bytes

acebe59

import os
import subprocess
import sys

import gradio as gr
import spacy
from spacy import displacy
from spacy.tokens import Span
from transformers import AutoModelForTokenClassification
from transformers import AutoTokenizer, PreTrainedTokenizerFast
from transformers import pipeline


# ============ INPUT =================
# Best-effort install of the spaCy Spanish model at start-up. The download is
# run with the interpreter that is executing this script (sys.executable)
# rather than whatever "python" happens to be first on PATH, so the model is
# installed into the environment that spacy.load() below will search.
# The exit status is deliberately ignored: if the model is already installed
# the app can still start even when the download step fails.
subprocess.run(
    [sys.executable, "-m", "spacy", "download", "es_core_news_sm"],
    check=False,
)

# Highlight colour per entity label; also defines which labels are rendered.
colors = {
    "LOC": "#ff5e5e",
    "MISC": "#ff9999",
    "ORG": "#ffd699",
    "PER": "#80c5c5",
}
model_name = "mrm8488/bert-spanish-cased-finetuned-ner"

# spaCy is only used to tokenize the text and to render entities with
# displacy; its own "ner" component is switched off.
nlp = spacy.load("es_core_news_sm")
nlp.disable_pipes("ner")

# ============ Footer, title, descriptions and examples ===============
# The footer is an HTML legend: one coloured badge per entity label, in the
# iteration order of `colors`.
legend_badges = "".join(
    f"<span style='color:#000;background: {color}; font-size: 0.8em; font-weight: bold; line-height: 2.5; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5em;;padding:0.5em;'>{label}</span>"
    for label, color in colors.items()
)
article = "<div> Entidades nombradas: " + legend_badges + "</div>"

title = "NER en español"
description = "Esta aplicación es para detección de entidades nombradas en Español"
examples = ["Hola me llamo David Betancur y vivo en Madrid"]

# =============== Model ===============

# Load the tokenizer and the token-classification model from the same
# checkpoint (`model_name`) and wrap both in a transformers "ner" pipeline.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)
ner_pipe = pipeline(task="ner", tokenizer=tokenizer, model=model)

# =============== Function ===============
def _merge_adjacent_fragments(fragments):
    """Fuse consecutive fragments that share a label and touch end-to-start.

    Args:
        fragments: Entity dicts with "start", "end" and "entity_group" keys,
            in the order the pipeline produced them.

    Returns:
        A list of ``(start, end, label)`` tuples, one per merged run.
    """
    merged = []
    for fragment in fragments:
        start = fragment["start"]
        end = fragment["end"]
        label = fragment["entity_group"]
        if merged and merged[-1][2] == label and merged[-1][1] == start:
            # Same label and contiguous with the previous run: extend it.
            merged[-1] = (merged[-1][0], end, label)
        else:
            merged.append((start, end, label))
    return merged


def ner(input_text):
    """Render the named entities found in ``input_text`` as displacy HTML.

    The transformer pipeline supplies character offsets and labels; spaCy is
    used only to tokenize the text and to render the result. Entities whose
    offsets map onto spaCy token boundaries are added directly. Those that do
    not are first merged with adjacent same-label fragments and retried; any
    that still cannot be mapped are skipped instead of raising.

    Args:
        input_text: Text to analyse.

    Returns:
        The markup produced by ``displacy.render`` with ``style="ent"``.
    """
    entities = ner_pipe(input_text, aggregation_strategy="first")
    doc = nlp(input_text)

    spans = []
    unaligned = []
    for entity in entities:
        span = doc.char_span(
            entity["start"], entity["end"], label=entity["entity_group"]
        )
        if span is not None:
            spans.append(span)
        else:
            # char_span yields None when the offsets cannot be turned into a
            # token span; keep the fragment for the merge-and-retry pass.
            unaligned.append(entity)

    for start, end, label in _merge_adjacent_fragments(unaligned):
        span = doc.char_span(start, end, label=label)
        # A fragment that is still unaligned after merging must be dropped:
        # assigning None to doc.ents would raise and break the whole request.
        if span is not None:
            spans.append(span)

    doc.ents = list(doc.ents) + spans

    options = {"ents": list(colors), "colors": colors}
    return displacy.render(doc, style="ent", options=options)

# =============== Interface ===============
# NOTE(review): `gr.inputs` / `gr.outputs` and `allow_screenshot` look like the
# legacy Gradio API — confirm the Gradio version this app runs on still
# accepts them.
text_input = gr.inputs.Textbox(placeholder="Insertar el texto para analizar", lines=10)
html_output = gr.outputs.HTML()

# Single-function UI: free text in, displacy entity markup out.
interface = gr.Interface(
    fn=ner,
    inputs=text_input,
    outputs=html_output,
    examples=examples,
    title=title,
    description=description,
    article=article,
    allow_flagging=False,
    allow_screenshot=False,
)

interface.launch()