Spaces:

dt
/

ner_spanish

Runtime error

App Files Files Community

ner_spanish / app.py

Dabs

innitial commit

acebe59 about 3 years ago

raw

history blame contribute delete

2.93 kB

	import importlib
	import os
	import subprocess
	import sys

	import gradio as gr
	import spacy
	from spacy import displacy
	from spacy.tokens import Span
	from transformers import AutoModelForTokenClassification
	from transformers import AutoTokenizer, PreTrainedTokenizerFast
	from transformers import pipeline


	# ============ INPUT =================
	SPACY_MODEL = "es_core_news_sm"


	def _load_spacy_model(name):
	    """Load the spaCy pipeline *name*, installing it first if it is missing.

	    The model is only downloaded when the first ``spacy.load`` fails, so a
	    restart with the model already installed needs no network access.

	    Args:
	        name: Name of the spaCy model package to load.

	    Returns:
	        The loaded spaCy ``Language`` object.

	    Raises:
	        subprocess.CalledProcessError: If the download command exits with a
	            non-zero status.
	    """
	    try:
	        return spacy.load(name)
	    except OSError:
	        # spaCy reports a missing model package as OSError. Install it with
	        # the interpreter that is running this script (a bare "python" on
	        # PATH may be a different environment) and fail loudly on error.
	        subprocess.run(
	            [sys.executable, "-m", "spacy", "download", name],
	            check=True,
	        )
	        # Make the freshly installed package visible to this process.
	        importlib.invalidate_caches()
	        return spacy.load(name)


	# Background colour used for each entity label, both in the rendered
	# entities and in the legend.
	colors = {
	    "LOC": "#ff5e5e",
	    "MISC": "#ff9999",
	    "ORG": "#ffd699",
	    "PER": "#80c5c5",
	}
	model_name = "mrm8488/bert-spanish-cased-finetuned-ner"

	# This pipeline is used for displaCy, to render the entities. Its own "ner"
	# component is disabled; entities are assigned in ner() from the
	# transformers pipeline output instead.
	nlp = _load_spacy_model(SPACY_MODEL)
	nlp.disable_pipes("ner")

	# ============ Footer, title, descriptions and examples ===============
	title = "NER en español"
	description = "Esta aplicación es para detección de entidades nombradas en Español"
	examples = ["Hola me llamo David Betancur y vivo en Madrid"]

	# Footer legend: one coloured badge per entity label defined in `colors`.
	_legend_badges = [
	    f"<span style='color:#000;background: {colors[clase]}; font-size: 0.8em; font-weight: bold; line-height: 2.5; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5em;;padding:0.5em;'>{clase}</span>"
	    for clase in colors
	]
	article = "<div> Entidades nombradas: " + "".join(_legend_badges) + "</div>"

	# =============== Model ===============

	# Load the tokenizer and the token-classification model named by
	# `model_name`, then wrap both in a transformers "ner" pipeline.
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForTokenClassification.from_pretrained(model_name)
	ner_pipe = pipeline(
	    task="ner",
	    tokenizer=tokenizer,
	    model=model,
	)

	# =============== Function ===============
	def _merge_adjacent_fragments(fragments):
	    """Join consecutive same-label fragments whose character ranges touch.

	    Args:
	        fragments: Entity dicts with "start", "end" and "entity_group" keys,
	            in the order produced by the NER pipeline.

	    Returns:
	        A list of ``(start, end, label)`` tuples. A fragment is folded into
	        the previous tuple when it has the same label and starts exactly
	        where the previous one ends; otherwise it starts a new tuple.
	    """
	    merged = []
	    for fragment in fragments:
	        start = fragment["start"]
	        end = fragment["end"]
	        label = fragment["entity_group"]
	        if merged and merged[-1][2] == label and merged[-1][1] == start:
	            merged[-1] = (merged[-1][0], end, label)
	        else:
	            merged.append((start, end, label))
	    return merged


	def ner(input_text):
	    """Return displaCy HTML highlighting the named entities in *input_text*.

	    Entities come from ``ner_pipe``; the spaCy ``nlp`` pipeline only provides
	    the ``Doc`` that displaCy renders. Each predicted entity is mapped onto
	    the Doc by character offsets. Entities that cannot be mapped on their own
	    are merged with adjacent same-label fragments and tried once more.

	    Args:
	        input_text: Text to analyse.

	    Returns:
	        The markup produced by ``displacy.render`` with ``style="ent"``.
	    """
	    doc = nlp(input_text)

	    spans = []
	    unaligned = []
	    for entity in ner_pipe(input_text, aggregation_strategy="first"):
	        span = doc.char_span(
	            entity["start"], entity["end"], label=entity["entity_group"]
	        )
	        if span is not None:
	            spans.append(span)
	        else:
	            # char_span gave no span for these offsets; keep the fragment so
	            # it can be merged with its neighbours below.
	            unaligned.append(entity)

	    for start, end, label in _merge_adjacent_fragments(unaligned):
	        span = doc.char_span(start, end, label=label)
	        # A merged range can still fail to map onto the Doc. Skip it rather
	        # than putting None into doc.ents, which would abort the request.
	        if span is not None:
	            spans.append(span)

	    # Assign all entities in one step instead of rebuilding doc.ents per span.
	    doc.ents = list(doc.ents) + spans

	    options = {"ents": list(colors), "colors": colors}
	    return displacy.render(doc, style="ent", options=options)

	# =============== Interface ===============
	# Input: a 10-line text box. Output: the HTML string returned by ner().
	_text_input = gr.inputs.Textbox(lines=10, placeholder="Insertar el texto para analizar")
	_html_output = gr.outputs.HTML()

	interface = gr.Interface(
	    fn=ner,
	    inputs=_text_input,
	    outputs=_html_output,
	    title=title,
	    description=description,
	    article=article,
	    examples=examples,
	    allow_screenshot=False,
	    allow_flagging=False,
	)

	interface.launch()