Spaces:
Sleeping
Sleeping
initial
Browse files- README.md +1 -1
- app.py +60 -0
- requirements.txt +3 -0
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
title: Person Searcher
|
3 |
-
emoji:
|
4 |
colorFrom: blue
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
|
|
1 |
---
|
2 |
title: Person Searcher
|
3 |
+
emoji: 👩‍🦰
|
4 |
colorFrom: blue
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
app.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import pipeline, BertForTokenClassification, BertTokenizerFast
|
3 |
+
|
4 |
+
def pretty_print_results(results):
    """Render token-classification results as an HTML snippet.

    Args:
        results: Iterable of mappings, each providing an ``"entity"`` label
            and a ``"word"`` token. Tokens containing ``"##"`` are treated as
            continuations and are glued to the previous token without a
            separating space.

    Returns:
        A string of HTML. Tokens labelled ``LABEL_2`` are emitted as plain
        text; every other token is wrapped in ``<span class='<label>'>``.
        A ``<style>`` block colouring ``LABEL_0`` and ``LABEL_1`` is appended
        to the end of the string.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from html import escape

    pieces = []
    for item in results:
        label = item["entity"]
        token = str(item["word"])
        is_continuation = "##" in token

        # Token text ultimately comes from user input, so escape it before
        # embedding it in markup.
        text = escape(token.replace("##", ""), quote=False)
        if label != "LABEL_2":
            text = f"<span class='{escape(str(label))}'>{text}</span>"

        pieces.append(text if is_continuation else " " + text)

    joined = "".join(pieces)
    # Drop only the separator that precedes the first token. Slicing
    # unconditionally would eat a real character when the first token is a
    # continuation piece (which carries no leading space).
    if joined.startswith(" "):
        joined = joined[1:]

    # Re-attach punctuation that was emitted as stand-alone tokens.
    # NOTE(review): " r $ " -> " R$ " presumably restores a lower-cased
    # currency marker split into two tokens; confirm against the tokenizer.
    result = (
        joined.replace(" , ", ", ")
        .replace(" . ", ". ")
        .replace(" ? ", "? ")
        .replace(" : ", ": ")
        .replace(" r $ ", " R$ ")
        .replace(" !", "!")
    )
    result = result.replace("¶", "<br></br>")
    return result + """
<style>
.LABEL_0{background:green!important;color:white;}
.LABEL_1{background:blue!important;color:white ;}
</style>
"""
|
21 |
+
|
22 |
+
# Checkpoint identifier handed to both ``from_pretrained`` calls below.
model_name = "rafola/BERT-base-pt-BR-person"

# Load the token-classification model and its tokenizer once, at import time.
model = BertForTokenClassification.from_pretrained(model_name)
tokenizer = BertTokenizerFast.from_pretrained(
    model_name,
    use_fast=True,
)

# Shared "ner" pipeline used by the request handler.
nlp = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
)
|
27 |
+
|
28 |
+
def greet(text):
    """Tag every line of *text* and return the combined HTML.

    The input is split on ``"\\n"``; each piece is passed through the ``nlp``
    pipeline, rendered with ``pretty_print_results`` and followed by a
    ``<br/>`` separator.
    """
    # Run the pipeline over all lines first, then render, mirroring the
    # two-phase order of the original loops.
    predictions = [nlp(line) for line in text.split("\n")]
    return "".join(
        pretty_print_results(tokens) + "<br/>" for tokens in predictions
    )
|
39 |
+
|
40 |
+
# Build the Gradio interface: a text box, four example-filling buttons,
# an HTML output area and a button that runs the tagger.
with gr.Blocks() as demo:
    gr.Markdown("# Named Entity Recognition (NER) - pt-BR")
    gr.Markdown("This model is fine-tuned to primarily identify Brazilian names, ignoring street and place names, even if they contain a person's name.")
    text_input = gr.Textbox(lines=5, placeholder="Type here...")

    # Sample inputs offered through the "Exemplo N" buttons.
    example_1 = "Recebi um convite ontem de um tal Marcos Souza Lima, o nome não me é estranho, acho que é o sobrinho da Tânia. Talvez seja, o Marcos mora na Rua das Palmeiras, e tem fotos do Parque João Costa."
    example_2 = "Confirmados: \nAna Beatriz\nLucas Silva\nMariana Gomes\nFelipe Santos\nCamila Rodrigues\nTalvez:\nGustavo Ferreira\nJuliana Costa\nNão podem comparecer:\nRicardo Almeida\nVanessa Lima"
    example_3 = "O presidente dos Estados Unidos fez um discurso importante ontem."
    example_4 = "fomos na carreata que tinha na Rua João Avelange, o Renato e a Bia apareceu também..."

    with gr.Row():
        # Create the buttons in display order, then attach their handlers;
        # each handler simply writes its example into the text box.
        example_1_button = gr.Button("Exemplo 1")
        example_2_button = gr.Button("Exemplo 2")
        example_3_button = gr.Button("Exemplo 3")
        example_4_button = gr.Button("Exemplo 4")

        example_1_button.click(lambda: example_1, outputs=text_input)
        example_2_button.click(lambda: example_2, outputs=text_input)
        example_3_button.click(lambda: example_3, outputs=text_input)
        example_4_button.click(lambda: example_4, outputs=text_input)

    output_html = gr.HTML()

    # Send the text box content through ``greet`` and show the returned HTML.
    process_button = gr.Button("Process")
    process_button.click(fn=greet, inputs=text_input, outputs=output_html)

demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
transformers
|
2 |
+
gradio
|
3 |
+
torch
|