Spaces:
Runtime error
Runtime error
File size: 3,118 Bytes
adf4ac7 cb8149d adf4ac7 58ef0b0 df89742 58ef0b0 3cf2a36 58ef0b0 3cf2a36 58ef0b0 3cf2a36 83f7a4f 3cf2a36 83f7a4f 3cf2a36 83f7a4f 3cf2a36 83f7a4f 3cf2a36 adf4ac7 83f7a4f 3cf2a36 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import streamlit as st
from ipymarkup import format_span_box_markup, show_span_box_markup
from transformers import pipeline
# Tokenizer shared by both NER checkpoints.
TOKENIZER_NAME = "indolem/indobert-base-uncased"

# Streamlit re-runs this script on every user interaction, so without caching
# both pipelines would be rebuilt each time. Pick whichever resource cache the
# installed Streamlit provides; fall back to no caching on very old versions.
_cache_resource = (
    getattr(st, "cache_resource", None)
    or getattr(st, "experimental_singleton", None)
    or (lambda func: func)
)


@_cache_resource
def load_pipelines():
    """Build the MendoBERT and base-model NER pipelines.

    Returns:
        tuple: ``(mendobert_pipeline, base_model_pipeline)``, both created
        with ``pipeline("ner", ...)`` and the shared tokenizer.
    """
    return (
        pipeline("ner", model="/home/user/app/mendobert/", tokenizer=TOKENIZER_NAME),
        pipeline("ner", model="/home/user/app/base-model/", tokenizer=TOKENIZER_NAME),
    )


# Load the pre-trained NER models (cached across Streamlit reruns when possible)
model, basemodel = load_pipelines()
# Marker that prefixes a token continuing the previous word.
WORDPIECE_PREFIX = "##"

# Raw classifier label prefixes and the BIO tag each one stands for.
# Checked in order with ``startswith``, as the original if/elif chain did.
LABEL_TO_TAG = (("LABEL_0", "O"), ("LABEL_1", "B"), ("LABEL_2", "I"))


def merge_wordpieces(token_results):
    """Merge ``##`` continuation tokens into the word they belong to.

    Args:
        token_results: iterable of dicts with at least the keys ``word``,
            ``start``, ``entity``, ``index`` and ``score`` (and optionally
            ``end``), as consumed from the NER pipelines in this script.

    Returns:
        list[dict]: one new dict per whole word with keys ``start``, ``end``,
        ``entity``, ``index``, ``score`` and ``word``. The entity, index and
        score are those of the word's first piece. Input dicts are not mutated.
    """
    merged = []
    for token in token_results:
        word = token["word"]
        is_continuation = word.startswith(WORDPIECE_PREFIX)
        piece = word[len(WORDPIECE_PREFIX):] if is_continuation else word

        # Prefer the end offset supplied with the token: deriving it from the
        # token text is wrong whenever that text differs in length from the
        # source span (e.g. a placeholder token). Fall back to the derived
        # value when no offset is provided.
        end = token.get("end")
        if end is None:
            end = token["start"] + len(piece)

        if is_continuation and merged:
            merged[-1]["end"] = end
            merged[-1]["word"] += piece
        else:
            # Also reached by a continuation piece with no preceding word,
            # which previously raised IndexError on ``merged[-1]``.
            merged.append({
                'start': token["start"],
                'end': end,
                'entity': token["entity"],
                'index': token["index"],
                'score': token["score"],
                'word': piece,
            })
    return merged


def to_bio_tag(label):
    """Translate a raw ``LABEL_n`` classifier label into its BIO tag.

    Labels matching none of the known prefixes are returned unchanged.
    """
    for prefix, tag in LABEL_TO_TAG:
        if label.startswith(prefix):
            return tag
    return label


def extract_spans(token_results):
    """Return ``(start, end, tag)`` tuples for every non-``O`` word.

    Args:
        token_results: raw per-token output of one NER pipeline call.

    Returns:
        list[tuple]: character spans ready to hand to the markup renderer.
    """
    spans = []
    for entry in merge_wordpieces(token_results):
        tag = to_bio_tag(entry["entity"])
        if not tag.startswith("O"):
            spans.append((entry["start"], entry["end"], tag))
    return spans


def render_spans(text, spans):
    """Render ``text`` with highlighted ``spans`` in the Streamlit page.

    ``show_span_box_markup`` displays through IPython and returns ``None``,
    so passing it to ``st.text`` printed "None". Build the HTML explicitly
    and let Streamlit render it instead.
    """
    html = "".join(format_span_box_markup(text, spans))
    st.markdown(html, unsafe_allow_html=True)


def main():
    """Read text from the page and show the entities found by both models."""
    text = st.text_area('enter some text: ')
    if not text:
        return

    # Run both models before rendering anything, MendoBERT first.
    mendo_spans = extract_spans(model(text))
    base_spans = extract_spans(basemodel(text))

    render_spans(text, mendo_spans)
    render_spans(text, base_spans)


# Streamlit executes the script as ``__main__``, so the app still runs on
# ``streamlit run`` while the helpers above stay importable without side effects.
if __name__ == "__main__":
    main()
|