Spaces:
Runtime error
Runtime error
import streamlit as st
from ipymarkup import format_span_box_markup, show_span_box_markup
from transformers import pipeline
# Build the two "ner" pipelines this app compares: one from the checkpoint
# directory mendobert/ and one from base-model/. Both are given the same
# tokenizer identifier, so it is spelled out once.
# NOTE(review): this runs at module top level; if the hosting framework
# re-executes the script on every interaction, both pipelines are rebuilt each
# time. Consider caching them; confirm against the framework version in use.
_TOKENIZER_NAME = "indolem/indobert-base-uncased"
model = pipeline("ner", model="/home/user/app/mendobert/", tokenizer=_TOKENIZER_NAME)
basemodel = pipeline("ner", model="/home/user/app/base-model/", tokenizer=_TOKENIZER_NAME)
# Label-id prefixes emitted by the checkpoints, paired with the BIO tag shown
# to the user. Matching is by prefix and in this order, mirroring an
# if/elif chain over the same prefixes.
_LABEL_PREFIX_TO_TAG = (
    ("LABEL_0", "O"),
    ("LABEL_1", "B"),
    ("LABEL_2", "I"),
)


def _merge_wordpieces(ner_results):
    """Fold "##" continuation pieces into the token that precedes them.

    Args:
        ner_results: Iterable of dicts with the keys "start", "word",
            "entity", "index" and "score".

    Returns:
        A list of new dicts (the inputs are not mutated) with the keys
        "start", "end", "entity", "index", "score" and "word". A merged token
        keeps the entity, index and score of its first piece.
    """
    merged = []
    for piece in ner_results:
        surface = piece["word"].replace("##", "")
        # The end offset is derived from the start offset plus the length of
        # the piece with its "##" marker removed.
        end = piece["start"] + len(surface)
        # `and merged` guards against a continuation piece that has no
        # predecessor; it is then kept as a token of its own instead of
        # raising IndexError on merged[-1].
        if piece["word"].startswith("##") and merged:
            merged[-1]["end"] = end
            merged[-1]["word"] += surface
        else:
            merged.append({
                'start': piece["start"],
                'end': end,
                'entity': piece["entity"],
                'index': piece["index"],
                'score': piece["score"],
                'word': piece["word"],
            })
    return merged


def _to_bio_tag(label):
    """Map a "LABEL_n" id to its BIO tag; unknown labels pass through."""
    for prefix, tag in _LABEL_PREFIX_TO_TAG:
        if label.startswith(prefix):
            return tag
    return label


def extract_entities(ner_results, text):
    """Turn raw token-level NER output into entity spans and descriptions.

    Args:
        ner_results: Iterable of per-token dicts with the keys "start",
            "word", "entity", "index" and "score".
        text: The string the offsets in ``ner_results`` refer to.

    Returns:
        A ``(spans, descriptions)`` tuple. ``spans`` is a list of
        ``(start, end, tag)`` tuples for every merged token whose tag does
        not start with "O"; ``descriptions`` holds one human-readable line
        per span, in the same order.
    """
    spans = []
    descriptions = []
    for token in _merge_wordpieces(ner_results):
        tag = _to_bio_tag(token["entity"])
        if tag.startswith("O"):
            continue
        start, end = token["start"], token["end"]
        spans.append((start, end, tag))
        descriptions.append(
            f"""Entity: {tag}, Start:{start}, End:{end}, word:{text[start:end]}"""
        )
    return spans, descriptions


# The UI runs only when the file is executed as a script, so importing the
# module (for example to test extract_entities) does not build any widgets.
# Names assigned below remain module globals.
if __name__ == "__main__":
    text = st.text_area('enter some text: ')
    if text:
        ner_results = model(text)
        ner_results2 = basemodel(text)

        # MendoBERT
        spanMendo, mendo = extract_entities(ner_results, text)
        # Base Model
        spanBase, base = extract_entities(ner_results2, text)
        # `mendo` and `base` hold the textual descriptions; they are built
        # but not rendered here.

        # show_span_box_markup renders through IPython's display machinery and
        # returns None, so it cannot feed a Streamlit element. Build the HTML
        # with format_span_box_markup and hand it to st.markdown instead.
        for spans in (spanMendo, spanBase):
            st.markdown(
                "".join(format_span_box_markup(text, spans)),
                unsafe_allow_html=True,
            )