Spaces:
Runtime error
Runtime error
File size: 4,567 Bytes
adf4ac7 062527a adf4ac7 58ef0b0 df89742 58ef0b0 50e8f91 599660b a445fea faf86dc a445fea faf86dc a445fea 3cf2a36 58ef0b0 3cf2a36 83f7a4f 3cf2a36 83f7a4f 15aafe1 3cf2a36 83f7a4f 3cf2a36 83f7a4f afff5b8 adf4ac7 96898b2 dcb64e4 44854e8 08b8ceb dcb64e4 b7f70e6 8dd94d9 f185ae2 c22d8af a445fea b7f70e6 8dd94d9 dcb64e4 3cf2a36 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import streamlit as st
from transformers import pipeline
from ipymarkup import format_span_box_markup
# Both pipelines are built with the same tokenizer checkpoint.
TOKENIZER_NAME = "indolem/indobert-base-uncased"

# Streamlit re-executes this script from the top on every widget interaction,
# so the pipelines are built through a resource cache rather than being
# re-created on each rerun. `st.cache_resource` is the current API; older
# Streamlit releases expose the same behaviour as `st.experimental_singleton`.
_cache_resource = getattr(st, "cache_resource", None) or st.experimental_singleton


@_cache_resource
def load_ner_pipeline(model_path):
    """Build the token-classification ("ner") pipeline for one checkpoint.

    Args:
        model_path: Directory of the model weights handed to `pipeline`.

    Returns:
        The pipeline object returned by `transformers.pipeline`; the cache
        decorator hands back the same object on later calls with the same
        path.
    """
    return pipeline("ner", model=model_path, tokenizer=TOKENIZER_NAME)


# Load the pre-trained NER models
model = load_ner_pipeline("/home/user/app/mendobert/")
basemodel = load_ner_pipeline("/home/user/app/base-model/")
# Seed the 'options' session entry once; later runs keep whatever value is
# already stored under that key.
if 'options' not in st.session_state:
    st.session_state.options = ""

# Page heading.
st.title(':blue[MendoBERT] - Named Entity Recognition :sunglasses:')
def button1_callback():
    """Store the first example sentence under the 'options' session key."""
    st.session_state.options = "Aspartylglucosaminuria (AGU) adalah gangguan metabolisme glikoprotein langka."
def button2_callback():
    """Store the second example sentence under the 'options' session key."""
    st.session_state.options = "Mutasi germ - line dari gen BRCA1 membuat wanita cenderung mengalami kanker payudara dini dengan mengorbankan fungsi presumtif gen sebagai penekan tumor."
# The slot for the input box is created before the caption and the example
# buttons, and is only filled in after they have been declared.
placeholder = st.empty()

st.caption('_Examples_')

# One full-width button per example sentence; each click runs the callback
# that writes that sentence into the 'options' session key.
for example_label, fill_example in (
    ('Aspartylglucosaminuria (AGU) adalah gangguan metabolisme glikoprotein langka.', button1_callback),
    ('Mutasi germ - line dari gen BRCA1 membuat wanita cenderung mengalami kanker payudara dini dengan mengorbankan fungsi presumtif gen sebagai penekan tumor.', button2_callback),
):
    st.button(example_label, use_container_width=True, on_click=fill_example)

# The text area uses the same 'options' key the callbacks write to.
text = placeholder.text_area('Enter some text: ', key='options')
# Prefix carried by a token that continues the previous word.
_SUBWORD_PREFIX = "##"

# Raw label prefix -> tag shown to the user, tested in this order.
_LABEL_TAGS = (("LABEL_0", "O"), ("LABEL_1", "B"), ("LABEL_2", "I"))


def _merge_subwords(ner_results):
    """Join "##" continuation tokens onto the token that precedes them.

    Args:
        ner_results: Iterable of token dicts from the "ner" pipeline; the
            keys read here are "word", "start", "end", "entity", "index"
            and "score".

    Returns:
        A list of new dicts, one per merged word. A merged word keeps the
        entity, index and score of its first piece and ends where its last
        piece ends.
    """
    merged = []
    for token in ner_results:
        piece = token["word"]
        # A continuation piece with nothing before it is kept as its own
        # entry instead of indexing into an empty list.
        if piece.startswith(_SUBWORD_PREFIX) and merged:
            previous = merged[-1]
            # Use the pipeline's own end offset: the token text can differ in
            # length from the span it covers in the input (e.g. "[UNK]"), so
            # start + len(word) may point at the wrong character.
            previous["end"] = token["end"]
            previous["word"] += piece[len(_SUBWORD_PREFIX):]
        else:
            merged.append({
                'start': token["start"],
                'end': token["end"],
                'entity': token["entity"],
                'index': token["index"],
                'score': token["score"],
                'word': piece,
            })
    return merged


def _display_tag(entity):
    """Map a raw "LABEL_n" entity name to "O", "B" or "I".

    Names matching none of the known prefixes are returned unchanged.
    """
    for prefix, tag in _LABEL_TAGS:
        if entity.startswith(prefix):
            return tag
    return entity


def _describe_entities(text, ner_results):
    """Turn raw pipeline output into display data for one model.

    Args:
        text: The input string the offsets in `ner_results` refer to.
        ner_results: Token dicts as accepted by `_merge_subwords`.

    Returns:
        A `(descriptions, spans)` pair: `descriptions` is a list of one-line
        summaries and `spans` a list of `(start, end, tag)` tuples, both
        covering only the words whose tag does not start with "O".
    """
    descriptions = []
    spans = []
    for word in _merge_subwords(ner_results):
        entity = _display_tag(word["entity"])
        if entity.startswith("O"):
            continue
        start, end, score = word["start"], word["end"], word["score"]
        spans.append((start, end, entity))
        descriptions.append(
            f"Entity: {entity}, Start:{start}, End:{end}, word:{text[start:end]}, score:{score}"
        )
    return descriptions, spans


if text:
    # MendoBERT
    mendo, spanMendo = _describe_entities(text, model(text))
    # Base Model
    base, spanBase = _describe_entities(text, basemodel(text))

    # NOTE(review): the markup below is rendered with unsafe_allow_html and
    # contains the user-entered text; confirm format_span_box_markup escapes it.
    htmlMendo = ''.join(format_span_box_markup(text, spanMendo))
    htmlBase = ''.join(format_span_box_markup(text, spanBase))

    for heading, details, html in (
        ('MendoBERT', mendo, htmlMendo),
        ('IndoLEM', base, htmlBase),
    ):
        st.subheader(heading)
        st.json(details)
        st.markdown(html, unsafe_allow_html=True)

    st.write("\n")
    st.info("'B' means Beginning of an entity, 'I' means Inside of an entity", icon="ℹ️")

# NOTE(review): assumed to sit outside the `if text:` branch — confirm.
st.write("\n\n")
|