"""Streamlit demo comparing two token-classification (NER) models on one text.

The user types text; both the fine-tuned model and the base model tag it, and
for each model the non-"O" words are listed as JSON and highlighted inline.
"""

import streamlit as st
from transformers import pipeline
from ipymarkup import format_span_box_markup, show_span_box_markup

# Load the pre-trained NER models. Both use the same tokenizer checkpoint.
# NOTE(review): Streamlit re-runs this script on every interaction, so both
# pipelines are rebuilt each time. Consider caching them if the installed
# Streamlit version provides a resource cache.
model = pipeline("ner", model="/home/user/app/mendobert/", tokenizer="indolem/indobert-base-uncased")
basemodel = pipeline("ner", model="/home/user/app/base-model/", tokenizer="indolem/indobert-base-uncased")

# Raw label prefixes emitted by the models, checked in this order, and the
# short tag shown to the user.
_LABEL_PREFIX_TO_TAG = (("LABEL_0", "O"), ("LABEL_1", "B"), ("LABEL_2", "I"))


def _short_tag(label):
    """Return "O"/"B"/"I" for a ``LABEL_n`` label, or *label* unchanged."""
    for prefix, tag in _LABEL_PREFIX_TO_TAG:
        if label.startswith(prefix):
            return tag
    return label


def _merge_wordpieces(tokens):
    """Merge "##" continuation pieces into the word that precedes them.

    Args:
        tokens: per-token dicts from the pipeline; each must provide
            "word", "start" and "entity" (and normally "end").

    Returns:
        A list of dicts with "start", "end", "entity" and "word", one per
        whole word. A merged word keeps the entity of its first piece.
    """
    words = []
    for token in tokens:
        piece = token["word"].replace("##", "")
        # Prefer the pipeline's own end offset: the token text may not have
        # the same length as the source span (e.g. "[UNK]"). Fall back to the
        # length-based estimate when no offset is reported.
        end = token.get("end")
        if end is None:
            end = token["start"] + len(piece)

        if token["word"].startswith("##") and words:
            words[-1]["end"] = end
            words[-1]["word"] += piece
        else:
            # Also reached by a leading "##" piece with nothing to attach to,
            # which previously raised IndexError.
            words.append({
                "start": token["start"],
                "end": end,
                "entity": token["entity"],
                "word": token["word"],
            })
    return words


def _collect_entities(text, tokens):
    """Build the JSON descriptions and highlight spans for one model's output.

    Returns:
        ``(descriptions, spans)`` where *descriptions* is a list of strings
        and *spans* is a list of ``(start, end, tag)`` tuples. Words tagged
        "O" are left out of both.
    """
    descriptions = []
    spans = []
    for word in _merge_wordpieces(tokens):
        tag = _short_tag(word["entity"])
        if tag.startswith("O"):
            continue
        start, end = word["start"], word["end"]
        spans.append((start, end, tag))
        descriptions.append(f"""Entity: {tag}, Start:{start}, End:{end}, word:{text[start:end]}""")
    return descriptions, spans


def _render(text, tokens):
    """Show one model's entities as JSON followed by highlighted text."""
    descriptions, spans = _collect_entities(text, tokens)
    st.json(descriptions)
    # format_span_box_markup yields HTML fragments; the show_* variant is an
    # IPython display helper and, per the ipymarkup docs, does not return the
    # markup, so it cannot feed st.markdown.
    html = "".join(format_span_box_markup(text, spans))
    st.markdown(html, unsafe_allow_html=True)


text = st.text_area('enter some text: ')

if text:
    # MendoBERT
    _render(text, model(text))
    # Base Model
    _render(text, basemodel(text))