# Mizo-MLM / app.py
# robzchhangte's picture
# Update app.py
# 01c0e5d verified
import html
import os

import gradio as gr
from transformers import pipeline
# Hugging Face access token, read from the "hf_token" environment variable
# (None when the variable is not set).
HUGGINGFACE_TOKEN = os.environ.get("hf_token")

# The same checkpoint supplies both the model weights and the tokenizer.
_MODEL_ID = "robzchhangte/MizBERT"

# Build the fill-mask pipeline once, at import time, so every request served
# by the app reuses the already-loaded model.
# NOTE(review): recent transformers releases deprecate `use_auth_token` in
# favour of `token` -- confirm against the installed version before renaming.
model = pipeline(
    "fill-mask",
    model=_MODEL_ID,
    tokenizer=_MODEL_ID,
    use_auth_token=HUGGINGFACE_TOKEN,
)
def _render_prediction(text, mask_index, token):
    """Return *text* as HTML with one "[MASK]" swapped for a highlighted token.

    The ``mask_index``-th occurrence of "[MASK]" (0-based) is replaced by
    *token* rendered in bold red; every other occurrence is left as the
    literal "[MASK]". The surrounding text and the token are HTML-escaped so
    user input cannot inject markup into the page.
    """
    highlighted = f"<span style='color:red;'><b>{html.escape(token)}</b></span>"
    head, *rest = (html.escape(part) for part in text.split("[MASK]"))
    rendered = [head]
    for position, part in enumerate(rest):
        rendered.append(highlighted if position == mask_index else "[MASK]")
        rendered.append(part)
    return "".join(rendered)


def fill_the_mask(text):
    """Predict fillers for every "[MASK]" in *text* and render them as HTML.

    Args:
        text: Input sentence; must contain at least one "[MASK]" placeholder.

    Returns:
        An HTML string with one ``<p>`` per candidate (the input sentence with
        the predicted token highlighted in red, followed by its score rounded
        to three decimals), or a plain reminder message when *text* contains
        no "[MASK]".
    """
    if "[MASK]" not in text:
        return "You did not enter \"[MASK]\" in the text. Please write your text again!"

    model_out = model(text)

    # With a single mask the pipeline returns a flat list of candidate dicts;
    # with several masks it returns one such list per mask. Normalise to the
    # nested shape so both cases share one code path.
    if model_out and isinstance(model_out[0], dict):
        model_out = [model_out]

    parts = ["<h3>Predicted Tokens</h3>"]
    for mask_index, candidates in enumerate(model_out):
        ranked = sorted(candidates, key=lambda c: c["score"], reverse=True)
        for candidate in ranked:
            # The pipeline's own "sequence" already has the mask filled in, so
            # the highlight is applied to the original text instead.
            sequence = _render_prediction(text, mask_index, candidate["token_str"])
            parts.append(f"<p>{sequence} - Score: {round(candidate['score'], 3)}</p>")
    return "".join(parts)
# Sample Mizo sentences shown as ready-made inputs; each row is a one-item
# list holding a single sentence with a "[MASK]" placeholder.
examples = [
    [sentence]
    for sentence in (
        "Chief Minister chuan, Bana Kaih Scheme a taka [MASK] a ni thei hnai ta chu lawmawm a tih thu a sawi a.",
        "Thlai thar lei dan tur [MASK] inhrilhhriatna hun pawh an nei nghal a.",
    )
]
# Wire the prediction function into a Gradio app: one text input, HTML
# output, with the sample sentences offered as examples.
my_interface = gr.Interface(
    fn=fill_the_mask,
    inputs="text",
    outputs="html",  # fill_the_mask returns an HTML string
    examples=examples,
    title="MIZO Masked Language Model",
    description=(
        "This is a fine-tune version of mBERT using Mizo Corpus. "
        "To test the app you can find Mizo text here: "
        "https://dipr.mizoram.gov.in/category/mizo-press-releases"
    ),
)

# Launch the web UI only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    my_interface.launch()