"""Gradio demo app for the MizBERT Mizo masked language model."""

import html
import os

import gradio as gr
from transformers import pipeline

# Hugging Face access token, read from the ``hf_token`` environment variable
# (``None`` when the variable is not set).
HUGGINGFACE_TOKEN = os.getenv("hf_token")

# Build the fill-mask pipeline once at import time so every request reuses it.
# NOTE(review): newer transformers releases deprecate ``use_auth_token`` in
# favour of ``token`` - confirm the installed version before switching.
model = pipeline(
    task="fill-mask",
    model="robzchhangte/MizBERT",
    tokenizer="robzchhangte/MizBERT",
    use_auth_token=HUGGINGFACE_TOKEN,  # Use the token to authenticate
)

# Placeholder the user must put in the input text.
_MASK_TOKEN = "[MASK]"


def _render_predictions(predictions):
    """Return one ``<p>`` element per prediction, highest score first.

    Args:
        predictions: Iterable of dicts that each provide the ``"sequence"``
            and ``"score"`` keys.

    Returns:
        A list of HTML strings, one per prediction.
    """
    ranked = sorted(predictions, key=lambda item: item["score"], reverse=True)
    # The sequence is derived from user input, so escape it before it is
    # embedded in the HTML output.
    return [
        f"<p>{html.escape(item['sequence'])} - Score: {round(item['score'], 3)}</p>"
        for item in ranked
    ]


def fill_the_mask(text):
    """Predict the masked token(s) in *text* and format the result as HTML.

    Args:
        text: Input sentence; it must contain at least one ``[MASK]``.

    Returns:
        An HTML string listing the predicted sequences with their scores,
        or a plain message asking the user to retry when *text* is empty or
        contains no ``[MASK]``.
    """
    if not text or _MASK_TOKEN not in text:
        return "You did not enter \"[MASK]\" in the text. Please write your text again!"

    # Apply the model
    model_out = model(text)

    # A single mask yields a flat list of dicts; several masks yield one list
    # of candidates per mask. Normalise both shapes to a list of groups.
    if model_out and isinstance(model_out[0], dict):
        groups = [model_out]
    else:
        groups = model_out

    # Block-level elements are used because bare newlines collapse when the
    # string is rendered by the HTML output component.
    parts = ["<h3>Predicted Tokens</h3>"]
    for position, predictions in enumerate(groups, start=1):
        if len(groups) > 1:
            parts.append(f"<h4>Mask {position}</h4>")
        parts.extend(_render_predictions(predictions))
    return "".join(parts)


examples = [
    ["Chief Minister chuan, Bana Kaih Scheme a taka [MASK] a ni thei hnai ta chu lawmawm a tih thu a sawi a."],
    ["Thlai thar lei dan tur [MASK] inhrilhhriatna hun pawh an nei nghal a."],
]

# Create a Gradio user interface
my_interface = gr.Interface(
    title="MIZO Masked Language Model",
    description="This is a fine-tune version of mBERT using Mizo Corpus. To test the app you can find Mizo text here: https://dipr.mizoram.gov.in/category/mizo-press-releases",
    fn=fill_the_mask,
    inputs="text",
    examples=examples,
    outputs="html",  # Output is rendered as HTML
)

if __name__ == "__main__":
    # Launch the Gradio interface
    my_interface.launch()