Spaces:

robzchhangte
/

Mizo-MLM

Sleeping

File size: 2,024 Bytes

ffee416
2d17b2b
04fada8
bf225b9
e4ecad3
1c90d93
5ab8121
e4ecad3
 
870c9e7
 
1c90d93
e4ecad3
 
 
 
 
 
 
5ab8121
e4ecad3
 
a1c1006
35a3419
 
4653e6f
e4ecad3
 
35a3419
 
 
e4ecad3
35a3419
 
 
 
 
e4ecad3
 
5203a2a
01c0e5d
e4ecad3
 
35a3419
 
e4ecad3

import html
import os

import gradio as gr
from transformers import pipeline

# Hub access token, read from the "hf_token" environment variable
# (None when the variable is not set).
HUGGINGFACE_TOKEN = os.environ.get("hf_token")

# Build the fill-mask pipeline once at import time; the model weights and the
# tokenizer are both loaded from the same Hub repository.
# NOTE(review): recent transformers releases deprecate `use_auth_token` in
# favour of `token` - confirm against the installed version before switching.
model = pipeline(
    task="fill-mask",
    model="robzchhangte/MizBERT",
    tokenizer="robzchhangte/MizBERT",
    use_auth_token=HUGGINGFACE_TOKEN,
)

def fill_the_mask(text):
    """Predict fillers for each ``[MASK]`` in *text* and render them as HTML.

    Args:
        text: Sentence entered by the user. It should contain the literal
            placeholder ``[MASK]`` at least once.

    Returns:
        A plain reminder message when *text* is empty or contains no
        ``[MASK]``. Otherwise an HTML fragment with one ``<p>`` per
        candidate, ordered by descending score within each mask position,
        where the predicted token is shown in bold red at the position of
        the mask it fills.
    """
    mask = "[MASK]"
    if not text or mask not in text:
        return "You did not enter \"[MASK]\" in the text. Please write your text again!"

    model_out = model(text)

    # A single mask yields a flat list of candidate dicts; several masks yield
    # one such list per mask. Normalise to the nested form so both cases share
    # one code path (sorting the nested form by 'score' directly would fail).
    if model_out and isinstance(model_out[0], dict):
        candidate_groups = [model_out]
    else:
        candidate_groups = model_out

    # The pipeline's "sequence" field already has the mask substituted, so
    # searching it for "[MASK]" never highlights anything. Rebuild each line
    # from the user's own text instead, escaping it because it is untrusted
    # input that ends up inside an HTML component.
    pieces = [html.escape(piece) for piece in text.split(mask)]

    rendered = ["<h3>Predicted Tokens</h3>"]
    for mask_index, candidates in enumerate(candidate_groups):
        # Text on either side of the mask being filled; any other masks are
        # left exactly as the user typed them.
        before = mask.join(pieces[:mask_index + 1])
        after = mask.join(pieces[mask_index + 1:])
        for candidate in sorted(candidates, key=lambda c: c["score"], reverse=True):
            token = html.escape(candidate["token_str"])
            score = round(candidate["score"], 3)
            rendered.append(
                f"<p>{before}<span style='color:red;'><b>{token}</b></span>"
                f"{after} - Score: {score}</p>"
            )

    return "".join(rendered)

# Sample inputs offered in the UI; each inner list is one example row holding
# a single sentence with a [MASK] placeholder.
examples = [
    ["Chief Minister chuan, Bana Kaih Scheme a taka [MASK] a ni thei hnai ta chu lawmawm a tih thu a sawi a."],
    ["Thlai thar lei dan tur [MASK] inhrilhhriatna hun pawh an nei nghal a."],
]

# Wire the prediction function into a text-in / HTML-out Gradio interface.
my_interface = gr.Interface(
    fn=fill_the_mask,
    inputs="text",
    outputs="html",  # predictions are returned as an HTML fragment
    examples=examples,
    title="MIZO Masked Language Model",
    description="This is a fine-tune version of mBERT using Mizo Corpus. To test the app you can find Mizo text here: https://dipr.mizoram.gov.in/category/mizo-press-releases",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    my_interface.launch()