import html
import os

import gradio as gr
from transformers import pipeline

# Hugging Face access token, read from the "hf_token" environment variable
# (None when the variable is not set).
HUGGINGFACE_TOKEN = os.environ.get("hf_token")

# Model identifier shared by the model and tokenizer arguments below.
_MODEL_REPO = "robzchhangte/MizBERT"

# Build the fill-mask pipeline once, when the module is loaded.
# NOTE(review): newer transformers releases appear to deprecate
# `use_auth_token` in favour of `token` - confirm against the installed version.
model = pipeline(
    task="fill-mask",
    model=_MODEL_REPO,
    tokenizer=_MODEL_REPO,
    use_auth_token=HUGGINGFACE_TOKEN,
)

def fill_the_mask(text):
    """Predict the masked token(s) in *text* and render the results as HTML.

    Args:
        text: Input sentence containing at least one literal "[MASK]"
            placeholder.

    Returns:
        A plain reminder message when *text* is None, empty, or contains no
        "[MASK]". Otherwise an HTML fragment listing every prediction,
        highest score first, with the predicted token shown in bold red at
        the position of the mask it fills. When the text holds several
        masks, the predictions are grouped under one heading per mask.
    """
    mask_token = "[MASK]"
    if not text or mask_token not in text:
        return "You did not enter \"[MASK]\" in the text. Please write your text again!"

    model_out = model(text)

    # A single mask yields a flat list of prediction dicts; several masks
    # yield one such list per mask. Normalise to the nested form so both
    # cases share the rendering code (the flat-only code crashed on sorting).
    if model_out and isinstance(model_out[0], dict):
        model_out = [model_out]

    # The pipeline's "sequence" already has the prediction substituted for the
    # mask, so searching it for "[MASK]" never highlights anything. Rebuild the
    # sentence from the user's own text instead, escaping it because it ends
    # up inside HTML.
    segments = [html.escape(part) for part in text.split(mask_token)]
    show_mask_headings = len(model_out) > 1

    parts = ["<h3>Predicted Tokens</h3>"]
    for mask_index, predictions in enumerate(model_out):
        if show_mask_headings:
            parts.append(f"<h4>Mask {mask_index + 1}</h4>")

        ranked = sorted(predictions, key=lambda p: p["score"], reverse=True)
        for prediction in ranked:
            token = html.escape(prediction["token_str"])
            highlighted = f"<span style='color:red;'><b>{token}</b></span>"

            # Fill only the mask this prediction belongs to; any other mask
            # stays visible as a literal placeholder.
            pieces = [segments[0]]
            for position, segment in enumerate(segments[1:]):
                pieces.append(highlighted if position == mask_index else mask_token)
                pieces.append(segment)
            sequence = "".join(pieces)

            parts.append(f"<p>{sequence} - Score: {round(prediction['score'], 3)}</p>")

    return "".join(parts)

# Sample sentences passed to the interface as clickable examples; each one
# contains a single "[MASK]" placeholder.
_EXAMPLE_SENTENCES = (
    "Chief Minister chuan, Bana Kaih Scheme a taka [MASK] a ni thei hnai ta chu lawmawm a tih thu a sawi a.",
    "Thlai thar lei dan tur [MASK] inhrilhhriatna hun pawh an nei nghal a.",
)

# One row per example, one column per input component (a single text input).
examples = [[sentence] for sentence in _EXAMPLE_SENTENCES]

# Gradio UI: one text input wired to fill_the_mask, output rendered as HTML.
my_interface = gr.Interface(
    fn=fill_the_mask,
    inputs="text",
    outputs="html",  # fill_the_mask builds HTML markup for its predictions
    examples=examples,
    title="MIZO Masked Language Model",
    description=(
        "This is a fine-tune version of mBERT using Mizo Corpus. "
        "To test the app you can find Mizo text here: "
        "https://dipr.mizoram.gov.in/category/mizo-press-releases"
    ),
)

# Launch the interface only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    my_interface.launch()