File size: 2,024 Bytes
ffee416
2d17b2b
04fada8
bf225b9
e4ecad3
1c90d93
5ab8121
e4ecad3
 
870c9e7
 
1c90d93
e4ecad3
 
 
 
 
 
 
5ab8121
e4ecad3
 
a1c1006
35a3419
 
4653e6f
e4ecad3
 
35a3419
 
 
e4ecad3
35a3419
 
 
 
 
e4ecad3
 
5203a2a
01c0e5d
e4ecad3
 
35a3419
 
e4ecad3
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import html
import os

import gradio as gr
from transformers import pipeline

# The access token is read from the "hf_token" environment variable; it is
# None when that variable is not set.
HUGGINGFACE_TOKEN = os.getenv("hf_token")

# Build the fill-mask pipeline once at import time so that every request
# reuses the same loaded model and tokenizer.
# `token` is the supported keyword for authentication; the older
# `use_auth_token` spelling is deprecated by transformers.
model = pipeline(
    task="fill-mask",
    model="robzchhangte/MizBERT",
    tokenizer="robzchhangte/MizBERT",
    token=HUGGINGFACE_TOKEN,
)

_MASK_TOKEN = "[MASK]"


def _render_with_prediction(segments, mask_index, token_str):
    """Return the input as HTML with one mask replaced by a highlighted token.

    Args:
        segments: The user's text split on the mask token.
        mask_index: Zero-based position of the mask to substitute.
        token_str: The predicted token to show in place of that mask.

    Returns:
        An HTML fragment. All user text and the predicted token are escaped;
        any other masks are kept as the literal mask token.
    """
    highlighted = (
        f"<span style='color:red;'><b>{html.escape(token_str)}</b></span>"
    )
    pieces = [html.escape(segments[0])]
    for position, segment in enumerate(segments[1:]):
        pieces.append(highlighted if position == mask_index else _MASK_TOKEN)
        pieces.append(html.escape(segment))
    return "".join(pieces)


def fill_the_mask(text):
    """Predict the masked token(s) in *text* and render the results as HTML.

    Args:
        text: Sentence containing at least one "[MASK]" placeholder.

    Returns:
        A plain message when *text* is empty or has no "[MASK]"; otherwise an
        HTML fragment with one paragraph per prediction, ordered by
        descending score, with the predicted token highlighted in red.
    """
    if not text or _MASK_TOKEN not in text:
        return "You did not enter \"[MASK]\" in the text. Please write your text again!"

    model_out = model(text)

    # With a single mask the pipeline returns a flat list of predictions;
    # with several masks it returns one list per mask. Normalise to the
    # nested form so both cases share the same rendering loop.
    if model_out and isinstance(model_out[0], dict):
        model_out = [model_out]

    # The pipeline's "sequence" field already has the mask substituted, so
    # there is nothing left in it to highlight. Rebuild each line from the
    # user's own text instead.
    segments = text.split(_MASK_TOKEN)
    several_masks = len(model_out) > 1

    parts = ["<h3>Predicted Tokens</h3>"]
    for mask_index, predictions in enumerate(model_out):
        if several_masks:
            parts.append(f"<h4>Mask {mask_index + 1}</h4>")
        ranked = sorted(predictions, key=lambda p: p["score"], reverse=True)
        for prediction in ranked:
            sequence = _render_with_prediction(
                segments, mask_index, prediction["token_str"]
            )
            parts.append(
                f"<p>{sequence} - Score: {round(prediction['score'], 3)}</p>"
            )

    return "".join(parts)

# Sample sentences offered in the UI; each one carries a single [MASK].
_EXAMPLE_SENTENCES = (
    "Chief Minister chuan, Bana Kaih Scheme a taka [MASK] a ni thei hnai ta chu lawmawm a tih thu a sawi a.",
    "Thlai thar lei dan tur [MASK] inhrilhhriatna hun pawh an nei nghal a.",
)

# Each example is wrapped in its own one-element list (one value per input).
examples = [[sentence] for sentence in _EXAMPLE_SENTENCES]

# Text shown at the top of the page.
_APP_TITLE = "MIZO Masked Language Model"
_APP_DESCRIPTION = (
    "This is a fine-tune version of mBERT using Mizo Corpus. "
    "To test the app you can find Mizo text here: "
    "https://dipr.mizoram.gov.in/category/mizo-press-releases"
)

# Wire the prediction function to a Gradio UI: one text box in, HTML out.
my_interface = gr.Interface(
    fn=fill_the_mask,
    inputs="text",
    outputs="html",  # the result is displayed as HTML
    examples=examples,
    title=_APP_TITLE,
    description=_APP_DESCRIPTION,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    my_interface.launch()