Spaces:

robzchhangte
/

Mizo-MLM

Running

App Files Files Community

Mizo-MLM / app.py

robzchhangte

Update app.py

01c0e5d verified 4 months ago

raw

history blame contribute delete

2.02 kB

	import os
	import gradio as gr
	from transformers import pipeline

	# Read the Hugging Face access token from the "hf_token" environment
	# variable; os.getenv returns None when the variable is not set.
	HUGGINGFACE_TOKEN = os.getenv("hf_token")

	# Build the fill-mask pipeline once, at import time, so every request
	# handled by fill_the_mask reuses the same loaded model and tokenizer.
	model = pipeline(
	    task="fill-mask",
	    model="robzchhangte/MizBERT",
	    tokenizer="robzchhangte/MizBERT",
	    # `token` replaces the deprecated `use_auth_token` keyword.
	    token=HUGGINGFACE_TOKEN,
	)

	def fill_the_mask(text):
	    """Predict the token(s) hidden behind ``[MASK]`` and render them as HTML.

	    Args:
	        text: Sentence typed by the user; it must contain the literal
	            ``[MASK]`` placeholder at least once.

	    Returns:
	        A plain error message when ``text`` has no ``[MASK]``. Otherwise an
	        HTML fragment: a heading followed by one ``<p>`` per candidate,
	        showing the input sentence with the predicted token in bold red
	        and the candidate's score rounded to three decimals. Candidates
	        are ordered by descending score; with several masks they are
	        grouped per mask, in the order the masks appear.
	    """
	    # Local import keeps this block self-contained.
	    import html

	    if "[MASK]" not in text:
	        return "You did not enter \"[MASK]\" in the text. Please write your text again!"

	    model_out = model(text)

	    # The fill-mask pipeline returns a flat list of candidate dicts for a
	    # single mask and a list of such lists (one per mask) for several
	    # masks. Normalise both shapes to "one list of candidates per mask".
	    if model_out and isinstance(model_out[0], dict):
	        per_mask = [model_out]
	    else:
	        per_mask = list(model_out)

	    # The pipeline's "sequence" field already has the mask filled in, so
	    # there is nothing left to highlight in it. Rebuild the sentence from
	    # the user's text instead, escaping it because it is rendered as HTML.
	    pieces = [html.escape(piece) for piece in text.split("[MASK]")]
	    mask_count = len(pieces) - 1

	    rows = ["<h3>Predicted Tokens</h3>"]
	    for mask_index, candidates in enumerate(per_mask):
	        ranked = sorted(candidates, key=lambda c: c["score"], reverse=True)
	        for candidate in ranked:
	            token = html.escape(str(candidate["token_str"]))
	            # Only the mask this candidate belongs to is filled in; any
	            # other masks stay visible as "[MASK]".
	            fillers = ["[MASK]"] * mask_count
	            if mask_index < mask_count:
	                fillers[mask_index] = (
	                    f"<span style='color:red;'><b>{token}</b></span>"
	                )
	            sequence = pieces[0] + "".join(
	                filler + piece for filler, piece in zip(fillers, pieces[1:])
	            )
	            rows.append(
	                f"<p>{sequence} - Score: {round(candidate['score'], 3)}</p>"
	            )

	    return "".join(rows)

	# Sample sentences offered in the UI; Gradio expects one inner list per
	# example row, holding one value per input component.
	examples = [
	    [sentence]
	    for sentence in (
	        "Chief Minister chuan, Bana Kaih Scheme a taka [MASK] a ni thei hnai ta chu lawmawm a tih thu a sawi a.",
	        "Thlai thar lei dan tur [MASK] inhrilhhriatna hun pawh an nei nghal a.",
	    )
	]

	# Wire fill_the_mask into a Gradio interface: a text box in, HTML out,
	# with the sample sentences offered as clickable examples.
	my_interface = gr.Interface(
	    fn=fill_the_mask,
	    inputs="text",
	    outputs="html",  # the function returns an HTML fragment
	    examples=examples,
	    title="MIZO Masked Language Model",
	    description=(
	        "This is a fine-tune version of mBERT using Mizo Corpus. "
	        "To test the app you can find Mizo text here: "
	        "https://dipr.mizoram.gov.in/category/mizo-press-releases"
	    ),
	)

	# Launch the web app only when this file is executed as a script.
	if __name__ == "__main__":
	    my_interface.launch()