# cc-bert / app.py
# Author: Shrey — test version (commit 1cfc7d3, verified)
# Inference setup: build two fill-mask pipelines — one for the custom
# arXiv-pretrained checkpoint, one for stock English BERT as a baseline.
from transformers import FillMaskPipeline ,DistilBertTokenizer,TFAutoModelForMaskedLM,AutoTokenizer
from transformers import BertTokenizer
# Load the custom WordPiece vocabulary shipped alongside the app.
tokenizer_path_1="./vocab.txt"
tokenizer_1 = BertTokenizer.from_pretrained(tokenizer_path_1)
# Local directory holding the TF masked-LM checkpoint (pretrained on arXiv text
# per the UI description below).
model_path="./bert_lm_10"
model_1 = TFAutoModelForMaskedLM.from_pretrained(model_path)
# Fill-mask pipeline for the custom model.
unmasker = FillMaskPipeline(model=model_1,tokenizer=tokenizer_1)
# Sample input kept for manual smoke-testing; expected top suggestion: "reduction".
txt="a polynomial [MASK] from 3-SAT." #reduction
#results=unmasker(txt,top_k=5)
#show the results
#for res in results:
#print(res["sequence"])
#print(res["score"])
# Baseline: vanilla English BERT for side-by-side comparison.
default_name="bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(default_name)
model = TFAutoModelForMaskedLM.from_pretrained(default_name)
unmasker_bert = FillMaskPipeline(model=model,tokenizer=tokenizer)
def _top_tokens(pipeline, masked_text, k):
    """Run one fill-mask pipeline and map each suggested token (spaces
    stripped, matching the original post-processing) to its score."""
    return {res["token_str"].replace(" ", ""): res["score"]
            for res in pipeline(masked_text, top_k=k)}

def unmask_words(txt_with_mask,k_suggestions=5):
    """Return top-k fill-mask suggestions from both models.

    Parameters:
        txt_with_mask: input text containing a [MASK] token.
        k_suggestions: number of suggestions per model (default 5).

    Returns:
        A pair of dicts (token -> score): first from the arXiv-pretrained
        model (`unmasker`), second from plain bert-base-uncased
        (`unmasker_bert`) — the order the Gradio outputs expect.
    """
    # Gradio sliders may deliver a float; the pipelines' top_k must be an int.
    k = int(k_suggestions)
    labels = _top_tokens(unmasker, txt_with_mask, k)
    labels_bert = _top_tokens(unmasker_bert, txt_with_mask, k)
    return labels,labels_bert
#trying our function
#val=unmask_words(txt)
# Gradio UI: one textbox + one slider in, two Label widgets out
# (custom-model suggestions vs. plain English BERT suggestions).
import gradio as gr
description="""This is a demo to show the Masked Language model pretrained on data collected from ~197k papers on arXiv consisting of mathematical proofs and theorems.
The aim of this interface is to show the difference between English and scientific English pretraining.
For more information visit [Theoremkb Project](https://github.com/PierreSenellart/theoremkb)
or contact [[email protected]]([email protected]).
"""
# Canned example inputs shown under the interface; each must contain [MASK].
examples=[["as pspace is [MASK] under complement."],
["n!-(n-1)[MASK]"],
["[MASK] these two classes is a major problem."],
["This would show that the polynomial hierarchy at the second [MASK], which is considered only"],
["""we consider two ways of measuring complexity, data complexity, which is with respect to the size of the data,
and their combined [MASK]"""]
]
# NOTE(review): gr.inputs / gr.outputs are the legacy Gradio 2.x namespaces,
# removed in Gradio 3.x — this app presumably pins an older gradio; confirm.
input_box=gr.inputs.Textbox(lines=20,placeholder="Unifying computational entropies via Kullback–Leibler [MASK]",label="Enter the masked text:")
# Slider(min=1, max=10, step=1, default=5) feeds k_suggestions in unmask_words.
interface=gr.Interface(fn=unmask_words,inputs=[input_box,
gr.inputs.Slider(1,10,1,5,label="No of Suggestions:")],
outputs=[gr.outputs.Label(label="top words:"),gr.outputs.Label(label="top words eng-bert:")],
examples=examples,
theme="darkhuggingface",
title="CC-Bert MLM",description=description,allow_flagging=True)
# Blocks until the server is stopped.
interface.launch()