# cc-bert / app.py
# Author: Shrey — test version (commit 1cfc7d3, verified)
# Inference setup: build two fill-mask pipelines — one for the custom
# arXiv-pretrained checkpoint, one for stock English BERT as a baseline.
from transformers import FillMaskPipeline ,DistilBertTokenizer,TFAutoModelForMaskedLM,AutoTokenizer
from transformers import BertTokenizer
# Load the custom WordPiece vocabulary shipped alongside the app.
tokenizer_path_1="./vocab.txt"
tokenizer_1 = BertTokenizer.from_pretrained(tokenizer_path_1)
# Local directory holding the TF masked-LM checkpoint (pretrained on arXiv text
# per the UI description below).
model_path="./bert_lm_10"
model_1 = TFAutoModelForMaskedLM.from_pretrained(model_path)
# Fill-mask pipeline for the custom model.
unmasker = FillMaskPipeline(model=model_1,tokenizer=tokenizer_1)
# Sample input kept for manual smoke-testing; expected top suggestion: "reduction".
txt="a polynomial [MASK] from 3-SAT." #reduction
#results=unmasker(txt,top_k=5)
#show the results
#for res in results:
#print(res["sequence"])
#print(res["score"])
# Baseline: vanilla English BERT for side-by-side comparison.
default_name="bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(default_name)
model = TFAutoModelForMaskedLM.from_pretrained(default_name)
unmasker_bert = FillMaskPipeline(model=model,tokenizer=tokenizer)
def _top_tokens(pipeline, masked_text, k):
    """Run one fill-mask pipeline and map each suggested token (spaces
    stripped, matching the original post-processing) to its score."""
    return {res["token_str"].replace(" ", ""): res["score"]
            for res in pipeline(masked_text, top_k=k)}

def unmask_words(txt_with_mask,k_suggestions=5):
    """Return top-k fill-mask suggestions from both models.

    Parameters:
        txt_with_mask: input text containing a [MASK] token.
        k_suggestions: number of suggestions per model (default 5).

    Returns:
        A pair of dicts (token -> score): first from the arXiv-pretrained
        model (`unmasker`), second from plain bert-base-uncased
        (`unmasker_bert`) — the order the Gradio outputs expect.
    """
    # Gradio sliders may deliver a float; the pipelines' top_k must be an int.
    k = int(k_suggestions)
    labels = _top_tokens(unmasker, txt_with_mask, k)
    labels_bert = _top_tokens(unmasker_bert, txt_with_mask, k)
    return labels,labels_bert
#trying our function
#val=unmask_words(txt)
# Gradio UI: one textbox + one slider in, two Label widgets out
# (custom-model suggestions vs. plain English BERT suggestions).
import gradio as gr
description="""This is a demo to show the Masked Language model pretrained on data collected from ~197k papers on arXiv consisting of mathematical proofs and theorems.
The aim of this interface is to show the difference between English and scientific English pretraining.
For more information visit [Theoremkb Project](https://github.com/PierreSenellart/theoremkb)
or contact [[email protected]]([email protected]).
"""
# Canned example inputs shown under the interface; each must contain [MASK].
examples=[["as pspace is [MASK] under complement."],
["n!-(n-1)[MASK]"],
["[MASK] these two classes is a major problem."],
["This would show that the polynomial hierarchy at the second [MASK], which is considered only"],
["""we consider two ways of measuring complexity, data complexity, which is with respect to the size of the data,
and their combined [MASK]"""]
]
# NOTE(review): gr.inputs / gr.outputs are the legacy Gradio 2.x namespaces,
# removed in Gradio 3.x — this app presumably pins an older gradio; confirm.
input_box=gr.inputs.Textbox(lines=20,placeholder="Unifying computational entropies via Kullback–Leibler [MASK]",label="Enter the masked text:")
# Slider(min=1, max=10, step=1, default=5) feeds k_suggestions in unmask_words.
interface=gr.Interface(fn=unmask_words,inputs=[input_box,
gr.inputs.Slider(1,10,1,5,label="No of Suggestions:")],
outputs=[gr.outputs.Label(label="top words:"),gr.outputs.Label(label="top words eng-bert:")],
examples=examples,
theme="darkhuggingface",
title="CC-Bert MLM",description=description,allow_flagging=True)
# Blocks until the server is stopped.
interface.launch()