# Hugging Face classes: the fill-mask pipeline, tokenizers, and the
# TensorFlow masked-LM auto class (both checkpoints below are TF weights).
from transformers import (
    AutoTokenizer,
    BertTokenizer,
    FillMaskPipeline,
    TFAutoModelForMaskedLM,
)

import gradio as gr
|
# Tokenizer built from the custom WordPiece vocabulary of the arXiv corpus.
tokenizer_path_1 = "./vocab.txt"
tokenizer_1 = BertTokenizer.from_pretrained(tokenizer_path_1)

# BERT masked-LM checkpoint pretrained on the arXiv corpus.
model_path = "./bert_lm_10"
model_1 = TFAutoModelForMaskedLM.from_pretrained(model_path)

# Fill-mask pipeline backed by the arXiv-pretrained model.
unmasker = FillMaskPipeline(model=model_1, tokenizer=tokenizer_1)

# Example masked sentence from the complexity-theory domain.
txt = "a polynomial [MASK] from 3-SAT."
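
# Quick sanity check (a minimal sketch; the actual suggestions depend on the
# local checkpoint). Each prediction returned by a fill-mask pipeline is a
# dict with "token_str", "score", "token" and "sequence" keys.
for pred in unmasker(txt, top_k=3):
    print(f'{pred["token_str"]}: {pred["score"]:.4f}')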
|
|
# Vanilla English BERT, used as a baseline for comparison.
default_name = "bert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(default_name)
model = TFAutoModelForMaskedLM.from_pretrained(default_name)
unmasker_bert = FillMaskPipeline(model=model, tokenizer=tokenizer)
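
# The two tokenizers segment domain terms very differently, which is part of
# what the demo illustrates (the exact subword splits below depend on the
# local ./vocab.txt):
print(tokenizer_1.tokenize("pspace is closed under complement"))
print(tokenizer.tokenize("pspace is closed under complement"))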
|
|
def unmask_words(txt_with_mask, k_suggestions=5):
    """Return the top-k [MASK] suggestions of both models as {token: score} dicts."""
    # The Gradio slider may hand over a float, but top_k must be an int.
    k_suggestions = int(k_suggestions)

    results_cc = unmasker(txt_with_mask, top_k=k_suggestions)
    # Strip any spaces the tokenizer leaves inside predicted tokens.
    labels = {res["token_str"].replace(" ", ""): res["score"] for res in results_cc}

    results_bert = unmasker_bert(txt_with_mask, top_k=k_suggestions)
    labels_bert = {res["token_str"].replace(" ", ""): res["score"] for res in results_bert}

    return labels, labels_bert
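
# Example usage (a quick sketch; the scores depend on the checkpoints):
cc_preds, bert_preds = unmask_words("as pspace is [MASK] under complement.", 3)
print("CC-BERT :", cc_preds)
print("eng-BERT:", bert_preds)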
|
|
description = """This is a demo of a masked language model pretrained on data collected from ~197k arXiv papers consisting of mathematical proofs and theorems.

The aim of this interface is to show the difference between pretraining on general English and on scientific English.

For more information, visit the [Theoremkb Project](https://github.com/PierreSenellart/theoremkb)
or contact [[email protected]]([email protected]).
"""
|
examples = [
    ["as pspace is [MASK] under complement."],
    ["n!-(n-1)[MASK]"],
    ["[MASK] these two classes is a major problem."],
    ["This would show that the polynomial hierarchy collapses at the second [MASK], which is considered unlikely."],
    ["we consider two ways of measuring complexity: data complexity, which is with respect to the size of the data, and their combined [MASK]"],
]
|
# Gradio UI: a text box for the masked sentence, a slider for the number of
# suggestions, and one label widget per model for the predicted tokens.
input_box = gr.inputs.Textbox(
    lines=20,
    placeholder="Unifying computational entropies via Kullback–Leibler [MASK]",
    label="Enter the masked text:",
)
interface = gr.Interface(
    fn=unmask_words,
    inputs=[input_box, gr.inputs.Slider(1, 10, 1, 5, label="No. of suggestions:")],
    outputs=[
        gr.outputs.Label(label="Top words (CC-BERT):"),
        gr.outputs.Label(label="Top words (eng-BERT):"),
    ],
    examples=examples,
    theme="darkhuggingface",
    title="CC-Bert MLM",
    description=description,
    allow_flagging=True,
)

interface.launch()