import gradio as gr
import pandas as pd


from transformers import pipeline



# Load the three fill-mask models whose predictions are combined in predict():
# pipe  -> fine-tuned Clinical-BERT, pipe2 -> Bio_ClinicalBERT, pipe3 -> ClinicalBERT.
pipe, pipe2, pipe3 = [
    pipeline("fill-mask", model=checkpoint)
    for checkpoint in (
        "aminghias/Clinical-BERT-finetuned",
        "emilyalsentzer/Bio_ClinicalBERT",
        "medicalai/ClinicalBERT",
    )
]



def predict(text):
    """Fill the masked word in ``text`` using three ClinicalBERT variants.

    Each module-level fill-mask pipeline (``pipe``, ``pipe2``, ``pipe3``)
    proposes candidate tokens for the mask.  The candidates are outer-joined
    on ``token_str``; a model that did not propose a given token contributes
    a score of 0 for it, and the three scores are averaged.

    Args:
        text: Input sentence. It must contain the mask token exactly once.

    Returns:
        A tuple ``(sequence, table)``. ``sequence`` is the completed sentence
        of the candidate with the highest average score. ``table`` is a
        DataFrame holding the five best candidates with the columns
        ``token_str``, ``score_average``, ``score_finetuned_CBERT``,
        ``score_Bio_CBERT`` and ``score_CBERT``.

    Raises:
        gr.Error: If ``text`` is empty or does not contain exactly one mask
            token.
    """
    # The merging below relies on every pipeline returning one flat list of
    # candidates, which is only the case for a single mask.  Reject anything
    # else up front so the user sees a readable message in the UI instead of
    # an opaque pipeline/KeyError failure.
    mask_token = pipe.tokenizer.mask_token
    if not text or text.count(mask_token) != 1:
        raise gr.Error(
            f"Please enter a sentence containing exactly one {mask_token} token."
        )

    finetuned = pd.DataFrame(pipe(text))
    finetuned['score_finetuned_CBERT'] = finetuned['score']

    bio = pd.DataFrame(pipe2(text))
    bio['score_Bio_CBERT'] = bio['score']

    base = pd.DataFrame(pipe3(text))
    base['score_CBERT'] = base['score']

    # Outer joins keep every token proposed by at least one model.  The first
    # merge suffixes the shared columns with _x (fine-tuned) and _y (Bio); the
    # columns coming from ``base`` keep their plain names in the second merge.
    merged = pd.merge(finetuned, bio, on='token_str', how='outer')
    merged = pd.merge(base, merged, on='token_str', how='outer')

    # Take the completed sentence from whichever model proposed the token,
    # preferring fine-tuned, then Bio, then base.
    merged['sum_sequence'] = (
        merged['sequence_x']
        .fillna(merged['sequence_y'])
        .fillna(merged['sequence'])
    )

    # A model that did not propose a token counts as score 0 in the average.
    score_columns = ['score_finetuned_CBERT', 'score_Bio_CBERT', 'score_CBERT']
    merged[score_columns] = merged[score_columns].fillna(0)
    merged['score_average'] = (
        merged['score_finetuned_CBERT']
        + merged['score_Bio_CBERT']
        + merged['score_CBERT']
    ) / 3

    merged = merged.sort_values(by='score_average', ascending=False)
    merged = merged.reset_index(drop=True)

    best_sequence = merged['sum_sequence'].iloc[0]
    top_candidates = merged[['token_str', 'score_average'] + score_columns].head()

    return (best_sequence, top_candidates)


    
# Gradio front end: a single text box in; the best completed sentence (text)
# and the per-model score table (dataframe) returned by predict() out.
demo = gr.Interface(
    fn=predict,
    title="Filling Missing Clinical/Medical Data ",
    description="This application fills any missing words in the medical domain",
    inputs='text',
    outputs=['text', gr.Dataframe()],
    examples=[
        ['The  high blood pressure was due to [MASK]  which is critical.'],
        ['The  patient is suffering from throat infection causing [MASK] and cough.'],
    ],
)

# Launch the interface as soon as the script is executed.
demo.launch()