File size: 1,206 Bytes
ea90e06
e683309
ea90e06
 
a09216c
28b7f46
a09216c
ea90e06
 
c59a4f8
ea90e06
fa1c92c
ea90e06
 
454fde7
 
ea90e06
ccc474b
454fde7
 
ea90e06
 
 
fa1c92c
ea90e06
e7bd68e
 
 
 
 
 
 
ea90e06
caa1fd8
e7bd68e
a09216c
e7bd68e
c871b03
e7bd68e
 
ccc474b
 
 
7078b67
cd7dcf3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import streamlit as st
import transformers
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForMaskedLM
import pandas as pd
import numpy as np


# Load the tokenizer and the masked-LM model for the "moussaKam/AraBART"
# identifier. Both objects are module-level and are consumed further down
# when the fill-mask pipeline is built.
# NOTE(review): padding / truncation / max_length are normally per-call
# tokenizer arguments; confirm they actually take effect when passed to
# from_pretrained.
tokenizer = AutoTokenizer.from_pretrained(
    "moussaKam/AraBART",
    padding=True,
    truncation=True,
    max_length=128,
)
model = AutoModelForMaskedLM.from_pretrained("moussaKam/AraBART")

#@st.cache
def next_word(text, pipe):
    """Collect the predictions produced by ``pipe`` for ``text``.

    Args:
        text: Value handed unchanged to ``pipe``.
        pipe: Callable returning an iterable of mappings, each of which
            must provide the keys ``'token_str'`` and ``'score'``.

    Returns:
        A dict with two parallel lists: ``'Word'`` holds every
        ``'token_str'`` and ``'Score'`` holds every ``'score'``, in the
        order ``pipe`` yielded them.
    """
    # Consume the pipeline output exactly once (it may be a one-shot iterator).
    pairs = [(item['token_str'], item['score']) for item in pipe(text)]
    return {
        'Word': [word for word, _ in pairs],
        'Score': [score for _, score in pairs],
    }

# --- Streamlit page: read the user's text, query the fill-mask pipeline,
# --- and display the predicted words with their scores.
st.title("Predict Next Word")
st.write("Expand your query by leveraging various models")
default_value = "التاريخ هو تحليل و"
# sent is the variable holding the user's input
# NOTE(review): newer Streamlit releases appear to reject text_area heights
# below 68 px; confirm against the deployed Streamlit version.
sent = st.text_area("Input", default_value, height=30)

# Only the trailing 20 characters are used as context for the prediction.
# Slicing already returns the whole string when it is shorter than that,
# so no explicit length check is needed.
text_st = sent[-20:] + ' <mask>'

pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model)
dict_next_words = next_word(text_st, pipe)
df = pd.DataFrame.from_dict(dict_next_words)

st.dataframe(df)
# Chart only the numeric 'Score' column and use the words as bar labels;
# passing the raw frame would mix the string 'Word' column into the
# plotted data.
st.bar_chart(df.set_index('Word'))