File size: 1,371 Bytes
ea90e06 e683309 ea90e06 a09216c 28b7f46 407249a a09216c ea90e06 538d7ca b46708b ea90e06 fa1c92c ea90e06 454fde7 ea90e06 407249a ccc474b 407249a ea90e06 67ec166 dee9089 e7bd68e 67ec166 0af80fa 67ec166 0af80fa 30ba48b ea90e06 407249a e7bd68e a09216c dee9089 c871b03 e7bd68e ccc474b 7078b67 cd7dcf3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
import streamlit as st
import transformers
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForMaskedLM
import pandas as pd
import numpy as np
import string
tokenizer = AutoTokenizer.from_pretrained("moussaKam/AraBART", max_length=128, padding=True, pad_to_max_length = True, truncation=True)
model = AutoModelForMaskedLM.from_pretrained("Hamda/test-1-finetuned-AraBART")
#@st.cache
def next_word(text, pipe):
res_dict= {
'Word':[],
'Score':[],
}
for e in pipe(text):
if e['token_str'] not in string.punctuation:
res_dict['Word'].append(e['token_str'])
res_dict['Score'].append(e['score'])
return res_dict
st.title("المساعدة اللغوية في التنبؤ بالمتلازمات والمتصاحبات والتعبيرات الاصطلاحية وتصحيحها")
default_value = "بيعت الأسلحة في السوق"
# sent is the variable holding the user's input
sent = st.text_area("مدخل", default_value, height=20)
st.checkbox('استعمال الرسم البياني', value=False)
text_st = sent+ ' <mask>'
pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model, top_k=10)
dict_next_words = next_word(text_st, pipe)
df = pd.DataFrame.from_dict(dict_next_words)
df.reset_index(drop=True, inplace=True)
st.dataframe(df)
#st.table(df) |