File size: 2,922 Bytes
ea90e06 e683309 ea90e06 a09216c 407249a a09216c 5d2b0fe 12fc412 538d7ca 5d2b0fe ea90e06 d8f9f62 a8e534e be34b4b a8e534e be34b4b a8e534e 7078b67 cd7dcf3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import streamlit as st
import transformers
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForMaskedLM
import pandas as pd
import string
st.title("المساعدة اللغوية في التنبؤ بالمتلازمات والمتصاحبات وتصحيحها")
default_value = "بيعت الأسلحة في السوق"
# sent is the variable holding the user's input
# NOTE(review): newer Streamlit releases appear to reject text_area heights
# below 68 px -- confirm against the Streamlit version this app is pinned to.
sent = st.text_area("مدخل", default_value, height=20)


def _load_fill_mask_components():
    """Load the tokenizer and the masked-LM checkpoint used for prediction.

    Returns:
        A ``(tokenizer, model)`` tuple: the "moussaKam/AraBART" tokenizer and
        the "Hamda/test-1-finetuned-AraBART" masked-language model.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(
        "moussaKam/AraBART",
        max_length=128,
        padding=True,
        pad_to_max_length=True,
        truncation=True,
    )
    loaded_model = AutoModelForMaskedLM.from_pretrained(
        "Hamda/test-1-finetuned-AraBART"
    )
    return loaded_tokenizer, loaded_model


# Streamlit re-executes this script from the top on every widget interaction,
# so without caching both checkpoints would be reloaded on each button click.
# Prefer the resource cache; fall back to the legacy decorator on Streamlit
# versions that predate it (output hashing disabled: the objects are opaque).
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is None:
    _cache_resource = st.cache(allow_output_mutation=True)

tokenizer, model = _cache_resource(_load_fill_mask_components)()
#@st.cache
# ASCII punctuation plus the common Arabic marks that string.punctuation
# does not contain; built once instead of once per inspected character.
_PUNCTUATION = frozenset(string.punctuation + "،؛؟")


def next_word(text, pipe):
    """Collect the word-like predictions that ``pipe`` produces for ``text``.

    Args:
        text: Input handed unchanged to ``pipe``; the caller is responsible
            for including the mask placeholder.
        pipe: Callable returning an iterable of dicts that each carry a
            ``'token_str'`` and a ``'score'`` entry (here a fill-mask
            pipeline).

    Returns:
        A dict with two parallel lists, ``'Word'`` and ``'Score'``, in the
        order ``pipe`` yielded them. Predictions whose token is blank or
        contains any punctuation character are left out.
    """
    res_dict = {'Word': [], 'Score': []}
    for prediction in pipe(text):
        token = prediction['token_str']
        # Skip blank tokens and anything containing punctuation: only
        # candidate words should reach the results table.
        if not token.strip() or not _PUNCTUATION.isdisjoint(token):
            continue
        res_dict['Word'].append(token)
        res_dict['Score'].append(prediction['score'])
    return res_dict


if st.button('بحث', disabled=False):
    # Append the mask placeholder so the model predicts the word that
    # follows the user's sentence.
    text_st = sent + ' <mask>'
    pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model, top_k=10)
    dict_next_words = next_word(text_st, pipe)
    # A frame built from equal-length lists already has a 0..n-1 index, so
    # no index reset is needed before display.
    df = pd.DataFrame.from_dict(dict_next_words)
    st.dataframe(df)
def Query2id(voc, query):
    """Translate the words of ``query`` into vocabulary row labels.

    Args:
        voc: DataFrame with a ``'word'`` column; its row labels act as ids.
        query: Text that is split on whitespace into words.

    Returns:
        A list with the row label of the first match for every query word
        found in ``voc``, in query order. Words that do not occur in the
        vocabulary are skipped instead of raising ``IndexError``.
    """
    ids = []
    for word in query.split():
        matches = voc.index[voc['word'] == word].values
        if len(matches):
            ids.append(matches[0])
    return ids


def setQueriesVoc(df, id_list):
    """Gather the distinct ``'ID2'`` values reachable from the given ids.

    Args:
        df: DataFrame indexed by ``ID1`` that has an ``'ID2'`` column.
        id_list: Index labels to look up in ``df``.

    Returns:
        A list of the unique ``'ID2'`` values found on the rows of every id
        in ``id_list``; ids that are not in the index contribute nothing.
        The order of the list is unspecified.
    """
    neighbours = set()
    for term_id in id_list:
        if term_id not in df.index:
            continue
        # Selecting with a list always yields a Series, even when only a
        # single row matches (a scalar label would then return a scalar).
        neighbours.update(df.loc[[term_id], 'ID2'].values)
    return list(neighbours)


if st.button('استعمال الرسم البياني', disabled=False):
    VocMap = './voc.csv'
    ScoreMap = './BM25.csv'
    df3 = pd.read_csv(VocMap, delimiter='\t')
    # Parse the score table once and derive both indexed views from it.
    score_table = pd.read_csv(ScoreMap, delimiter='\t')
    df_g = score_table.set_index(['ID1', 'ID2'])
    df_in = score_table.set_index(['ID1'])

    id_list = Query2id(df3, sent)
    L = setQueriesVoc(df_in, id_list)

    # Candidate word -> sum of its pair scores with every query word.
    tmt = {}
    for nc in L:
        score = 0.0
        for ni in id_list:
            try:
                score = score + df_g.loc[(ni, nc), 'score']
            except KeyError:
                # This (query word, candidate) pair has no entry in the table.
                continue
        key = df3.loc[nc].values[0]
        tmt[key] = score

    tmexp = sorted(tmt.items(), key=lambda x: x[1], reverse=True)
    dict_res = {'word': [], 'score': []}
    if tmexp:
        # Computed once; the extremes do not change while rendering rows.
        lowest = min(tmt.values())
        spread = max(tmt.values()) - lowest
        for key, value in tmexp[:10]:
            # Min-max normalise; when every candidate has the same score
            # they all tie for best, which also avoids dividing by zero.
            normalised = (value - lowest) / spread if spread else 1.0
            new_score = normalised - 0.0001
            # Fixed-point formatting instead of slicing str(): slicing
            # mangles negative values and scientific notation.
            dict_res['score'].append(f"{new_score:.4f}")
            dict_res['word'].append(key)

    res_df = pd.DataFrame.from_dict(dict_res)
    # Display ranks starting at 1 (also valid for an empty result).
    res_df.index = range(1, len(res_df) + 1)
    st.dataframe(res_df)
#st.table(df) |