Update app.py
app.py
CHANGED
@@ -19,21 +19,29 @@ default_value = "أستاذ التعليم"
 sent = st.text_area('المدخل',default_value)

 tokenizer = AutoTokenizer.from_pretrained("moussaKam/AraBART", max_length=128, padding=True, pad_to_max_length = True, truncation=True)
+
 model = AutoModelForMaskedLM.from_pretrained("Hamda/test-1-finetuned-AraBART")
 pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model, top_k=10)
+
 def next_word(text, pipe):
-
-
-
-
-
-
-
-
-
+
+    filter_words = list(string.punctuation).extend(sent)
+    res_dict= {
+        'الكلمة المقترحة':[],
+        'العلامة':[],
+    }
+    for e in pipe(text):
+        if all(c not in filter_words for c in e['token_str']):
+            res_dict['الكلمة المقترحة'].append(e['token_str'])
+            res_dict['العلامة'].append(e['score'])
+    return res_dict
+
 if (st.button('بحث', disabled=False)):
+
     text_st = sent+ ' <mask>'
+
     dict_next_words = next_word(text_st, pipe)
+
     df = pd.DataFrame.from_dict(dict_next_words)
     df.reset_index(drop=True, inplace=True)
     st.dataframe(df)
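The hunk above wires a fill-mask pipeline to the Streamlit text area and filters the suggested tokens. A minimal, runnable sketch of the same idea follows; note that list(string.punctuation).extend(sent) evaluates to None because list.extend() mutates in place, so the sketch builds the character filter with a set union instead. The function name suggest_next_words and the context parameter are illustrative, not part of the commit.

import string

import pandas as pd
from transformers import AutoModelForMaskedLM, AutoTokenizer, pipeline

tokenizer = AutoTokenizer.from_pretrained("moussaKam/AraBART")
model = AutoModelForMaskedLM.from_pretrained("Hamda/test-1-finetuned-AraBART")
pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model, top_k=10)

def suggest_next_words(text, pipe, context=""):
    # Characters that disqualify a candidate: punctuation plus anything already typed.
    filter_chars = set(string.punctuation) | set(context)
    rows = {'الكلمة المقترحة': [], 'العلامة': []}  # "suggested word", "score"
    for e in pipe(text):
        if all(c not in filter_chars for c in e['token_str']):
            rows['الكلمة المقترحة'].append(e['token_str'])
            rows['العلامة'].append(e['score'])
    return rows

sent = "أستاذ التعليم"
df = pd.DataFrame.from_dict(suggest_next_words(sent + ' <mask>', pipe, context=sent))
print(df)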
@@ -57,7 +65,12 @@ if (st.checkbox('الاستعانة بالرسم البياني المعرفي

 #@st.cache
 def Query2id(voc, query):
-
+    try:
+        return [voc.index[voc['word'] == word].values[0] for word in query.split()]
+    except KeyError:
+        st.markdown("""---""")
+        st.write('لم يتم التعرف على الكلمة')
+        st.markdown("""---""")

 id_list = Query2id(df3, sent)
 #@st.cache
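The new Query2id body maps each word of the query to its row index in the vocabulary dataframe and reports unknown words. One detail worth flagging: when no row matches, indexing the empty result raises IndexError rather than KeyError, so the except clause above would not be reached. Below is a hedged sketch that checks the match explicitly, assuming df3 exposes a 'word' column as in the diff; the name query_to_ids is illustrative.

import pandas as pd
import streamlit as st

def query_to_ids(voc: pd.DataFrame, query: str) -> list:
    ids = []
    for word in query.split():
        matches = voc.index[voc['word'] == word].values
        if len(matches) == 0:
            st.markdown("""---""")
            st.write('لم يتم التعرف على الكلمة')  # "The word was not recognised"
            st.markdown("""---""")
            return []
        ids.append(matches[0])
    return ids

# Mirrors the call in the diff (id_list = Query2id(df3, sent)):
# id_list = query_to_ids(df3, sent)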
@@ -87,13 +100,19 @@ if (st.checkbox('الاستعانة بالرسم البياني المعرفي
         key = df3.loc[nc].values[0]
         tmt[key] = score
     return tmt
+
 tmt = compute_score(L, id_list)
+
+for e in sent:
+    del tmt[e]
+
 exp_terms = []
 t_li = tmt.values()
 tmexp = sorted(tmt.items(), key=lambda x: x[1], reverse=True)
 i = 0
 dict_res = {'الكلمة المقترحة':[],
             'العلامة':[]}
+
 for key, value in tmexp:
     new_score=((value-min(t_li))/(max(t_li)-min(t_li)))-0.0001
     dict_res['العلامة'].append(str(new_score)[:6])
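The hunk above also drops the query's own terms from the score table tmt before ranking. Because iterating a string yields characters, the added `for e in sent: del tmt[e]` loop walks over individual letters and raises KeyError for any letter that is not a key. A small sketch that removes whole words and tolerates missing keys; drop_query_terms is an illustrative name, not part of the commit.

def drop_query_terms(tmt: dict, sent: str) -> dict:
    # Remove the words the user already typed; ignore words that are not in tmt.
    for word in sent.split():
        tmt.pop(word, None)
    return tmt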
@@ -101,6 +120,7 @@ if (st.checkbox('الاستعانة بالرسم البياني المعرفي
     i+=1
     if (i==10):
         break
+
 res_df = pd.DataFrame.from_dict(dict_res)
 res_df.index += 1

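The last two hunks min-max normalise the candidate scores and keep the ten best rows for display. A compact sketch of that ranking step is below, with variable names kept close to the diff; the 0.0001 offset and the str(...)[:6] truncation mirror the committed code, while the guard against identical scores is an added assumption.

import pandas as pd

def top_candidates(tmt: dict, k: int = 10) -> pd.DataFrame:
    scores = list(tmt.values())
    lo, hi = min(scores), max(scores)
    span = (hi - lo) or 1.0  # avoid division by zero when all scores are equal
    rows = {'الكلمة المقترحة': [], 'العلامة': []}
    for key, value in sorted(tmt.items(), key=lambda x: x[1], reverse=True)[:k]:
        new_score = (value - lo) / span - 0.0001
        rows['الكلمة المقترحة'].append(key)
        rows['العلامة'].append(str(new_score)[:6])
    res_df = pd.DataFrame.from_dict(rows)
    res_df.index += 1  # 1-based ranks for display
    return res_df

# Hypothetical usage, mirroring the end of the diff:
# res_df = top_candidates(tmt); st.dataframe(res_df)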