Spaces:

Hamda
/

AraJARIR

Runtime error

App Files Files Community

Hamda committed on May 29, 2022

Commit

a8e534e

1 Parent(s): be34b4b

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -70

app.py CHANGED Viewed

@@ -11,81 +11,80 @@ default_value = "بيعت الأسلحة في السوق"
 # sent is the variable holding the user's input
 sent = st.text_area("مدخل", default_value, height=20)
-st.checkbox('استعمال الرسم البياني', value=False)
-tmt = {}
-VocMap = './voc.csv'
-ibra_gr = './BM25.csv'
-df3 = pd.read_csv(VocMap, delimiter='\t')
-df_g = pd.read_csv(ibra_gr, delimiter='\t')
-df_g.set_index(['ID1','ID2'], inplace=True)
-df_in = pd.read_csv(ibra_gr, delimiter='\t')
-df_in.set_index(['ID1'], inplace=True)
-def Query2id(voc, query):
-    return [voc.index[voc['word'] == word].values[0] for word in query.split()]
-id_list = Query2id(df3, sent)
-def setQueriesVoc(df, id_list):
-    res = []
-    for e in id_list:
-        res.extend(list(df.loc[e]['ID2'].values))
-    return list(set(res))
-L = setQueriesVoc(df_in, id_list)
 tokenizer = AutoTokenizer.from_pretrained("moussaKam/AraBART", max_length=128, padding=True, pad_to_max_length = True, truncation=True)
 model = AutoModelForMaskedLM.from_pretrained("Hamda/test-1-finetuned-AraBART")
 #@st.cache
-def next_word(text, pipe):
-    res_dict= {
-      'Word':[],
-      'Score':[],
-    }
-    for e in pipe(text):
-        if all(c not in list(string.punctuation) for c in e['token_str']):
-            res_dict['Word'].append(e['token_str'])
-            res_dict['Score'].append(e['score'])
-    return res_dict
-text_st = sent+ ' <mask>'
-pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model, top_k=10)
-dict_next_words = next_word(text_st, pipe)
-df = pd.DataFrame.from_dict(dict_next_words)
-df.reset_index(drop=True, inplace=True)
-for nc in L:
-    score = 0.0
-    temp = []
-    for ni in id_list:
-        try:
-            score = score + df_g.loc[(ni, nc),'score']
-        except KeyError:
-            continue
-    key  = df3.loc[nc].values[0]
-    tmt[key] = score
-exp_terms = []
-t_li = tmt.values()
-tmexp = sorted(tmt.items(), key=lambda x: x[1], reverse=True)
-i = 0
-dict_res = {'word':[], 'score':[]}
-for key, value in tmexp:
-    new_score=((value-min(t_li))/(max(t_li)-min(t_li)))-0.0001
-    dict_res['score'].append(str(new_score)[:6])
-    dict_res['word'].append(key)
-    i+=1
-    if (i==10):
-        break
-res_df = pd.DataFrame.from_dict(dict_res)
-res_df.index += 1
-st.dataframe(df)
-st.dataframe(res_df)
 #st.table(df)

 # sent is the variable holding the user's input
 sent = st.text_area("مدخل", default_value, height=20)
 tokenizer = AutoTokenizer.from_pretrained("moussaKam/AraBART", max_length=128, padding=True, pad_to_max_length = True, truncation=True)
 model = AutoModelForMaskedLM.from_pretrained("Hamda/test-1-finetuned-AraBART")
 #@st.cache
+if (st.button('بحث', disabled=False)):
+    def next_word(text, pipe):
+        res_dict= {
+          'Word':[],
+          'Score':[],
+        }
+        for e in pipe(text):
+            if all(c not in list(string.punctuation) for c in e['token_str']):
+                res_dict['Word'].append(e['token_str'])
+                res_dict['Score'].append(e['score'])
+        return res_dict
+    text_st = sent+ ' <mask>'
+    pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model, top_k=10)
+    dict_next_words = next_word(text_st, pipe)
+    df = pd.DataFrame.from_dict(dict_next_words)
+    df.reset_index(drop=True, inplace=True)
+    st.dataframe(df)
+if (st.button('استعمال الرسم البياني', disabled=False)):
+    tmt = {}
+    VocMap = './voc.csv'
+    ScoreMap = './BM25.csv'
+    df3 = pd.read_csv(VocMap, delimiter='\t')
+    df_g = pd.read_csv(ScoreMap, delimiter='\t')
+    df_g.set_index(['ID1','ID2'], inplace=True)
+    df_in = pd.read_csv(ScoreMap, delimiter='\t')
+    df_in.set_index(['ID1'], inplace=True)
+    def Query2id(voc, query):
+        return [voc.index[voc['word'] == word].values[0] for word in query.split()]
+    id_list = Query2id(df3, sent)
+    def setQueriesVoc(df, id_list):
+        res = []
+        for e in id_list:
+            res.extend(list(df.loc[e]['ID2'].values))
+        return list(set(res))
+    L = setQueriesVoc(df_in, id_list)
+    for nc in L:
+        score = 0.0
+        temp = []
+        for ni in id_list:
+            try:
+                score = score + df_g.loc[(ni, nc),'score']
+            except KeyError:
+                continue
+        key  = df3.loc[nc].values[0]
+        tmt[key] = score
+    exp_terms = []
+    t_li = tmt.values()
+    tmexp = sorted(tmt.items(), key=lambda x: x[1], reverse=True)
+    i = 0
+    dict_res = {'word':[], 'score':[]}
+    for key, value in tmexp:
+        new_score=((value-min(t_li))/(max(t_li)-min(t_li)))-0.0001
+        dict_res['score'].append(str(new_score)[:6])
+        dict_res['word'].append(key)
+        i+=1
+        if (i==10):
+            break
+    res_df = pd.DataFrame.from_dict(dict_res)
+    res_df.index += 1
+    st.dataframe(res_df)
 #st.table(df)