Hamda committed on
Commit
12fc412
·
1 Parent(s): e1f11a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -9
app.py CHANGED
@@ -3,10 +3,40 @@ import transformers
3
  from transformers import pipeline
4
  from transformers import AutoTokenizer, AutoModelForMaskedLM
5
  import pandas as pd
6
- import numpy as np
7
  import string
8
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  tokenizer = AutoTokenizer.from_pretrained("moussaKam/AraBART", max_length=128, padding=True, pad_to_max_length = True, truncation=True)
11
  model = AutoModelForMaskedLM.from_pretrained("Hamda/test-1-finetuned-AraBART")
12
 
@@ -22,13 +52,6 @@ def next_word(text, pipe):
22
  res_dict['Score'].append(e['score'])
23
  return res_dict
24
 
25
- st.title("المساعدة اللغوية في التنبؤ بالمتلازمات والمتصاحبات والتعبيرات الاصطلاحية وتصحيحها")
26
- default_value = "بيعت الأسلحة في السوق"
27
- # sent is the variable holding the user's input
28
- sent = st.text_area("مدخل", default_value, height=20)
29
-
30
- st.checkbox('استعمال الرسم البياني', value=False)
31
-
32
  text_st = sent+ ' <mask>'
33
 
34
  pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model, top_k=10)
@@ -36,7 +59,25 @@ dict_next_words = next_word(text_st, pipe)
36
  df = pd.DataFrame.from_dict(dict_next_words)
37
  df.reset_index(drop=True, inplace=True)
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  st.dataframe(df)
40
-
41
 
42
  #st.table(df)
 
3
  from transformers import pipeline
4
  from transformers import AutoTokenizer, AutoModelForMaskedLM
5
  import pandas as pd
 
6
  import string
7
 
8
 
9
# --- User interface -------------------------------------------------------
st.title("المساعدة اللغوية في التنبؤ بالمتلازمات والمتصاحبات وتصحيحها")
default_value = "بيعت الأسلحة في السوق"
# sent is the variable holding the user's input
sent = st.text_area("مدخل", default_value, height=20)

# NOTE(review): the checkbox state is not captured, so toggling it has no
# effect on anything visible in this section.
st.checkbox('استعمال الرسم البياني', value=False)

# --- Lookup tables --------------------------------------------------------
# Accumulator filled later with {term: summed score}.
tmt = {}

# Forward slashes resolve on every platform; the previous r'.\voc.csv' form
# only works on Windows (elsewhere the backslash is part of the file name).
VocMap = './voc.csv'
ibra_gr = './BM25.csv'

# Vocabulary table (tab-separated); Query2id reads its 'word' column.
df3 = pd.read_csv(VocMap, delimiter='\t')

# Parse the tab-separated pair table once and derive both indexed views from
# it instead of reading the same file twice.
_pairs = pd.read_csv(ibra_gr, delimiter='\t')

# (ID1, ID2) -> row, used for direct pair lookups.
df_g = _pairs.set_index(['ID1', 'ID2'])

# ID1 -> rows, used to enumerate every ID2 associated with a given ID1.
df_in = _pairs.set_index(['ID1'])
26
+
27
def Query2id(voc, query):
    """Map each whitespace-separated word of *query* to its label in *voc*.

    Args:
        voc: DataFrame with a ``word`` column; its index labels act as ids.
        query: Free text, split on whitespace.

    Returns:
        A list of index labels, in query order, taking the first matching
        row for each word. Words that do not occur in ``voc['word']`` are
        skipped (they previously raised ``IndexError``).
    """
    ids = []
    for word in query.split():
        matches = voc.index[voc['word'] == word].values
        # Out-of-vocabulary words yield an empty match; skip rather than crash.
        if len(matches) > 0:
            ids.append(matches[0])
    return ids
29
+
30
+ id_list = Query2id(df3, sent)
31
+
32
def setQueriesVoc(df, id_list):
    """Collect the distinct ``ID2`` values associated with the given ids.

    Args:
        df: DataFrame indexed by the first id, with an ``ID2`` column.
        id_list: Iterable of index labels to look up in ``df``.

    Returns:
        A list of the unique ``ID2`` values found, in no particular order.
        Ids that are not present in ``df.index`` contribute nothing (they
        previously raised ``KeyError``).
    """
    found = set()
    for e in id_list:
        if e not in df.index:
            continue
        # Selecting with a list always yields a Series, even when the id
        # matches a single row; scalar selection returned a bare value there
        # and the subsequent ``.values`` access failed.
        found.update(df.loc[[e], 'ID2'].values)
    return list(found)
37
+
38
+ L = setQueriesVoc(df_in, id_list)
39
+
40
  tokenizer = AutoTokenizer.from_pretrained("moussaKam/AraBART", max_length=128, padding=True, pad_to_max_length = True, truncation=True)
41
  model = AutoModelForMaskedLM.from_pretrained("Hamda/test-1-finetuned-AraBART")
42
 
 
52
  res_dict['Score'].append(e['score'])
53
  return res_dict
54
 
 
 
 
 
 
 
 
55
  text_st = sent+ ' <mask>'
56
 
57
  pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model, top_k=10)
 
59
  df = pd.DataFrame.from_dict(dict_next_words)
60
  df.reset_index(drop=True, inplace=True)
61
 
62
# Score every candidate id in L by summing the 'score' column of df_g over
# all (query id, candidate id) pairs that exist in the table.
# NOTE(review): the table is loaded from BM25.csv, so these are presumably
# BM25 weights — confirm against the data.
for nc in L:
    score = 0.0
    for ni in id_list:
        try:
            score = score + df_g.loc[(ni, nc), 'score']
        except KeyError:
            # No entry for this pair: it contributes nothing to the total.
            continue
    # The key is the first column value of the vocabulary row for nc.
    key = df3.loc[nc].values[0]
    tmt[key] = score

# Keep the ten highest-scoring terms, formatted as "term | score".
tmexp = sorted(tmt.items(), key=lambda x: x[1], reverse=True)
exp_terms = [str(key) + ' | ' + str(value) for key, value in tmexp[:10]]

st.dataframe(df)
st.write(exp_terms)
82
 
83
  #st.table(df)