etadevosyan committed on
Commit
46b2548
·
verified ·
1 Parent(s): 3b2275f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -38
app.py CHANGED
@@ -1,39 +1,41 @@
1
- import json
2
- import numpy as np
3
-
4
- from transformers import BertTokenizer
5
- from rank_bm25 import BM25Okapi
6
- import gradio as gr
7
-
8
- tokenizer = BertTokenizer.from_pretrained("DeepPavlov/rubert-base-cased")
9
-
10
- f = open('budu_search_syn_database.json')
11
-
12
- database = json.load(f)
13
-
14
- b25corpus = [x for x in database.values()]
15
- b25local_names = [x for x in database.keys()]
16
- bm25 = BM25Okapi(corpus=b25corpus)
17
-
18
- def predict_bm25(service):
19
- tokenized_query = tokenizer.tokenize(service.lower())
20
-
21
- doc_scores = bm25.get_scores(tokenized_query)
22
- sorted_doc_indices = doc_scores.argsort()[::-1]
23
-
24
- sorted_local_names = np.array([b25local_names[i] for i in sorted_doc_indices])
25
- scores = doc_scores[sorted_doc_indices]
26
- scores_filtered = np.argwhere(scores>0).reshape(-1)
27
- filtered_local_names = sorted_local_names[scores_filtered.tolist()].tolist()
28
- return filtered_local_names
29
-
30
- demo = gr.Interface(fn=predict_bm25,inputs=gr.components.Textbox(label='Запрос пользователя'),
31
- outputs=[gr.components.Textbox(label='Рекомендованные услуги')],
32
- examples=[
33
- ['ферритин'],
34
- ['кальций'],
35
- ['железо'],
36
- ['прием']])
37
-
38
- if __name__ == "__main__":
 
 
39
  demo.launch()
 
1
+ import json
2
+ import numpy as np
3
+
4
+ from transformers import BertTokenizer
5
+ from rank_bm25 import BM25Okapi
6
+ import gradio as gr
7
+
8
+ tokenizer = BertTokenizer.from_pretrained("DeepPavlov/rubert-base-cased")
9
+
10
+ f = open('budu_search_syn_database.json')
11
+
12
+ database = json.load(f)
13
+
14
+ b25corpus = [x for x in database.values()]
15
+ b25local_names = [x for x in database.keys()]
16
+ bm25 = BM25Okapi(corpus=b25corpus)
17
+
18
+ def predict_bm25(service):
19
+ tokenized_query = tokenizer.tokenize(service.lower())
20
+
21
+ doc_scores = bm25.get_scores(tokenized_query)
22
+ sorted_doc_indices = doc_scores.argsort()[::-1]
23
+
24
+ sorted_local_names = np.array([b25local_names[i] for i in sorted_doc_indices])
25
+ scores = doc_scores[sorted_doc_indices]
26
+ scores_filtered = np.argwhere(scores>0).reshape(-1)
27
+ filtered_local_names = sorted_local_names[scores_filtered.tolist()].tolist()
28
+ if len(filtered_local_names)>5:
29
+ filtered_local_names = filtered_local_names[:5]
30
+ return filtered_local_names
31
+
32
+ demo = gr.Interface(fn=predict_bm25,inputs=gr.components.Textbox(label='Запрос пользователя'),
33
+ outputs=[gr.components.Textbox(label='Рекомендованные услуги')],
34
+ examples=[
35
+ ['ферритин'],
36
+ ['кальций'],
37
+ ['железо'],
38
+ ['прием']])
39
+
40
+ if __name__ == "__main__":
41
  demo.launch()