Add tagger
- app.py +24 -6
- requirements.txt +2 -1
- tagger_map.py +8 -1
app.py
CHANGED
@@ -1,4 +1,3 @@
-import streamlit as st
 from tagger_map import Tagger as Tagger_Map
 from tagger_map import zh_dict
 
@@ -7,15 +6,34 @@ def search_text(search_sentences,topn= 5):
     search_sentences = search_sentences.replace("_"," ")
     search_sentences = search_sentences.strip()
     if search_sentences not in zh_dict:
-
-
+        log = "数据库内未找到相似tag,您是否在查询以下tag?"
+        rtn0 = tagger_map.fuzzy_complete_word(search_sentences,topn)
     else:
+        log = " 查询结果:"
         rtn0 = tagger_map.get_top_weighted_neighbors(search_sentences,topn)
     rtn = []
     for tag in rtn0:
         rtn.append(f"{tag.replace(' ','_')}《{zh_dict[tag]}》")
-    return rtn
+    return log, rtn
+
+# -----------------------------
+
+import streamlit as st
+
+# Create a text input box
+input_text = st.text_input("查询tag")
+
+# Create a slider for the number of results (5 to 30, step 5)
+n = st.slider("查询数量", min_value=5, max_value=30,value=5,step=5)
+
+# Initialize the result area
+result = ""
 
+# When the input box has content, run the query and show the results
+if input_text:
+    log, rtn_0 = search_text(input_text, n)
+    result = f'### {log}\n - ' + '\n- '.join(rtn_0)
 
-
-st.
+# Display the results
+with st.container():
+    st.markdown(result)
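With this change, `search_text` returns a `(log, rtn)` pair: a header message (either the "did you mean" prompt or "查询结果:") and a list of `tag《Chinese name》` strings, which the Streamlit code joins into a markdown heading plus bullet list for `st.markdown`. Below is a minimal sketch of just that rendering step; the tag entries are made-up stand-ins for what `get_top_weighted_neighbors` / `fuzzy_complete_word` plus `zh_dict` would actually produce.

```python
# Sketch of the result rendering in app.py, using hypothetical tag entries.
log = " 查询结果:"
rtn_0 = ["cat_ears《猫耳》", "animal_ears《兽耳》", "tail《尾巴》"]

result = f'### {log}\n - ' + '\n- '.join(rtn_0)
print(result)
# ###  查询结果:
#  - cat_ears《猫耳》
# - animal_ears《兽耳》
# - tail《尾巴》
```

`st.markdown` renders this as a level-3 heading followed by a bullet list (the leading space before the first `-` is still treated as a list item).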
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
 RainbowPrint
 bidict
-networkx
+networkx
+fuzzywuzzy
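One note on the added dependency: fuzzywuzzy runs with just the line above, but without the optional python-Levenshtein package it falls back to difflib's pure-Python matcher and emits a UserWarning at import time; how much that matters depends on the size of the tag vocabulary being matched on every query.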
tagger_map.py
CHANGED
@@ -4,6 +4,7 @@ import networkx as nx
 import heapq
 from bidict import bidict
 from RainbowPrint import RainbowPrint as rp
+from fuzzywuzzy import process
 
 zh_path = r"./data/all_name_id_zh.txt"
 
@@ -58,7 +59,13 @@ class Tagger():
         # Return only the neighbor node identifiers
         return [self.nodes_id.inverse[nbr] for _, nbr in top_neighbors_with_weights]
 
-
+    def fuzzy_complete_word(self, query, limit=5):
+        vocabulary = list(self.nodes_id.keys())
+        results = process.extract(query, vocabulary, limit=limit)
+        # Extract the matched entries
+        matches = [result[0] for result in results]
+        rp.debug('map: fuzzy:', matches)
+        return matches
 
 
 if __name__ == '__main__':
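The new `fuzzy_complete_word` is a thin wrapper around fuzzywuzzy's `process.extract`, which scores the query against every key of `self.nodes_id` and returns `(match, score)` pairs; the method then keeps only the matched strings. A standalone sketch of that call, with a toy vocabulary standing in for the Space's real node set:

```python
# Standalone sketch of the process.extract call behind fuzzy_complete_word.
# `vocabulary` is a toy stand-in for list(self.nodes_id.keys()).
from fuzzywuzzy import process

vocabulary = ["cat girl", "cat ears", "dog girl", "long hair", "school uniform"]

# Returns (choice, score) tuples, best matches first, with scores from 0 to 100.
results = process.extract("catgirl", vocabulary, limit=3)
print(results)  # e.g. [('cat girl', <score>), ('cat ears', <score>), ...]

# Keep only the matched strings, as fuzzy_complete_word does.
matches = [match for match, _score in results]
print(matches)
```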