KevinHuSh committed on
Commit
429cc62
·
1 Parent(s): e441caf

Fix bug in reloading knowledgebase configuration (#210)

Browse files

### What problem does this PR solve?

_Briefly describe what this PR aims to solve. Include background context
that will help reviewers understand the purpose of the PR._

Issue link: [#209](https://github.com/infiniflow/ragflow/issues/209)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

api/apps/chunk_app.py CHANGED
@@ -79,7 +79,7 @@ def list():
79
  return get_json_result(data=res)
80
  except Exception as e:
81
  if str(e).find("not_found") > 0:
82
- return get_json_result(data=False, retmsg=f'Index not found!',
83
  retcode=RetCode.DATA_ERROR)
84
  return server_error_response(e)
85
 
@@ -262,6 +262,6 @@ def retrieval_test():
262
  return get_json_result(data=ranks)
263
  except Exception as e:
264
  if str(e).find("not_found") > 0:
265
- return get_json_result(data=False, retmsg=f'Index not found!',
266
  retcode=RetCode.DATA_ERROR)
267
  return server_error_response(e)
 
79
  return get_json_result(data=res)
80
  except Exception as e:
81
  if str(e).find("not_found") > 0:
82
+ return get_json_result(data=False, retmsg=f'No chunk found!',
83
  retcode=RetCode.DATA_ERROR)
84
  return server_error_response(e)
85
 
 
262
  return get_json_result(data=ranks)
263
  except Exception as e:
264
  if str(e).find("not_found") > 0:
265
+ return get_json_result(data=False, retmsg=f'No chunk found! Check the chunk status please!',
266
  retcode=RetCode.DATA_ERROR)
267
  return server_error_response(e)
api/db/services/knowledgebase_service.py CHANGED
@@ -44,7 +44,8 @@ class KnowledgebaseService(CommonService):
44
  def get_detail(cls, kb_id):
45
  fields = [
46
  cls.model.id,
47
- Tenant.embd_id,
 
48
  cls.model.avatar,
49
  cls.model.name,
50
  cls.model.language,
 
44
  def get_detail(cls, kb_id):
45
  fields = [
46
  cls.model.id,
47
+ #Tenant.embd_id,
48
+ cls.embd_id,
49
  cls.model.avatar,
50
  cls.model.name,
51
  cls.model.language,
rag/app/manual.py CHANGED
@@ -85,7 +85,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
85
  for t, lvl in pdf_parser.outlines:
86
  tks = set([t[i] + t[i + 1] for i in range(len(t) - 1)])
87
  tks_ = set([txt[i] + txt[i + 1]
88
- for i in range(min(len(t), len(txt) - 1))])
89
  if len(set(tks & tks_)) / max([len(tks), len(tks_), 1]) > 0.8:
90
  levels.append(lvl)
91
  break
@@ -109,7 +109,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
109
  sections = [(txt, sec_ids[i], poss)
110
  for i, (txt, _, poss) in enumerate(sections)]
111
  for (img, rows), poss in tbls:
112
- if not rows:continue
113
  sections.append((rows if isinstance(rows, str) else rows[0], -1,
114
  [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
115
 
@@ -125,7 +125,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
125
  for txt, sec_id, poss in sorted(sections, key=lambda x: (
126
  x[-1][0][0], x[-1][0][3], x[-1][0][1])):
127
  poss = "\t".join([tag(*pos) for pos in poss])
128
- if tk_cnt < 2048 and (sec_id == last_sid or sec_id == -1):
129
  if chunks:
130
  chunks[-1] += "\n" + txt + poss
131
  tk_cnt += num_tokens_from_string(txt)
@@ -143,6 +143,9 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
143
  if __name__ == "__main__":
144
  import sys
145
 
 
146
  def dummy(prog=None, msg=""):
147
  pass
 
 
148
  chunk(sys.argv[1], callback=dummy)
 
85
  for t, lvl in pdf_parser.outlines:
86
  tks = set([t[i] + t[i + 1] for i in range(len(t) - 1)])
87
  tks_ = set([txt[i] + txt[i + 1]
88
+ for i in range(min(len(t), len(txt) - 1))])
89
  if len(set(tks & tks_)) / max([len(tks), len(tks_), 1]) > 0.8:
90
  levels.append(lvl)
91
  break
 
109
  sections = [(txt, sec_ids[i], poss)
110
  for i, (txt, _, poss) in enumerate(sections)]
111
  for (img, rows), poss in tbls:
112
+ if not rows: continue
113
  sections.append((rows if isinstance(rows, str) else rows[0], -1,
114
  [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
115
 
 
125
  for txt, sec_id, poss in sorted(sections, key=lambda x: (
126
  x[-1][0][0], x[-1][0][3], x[-1][0][1])):
127
  poss = "\t".join([tag(*pos) for pos in poss])
128
+ if tk_cnt < 32 or (tk_cnt < 1024 and (sec_id == last_sid or sec_id == -1)):
129
  if chunks:
130
  chunks[-1] += "\n" + txt + poss
131
  tk_cnt += num_tokens_from_string(txt)
 
143
  if __name__ == "__main__":
144
  import sys
145
 
146
+
147
  def dummy(prog=None, msg=""):
148
  pass
149
+
150
+
151
  chunk(sys.argv[1], callback=dummy)
rag/app/qa.py CHANGED
@@ -133,7 +133,7 @@ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs):
133
  return res
134
 
135
  raise NotImplementedError(
136
- "file type not supported yet(pptx, pdf supported)")
137
 
138
 
139
  if __name__ == "__main__":
 
133
  return res
134
 
135
  raise NotImplementedError(
136
+ "Excel and csv(txt) format files are supported.")
137
 
138
 
139
  if __name__ == "__main__":
rag/nlp/query.py CHANGED
@@ -73,7 +73,7 @@ class EsQueryer:
73
  return True
74
 
75
  qs, keywords = [], []
76
- for tt in self.tw.split(txt): # .split(" "):
77
  if not tt:
78
  continue
79
  twts = self.tw.weights([tt])
 
73
  return True
74
 
75
  qs, keywords = [], []
76
+ for tt in self.tw.split(txt)[:256]: # .split(" "):
77
  if not tt:
78
  continue
79
  twts = self.tw.weights([tt])