KevinHuSh committed on
Commit
9bf75d4
·
1 Parent(s): 6be3dd5

add dialog api (#33)

Browse files
Files changed (50) hide show
  1. api/apps/__init__.py +11 -11
  2. api/apps/chunk_app.py +113 -26
  3. api/apps/dialog_app.py +163 -0
  4. api/apps/document_app.py +26 -22
  5. api/apps/kb_app.py +10 -10
  6. api/apps/llm_app.py +11 -11
  7. api/apps/user_app.py +10 -10
  8. api/db/__init__.py +9 -2
  9. api/db/db_models.py +15 -12
  10. api/db/db_services.py +4 -4
  11. api/db/db_utils.py +5 -11
  12. api/db/init_data.py +5 -5
  13. api/db/operatioins.py +2 -2
  14. api/db/reload_config_base.py +1 -1
  15. api/db/runtime_config.py +2 -2
  16. api/db/services/__init__.py +1 -1
  17. api/db/services/common_service.py +3 -3
  18. api/db/services/dialog_service.py +3 -13
  19. api/db/services/document_service.py +7 -7
  20. api/db/services/kb_service.py +6 -9
  21. api/db/services/knowledgebase_service.py +3 -9
  22. api/db/services/llm_service.py +6 -9
  23. api/db/services/user_service.py +7 -7
  24. api/errors/error_services.py +1 -1
  25. api/errors/general_error.py +1 -1
  26. api/hook/__init__.py +2 -2
  27. api/hook/api/client_authentication.py +5 -5
  28. api/hook/api/permission.py +5 -5
  29. api/hook/api/site_authentication.py +5 -5
  30. api/hook/common/parameters.py +1 -1
  31. api/ragflow_server.py +9 -9
  32. api/settings.py +5 -5
  33. api/utils/__init__.py +1 -1
  34. api/utils/api_utils.py +6 -6
  35. api/utils/file_utils.py +2 -2
  36. api/utils/log_utils.py +2 -2
  37. api/utils/t_crypt.py +1 -1
  38. api/versions.py +2 -2
  39. rag/llm/__init__.py +1 -1
  40. rag/llm/chat_model.py +1 -1
  41. rag/llm/cv_model.py +1 -1
  42. rag/llm/embedding_model.py +5 -1
  43. rag/nlp/huqie.py +1 -1
  44. rag/nlp/query.py +1 -1
  45. rag/nlp/search.py +18 -18
  46. rag/nlp/synonym.py +1 -1
  47. rag/nlp/term_weight.py +1 -1
  48. rag/settings.py +4 -4
  49. rag/svr/parse_user_docs.py +7 -8
  50. rag/utils/es_conn.py +0 -1
api/apps/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -21,17 +21,17 @@ from flask import Blueprint, Flask, request
21
  from werkzeug.wrappers.request import Request
22
  from flask_cors import CORS
23
 
24
- from web_server.db import StatusEnum
25
- from web_server.db.services import UserService
26
- from web_server.utils import CustomJSONEncoder
27
 
28
  from flask_session import Session
29
  from flask_login import LoginManager
30
- from web_server.settings import RetCode, SECRET_KEY, stat_logger
31
- from web_server.hook import HookManager
32
- from web_server.hook.common.parameters import AuthenticationParameters, ClientAuthenticationParameters
33
- from web_server.settings import API_VERSION, CLIENT_AUTHENTICATION, SITE_AUTHENTICATION, access_logger
34
- from web_server.utils.api_utils import get_json_result, server_error_response
35
  from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
36
 
37
  __all__ = ['app']
@@ -68,7 +68,7 @@ def search_pages_path(pages_dir):
68
 
69
  def register_page(page_path):
70
  page_name = page_path.stem.rstrip('_app')
71
- module_name = '.'.join(page_path.parts[page_path.parts.index('web_server'):-1] + (page_name, ))
72
 
73
  spec = spec_from_file_location(module_name, page_path)
74
  page = module_from_spec(spec)
@@ -86,7 +86,7 @@ def register_page(page_path):
86
 
87
  pages_dir = [
88
  Path(__file__).parent,
89
- Path(__file__).parent.parent / 'web_server' / 'apps',
90
  ]
91
 
92
  client_urls_prefix = [
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
21
  from werkzeug.wrappers.request import Request
22
  from flask_cors import CORS
23
 
24
+ from api.db import StatusEnum
25
+ from api.db.services import UserService
26
+ from api.utils import CustomJSONEncoder
27
 
28
  from flask_session import Session
29
  from flask_login import LoginManager
30
+ from api.settings import RetCode, SECRET_KEY, stat_logger
31
+ from api.hook import HookManager
32
+ from api.hook.common.parameters import AuthenticationParameters, ClientAuthenticationParameters
33
+ from api.settings import API_VERSION, CLIENT_AUTHENTICATION, SITE_AUTHENTICATION, access_logger
34
+ from api.utils.api_utils import get_json_result, server_error_response
35
  from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
36
 
37
  __all__ = ['app']
 
68
 
69
  def register_page(page_path):
70
  page_name = page_path.stem.rstrip('_app')
71
+ module_name = '.'.join(page_path.parts[page_path.parts.index('api'):-1] + (page_name, ))
72
 
73
  spec = spec_from_file_location(module_name, page_path)
74
  page = module_from_spec(spec)
 
86
 
87
  pages_dir = [
88
  Path(__file__).parent,
89
+ Path(__file__).parent.parent / 'api' / 'apps',
90
  ]
91
 
92
  client_urls_prefix = [
api/apps/chunk_app.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -13,31 +13,26 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
- import base64
17
  import hashlib
18
- import pathlib
19
  import re
20
 
21
- from elasticsearch_dsl import Q
22
  from flask import request
23
  from flask_login import login_required, current_user
24
 
25
  from rag.nlp import search, huqie
26
  from rag.utils import ELASTICSEARCH, rmSpace
27
- from web_server.db import LLMType
28
- from web_server.db.services import duplicate_name
29
- from web_server.db.services.kb_service import KnowledgebaseService
30
- from web_server.db.services.llm_service import TenantLLMService
31
- from web_server.db.services.user_service import UserTenantService
32
- from web_server.utils.api_utils import server_error_response, get_data_error_result, validate_request
33
- from web_server.utils import get_uuid
34
- from web_server.db.services.document_service import DocumentService
35
- from web_server.settings import RetCode
36
- from web_server.utils.api_utils import get_json_result
37
- from rag.utils.minio_conn import MINIO
38
- from web_server.utils.file_utils import filename_type
39
-
40
- retrival = search.Dealer(ELASTICSEARCH, None)
41
 
42
  @manager.route('/list', methods=['POST'])
43
  @login_required
@@ -45,16 +40,29 @@ retrival = search.Dealer(ELASTICSEARCH, None)
45
  def list():
46
  req = request.json
47
  doc_id = req["doc_id"]
48
- page = req.get("page", 1)
49
- size = req.get("size", 30)
50
  question = req.get("keywords", "")
51
  try:
52
- tenants = UserTenantService.query(user_id=current_user.id)
53
- if not tenants:
54
- return get_data_error_result(retmsg="Tenant not found!")
55
- res = retrival.search({
56
  "doc_ids": [doc_id], "page": page, "size": size, "question": question
57
- }, search.index_name(tenants[0].tenant_id))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  return get_json_result(data=res)
59
  except Exception as e:
60
  if str(e).find("not_found") > 0:
@@ -102,6 +110,7 @@ def set():
102
  d["content_sm_ltks"] = huqie.qieqie(d["content_ltks"])
103
  d["important_kwd"] = req["important_kwd"]
104
  d["important_tks"] = huqie.qie(" ".join(req["important_kwd"]))
 
105
 
106
  try:
107
  tenant_id = DocumentService.get_tenant_id(req["doc_id"])
@@ -116,10 +125,27 @@ def set():
116
  return server_error_response(e)
117
 
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  @manager.route('/create', methods=['POST'])
120
  @login_required
121
  @validate_request("doc_id", "content_ltks", "important_kwd")
122
- def set():
123
  req = request.json
124
  md5 = hashlib.md5()
125
  md5.update((req["content_ltks"] + req["doc_id"]).encode("utf-8"))
@@ -148,3 +174,64 @@ def set():
148
  return get_json_result(data={"chunk_id": chunck_id})
149
  except Exception as e:
150
  return server_error_response(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  import hashlib
 
17
  import re
18
 
19
+ import numpy as np
20
  from flask import request
21
  from flask_login import login_required, current_user
22
 
23
  from rag.nlp import search, huqie
24
  from rag.utils import ELASTICSEARCH, rmSpace
25
+ from api.db import LLMType
26
+ from api.db.services import duplicate_name
27
+ from api.db.services.kb_service import KnowledgebaseService
28
+ from api.db.services.llm_service import TenantLLMService
29
+ from api.db.services.user_service import UserTenantService
30
+ from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
31
+ from api.db.services.document_service import DocumentService
32
+ from api.settings import RetCode
33
+ from api.utils.api_utils import get_json_result
34
+
35
+ retrival = search.Dealer(ELASTICSEARCH)
 
 
 
36
 
37
  @manager.route('/list', methods=['POST'])
38
  @login_required
 
40
  def list():
41
  req = request.json
42
  doc_id = req["doc_id"]
43
+ page = int(req.get("page", 1))
44
+ size = int(req.get("size", 30))
45
  question = req.get("keywords", "")
46
  try:
47
+ tenant_id = DocumentService.get_tenant_id(req["doc_id"])
48
+ if not tenant_id: return get_data_error_result(retmsg="Tenant not found!")
49
+ query = {
 
50
  "doc_ids": [doc_id], "page": page, "size": size, "question": question
51
+ }
52
+ if "available_int" in req: query["available_int"] = int(req["available_int"])
53
+ sres = retrival.search(query, search.index_name(tenant_id))
54
+ res = {"total": sres.total, "chunks": []}
55
+ for id in sres.ids:
56
+ d = {
57
+ "chunk_id": id,
58
+ "content_ltks": rmSpace(sres.highlight[id]) if question else sres.field[id]["content_ltks"],
59
+ "doc_id": sres.field[id]["doc_id"],
60
+ "docnm_kwd": sres.field[id]["docnm_kwd"],
61
+ "important_kwd": sres.field[id].get("important_kwd", []),
62
+ "img_id": sres.field[id].get("img_id", ""),
63
+ "available_int": sres.field[id].get("available_int", 1),
64
+ }
65
+ res["chunks"].append(d)
66
  return get_json_result(data=res)
67
  except Exception as e:
68
  if str(e).find("not_found") > 0:
 
110
  d["content_sm_ltks"] = huqie.qieqie(d["content_ltks"])
111
  d["important_kwd"] = req["important_kwd"]
112
  d["important_tks"] = huqie.qie(" ".join(req["important_kwd"]))
113
+ if "available_int" in req: d["available_int"] = req["available_int"]
114
 
115
  try:
116
  tenant_id = DocumentService.get_tenant_id(req["doc_id"])
 
125
  return server_error_response(e)
126
 
127
 
128
+ @manager.route('/switch', methods=['POST'])
129
+ @login_required
130
+ @validate_request("chunk_ids", "available_int", "doc_id")
131
+ def switch():
132
+ req = request.json
133
+ try:
134
+ tenant_id = DocumentService.get_tenant_id(req["doc_id"])
135
+ if not tenant_id: return get_data_error_result(retmsg="Tenant not found!")
136
+ if not ELASTICSEARCH.upsert([{"id": i, "available_int": int(req["available_int"])} for i in req["chunk_ids"]],
137
+ search.index_name(tenant_id)):
138
+ return get_data_error_result(retmsg="Index updating failure")
139
+ return get_json_result(data=True)
140
+ except Exception as e:
141
+ return server_error_response(e)
142
+
143
+
144
+
145
  @manager.route('/create', methods=['POST'])
146
  @login_required
147
  @validate_request("doc_id", "content_ltks", "important_kwd")
148
+ def create():
149
  req = request.json
150
  md5 = hashlib.md5()
151
  md5.update((req["content_ltks"] + req["doc_id"]).encode("utf-8"))
 
174
  return get_json_result(data={"chunk_id": chunck_id})
175
  except Exception as e:
176
  return server_error_response(e)
177
+
178
+
179
+ @manager.route('/retrieval_test', methods=['POST'])
180
+ @login_required
181
+ @validate_request("kb_id", "question")
182
+ def retrieval_test():
183
+ req = request.json
184
+ page = int(req.get("page", 1))
185
+ size = int(req.get("size", 30))
186
+ question = req["question"]
187
+ kb_id = req["kb_id"]
188
+ doc_ids = req.get("doc_ids", [])
189
+ similarity_threshold = float(req.get("similarity_threshold", 0.4))
190
+ vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
191
+ top = int(req.get("top", 1024))
192
+ try:
193
+ e, kb = KnowledgebaseService.get_by_id(kb_id)
194
+ if not e:
195
+ return get_data_error_result(retmsg="Knowledgebase not found!")
196
+
197
+ embd_mdl = TenantLLMService.model_instance(kb.tenant_id, LLMType.EMBEDDING.value)
198
+ sres = retrival.search({"kb_ids": [kb_id], "doc_ids": doc_ids, "size": top,
199
+ "question": question, "vector": True,
200
+ "similarity": similarity_threshold},
201
+ search.index_name(kb.tenant_id),
202
+ embd_mdl)
203
+
204
+ sim, tsim, vsim = retrival.rerank(sres, question, 1-vector_similarity_weight, vector_similarity_weight)
205
+ idx = np.argsort(sim*-1)
206
+ ranks = {"total": 0, "chunks": [], "doc_aggs": {}}
207
+ start_idx = (page-1)*size
208
+ for i in idx:
209
+ ranks["total"] += 1
210
+ if sim[i] < similarity_threshold: break
211
+ start_idx -= 1
212
+ if start_idx >= 0:continue
213
+ if len(ranks["chunks"]) == size:continue
214
+ id = sres.ids[i]
215
+ dnm = sres.field[id]["docnm_kwd"]
216
+ d = {
217
+ "chunk_id": id,
218
+ "content_ltks": sres.field[id]["content_ltks"],
219
+ "doc_id": sres.field[id]["doc_id"],
220
+ "docnm_kwd": dnm,
221
+ "kb_id": sres.field[id]["kb_id"],
222
+ "important_kwd": sres.field[id].get("important_kwd", []),
223
+ "img_id": sres.field[id].get("img_id", ""),
224
+ "similarity": sim[i],
225
+ "vector_similarity": vsim[i],
226
+ "term_similarity": tsim[i]
227
+ }
228
+ ranks["chunks"].append(d)
229
+ if dnm not in ranks["doc_aggs"]:ranks["doc_aggs"][dnm] = 0
230
+ ranks["doc_aggs"][dnm] += 1
231
+
232
+ return get_json_result(data=ranks)
233
+ except Exception as e:
234
+ if str(e).find("not_found") > 0:
235
+ return get_json_result(data=False, retmsg=f'Index not found!',
236
+ retcode=RetCode.DATA_ERROR)
237
+ return server_error_response(e)
api/apps/dialog_app.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ import hashlib
17
+ import re
18
+
19
+ import numpy as np
20
+ from flask import request
21
+ from flask_login import login_required, current_user
22
+
23
+ from api.db.services.dialog_service import DialogService
24
+ from rag.nlp import search, huqie
25
+ from rag.utils import ELASTICSEARCH, rmSpace
26
+ from api.db import LLMType, StatusEnum
27
+ from api.db.services import duplicate_name
28
+ from api.db.services.kb_service import KnowledgebaseService
29
+ from api.db.services.llm_service import TenantLLMService
30
+ from api.db.services.user_service import UserTenantService, TenantService
31
+ from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
32
+ from api.utils import get_uuid
33
+ from api.db.services.document_service import DocumentService
34
+ from api.settings import RetCode, stat_logger
35
+ from api.utils.api_utils import get_json_result
36
+ from rag.utils.minio_conn import MINIO
37
+ from api.utils.file_utils import filename_type
38
+
39
+
40
+ @manager.route('/set', methods=['POST'])
41
+ @login_required
42
+ def set():
43
+ req = request.json
44
+ dialog_id = req.get("dialog_id")
45
+ name = req.get("name", "New Dialog")
46
+ description = req.get("description", "A helpful Dialog")
47
+ language = req.get("language", "Chinese")
48
+ llm_setting_type = req.get("llm_setting_type", "Precise")
49
+ llm_setting = req.get("llm_setting", {
50
+ "Creative": {
51
+ "temperature": 0.9,
52
+ "top_p": 0.9,
53
+ "frequency_penalty": 0.2,
54
+ "presence_penalty": 0.4,
55
+ "max_tokens": 512
56
+ },
57
+ "Precise": {
58
+ "temperature": 0.1,
59
+ "top_p": 0.3,
60
+ "frequency_penalty": 0.7,
61
+ "presence_penalty": 0.4,
62
+ "max_tokens": 215
63
+ },
64
+ "Evenly": {
65
+ "temperature": 0.5,
66
+ "top_p": 0.5,
67
+ "frequency_penalty": 0.7,
68
+ "presence_penalty": 0.4,
69
+ "max_tokens": 215
70
+ },
71
+ "Custom": {
72
+ "temperature": 0.2,
73
+ "top_p": 0.3,
74
+ "frequency_penalty": 0.6,
75
+ "presence_penalty": 0.3,
76
+ "max_tokens": 215
77
+ },
78
+ })
79
+ prompt_config = req.get("prompt_config", {
80
+ "system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。
81
+ 以下是知识库:
82
+ {knowledge}
83
+ 以上是知识库。""",
84
+ "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
85
+ "parameters": [
86
+ {"key": "knowledge", "optional": False}
87
+ ],
88
+ "empty_response": "Sorry! 知识库中未找到相关内容!"
89
+ })
90
+
91
+ if len(prompt_config["parameters"]) < 1:
92
+ return get_data_error_result(retmsg="'knowledge' should be in parameters")
93
+
94
+ for p in prompt_config["parameters"]:
95
+ if prompt_config["system"].find("{%s}"%p["key"]) < 0:
96
+ return get_data_error_result(retmsg="Parameter '{}' is not used".format(p["key"]))
97
+
98
+ try:
99
+ e, tenant = TenantService.get_by_id(current_user.id)
100
+ if not e:return get_data_error_result(retmsg="Tenant not found!")
101
+ llm_id = req.get("llm_id", tenant.llm_id)
102
+ if not dialog_id:
103
+ dia = {
104
+ "id": get_uuid(),
105
+ "tenant_id": current_user.id,
106
+ "name": name,
107
+ "description": description,
108
+ "language": language,
109
+ "llm_id": llm_id,
110
+ "llm_setting_type": llm_setting_type,
111
+ "llm_setting": llm_setting,
112
+ "prompt_config": prompt_config
113
+ }
114
+ if not DialogService.save(**dia): return get_data_error_result(retmsg="Fail to new a dialog!")
115
+ e, dia = DialogService.get_by_id(dia["id"])
116
+ if not e: return get_data_error_result(retmsg="Fail to new a dialog!")
117
+ return get_json_result(data=dia.to_json())
118
+ else:
119
+ del req["dialog_id"]
120
+ if "kb_names" in req: del req["kb_names"]
121
+ if not DialogService.update_by_id(dialog_id, req):
122
+ return get_data_error_result(retmsg="Dialog not found!")
123
+ e, dia = DialogService.get_by_id(dialog_id)
124
+ if not e: return get_data_error_result(retmsg="Fail to update a dialog!")
125
+ dia = dia.to_dict()
126
+ dia["kb_ids"], dia["kb_names"] = get_kb_names(dia["kb_ids"])
127
+ return get_json_result(data=dia)
128
+ except Exception as e:
129
+ return server_error_response(e)
130
+
131
+ @manager.route('/get', methods=['GET'])
132
+ @login_required
133
+ def get():
134
+ dialog_id = request.args["dialog_id"]
135
+ try:
136
+ e,dia = DialogService.get_by_id(dialog_id)
137
+ if not e: return get_data_error_result(retmsg="Dialog not found!")
138
+ dia = dia.to_dict()
139
+ dia["kb_ids"], dia["kb_names"] = get_kb_names(dia["kb_ids"])
140
+ return get_json_result(data=dia)
141
+ except Exception as e:
142
+ return server_error_response(e)
143
+
144
+ def get_kb_names(kb_ids):
145
+ ids, nms = [], []
146
+ for kid in kb_ids:
147
+ e, kb = KnowledgebaseService.get_by_id(kid)
148
+ if not e or kb.status != StatusEnum.VALID.value:continue
149
+ ids.append(kid)
150
+ nms.append(kb.name)
151
+ return ids, nms
152
+
153
+ @manager.route('/list', methods=['GET'])
154
+ @login_required
155
+ def list():
156
+ try:
157
+ diags = DialogService.query(tenant_id=current_user.id, status=StatusEnum.VALID.value)
158
+ diags = [d.to_dict() for d in diags]
159
+ for d in diags:
160
+ d["kb_ids"], d["kb_names"] = get_kb_names(d["kb_ids"])
161
+ return get_json_result(data=diags)
162
+ except Exception as e:
163
+ return server_error_response(e)
api/apps/document_app.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -16,22 +16,23 @@
16
  import base64
17
  import pathlib
18
 
 
19
  from elasticsearch_dsl import Q
20
  from flask import request
21
  from flask_login import login_required, current_user
22
 
23
  from rag.nlp import search
24
  from rag.utils import ELASTICSEARCH
25
- from web_server.db.services import duplicate_name
26
- from web_server.db.services.kb_service import KnowledgebaseService
27
- from web_server.utils.api_utils import server_error_response, get_data_error_result, validate_request
28
- from web_server.utils import get_uuid
29
- from web_server.db import FileType
30
- from web_server.db.services.document_service import DocumentService
31
- from web_server.settings import RetCode
32
- from web_server.utils.api_utils import get_json_result
33
  from rag.utils.minio_conn import MINIO
34
- from web_server.utils.file_utils import filename_type
35
 
36
 
37
  @manager.route('/upload', methods=['POST'])
@@ -163,21 +164,13 @@ def change_status():
163
 
164
  if str(req["status"]) == "0":
165
  ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]),
166
- scripts="""
167
- if(ctx._source.kb_id.contains('%s'))
168
- ctx._source.kb_id.remove(
169
- ctx._source.kb_id.indexOf('%s')
170
- );
171
- """ % (doc.kb_id, doc.kb_id),
172
  idxnm=search.index_name(
173
  kb.tenant_id)
174
  )
175
  else:
176
  ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]),
177
- scripts="""
178
- if(!ctx._source.kb_id.contains('%s'))
179
- ctx._source.kb_id.add('%s');
180
- """ % (doc.kb_id, doc.kb_id),
181
  idxnm=search.index_name(
182
  kb.tenant_id)
183
  )
@@ -195,8 +188,7 @@ def rm():
195
  e, doc = DocumentService.get_by_id(req["doc_id"])
196
  if not e:
197
  return get_data_error_result(retmsg="Document not found!")
198
- if not ELASTICSEARCH.deleteByQuery(Q("match", doc_id=doc.id), idxnm=search.index_name(doc.kb_id)):
199
- return get_json_result(data=False, retmsg='Remove from ES failure"', retcode=RetCode.SERVER_ERROR)
200
 
201
  DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num*-1, doc.chunk_num*-1, 0)
202
  if not DocumentService.delete_by_id(req["doc_id"]):
@@ -277,3 +269,15 @@ def change_parser():
277
  except Exception as e:
278
  return server_error_response(e)
279
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
16
  import base64
17
  import pathlib
18
 
19
+ import flask
20
  from elasticsearch_dsl import Q
21
  from flask import request
22
  from flask_login import login_required, current_user
23
 
24
  from rag.nlp import search
25
  from rag.utils import ELASTICSEARCH
26
+ from api.db.services import duplicate_name
27
+ from api.db.services.kb_service import KnowledgebaseService
28
+ from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
29
+ from api.utils import get_uuid
30
+ from api.db import FileType
31
+ from api.db.services.document_service import DocumentService
32
+ from api.settings import RetCode
33
+ from api.utils.api_utils import get_json_result
34
  from rag.utils.minio_conn import MINIO
35
+ from api.utils.file_utils import filename_type
36
 
37
 
38
  @manager.route('/upload', methods=['POST'])
 
164
 
165
  if str(req["status"]) == "0":
166
  ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]),
167
+ scripts="ctx._source.available_int=0;",
 
 
 
 
 
168
  idxnm=search.index_name(
169
  kb.tenant_id)
170
  )
171
  else:
172
  ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]),
173
+ scripts="ctx._source.available_int=1;",
 
 
 
174
  idxnm=search.index_name(
175
  kb.tenant_id)
176
  )
 
188
  e, doc = DocumentService.get_by_id(req["doc_id"])
189
  if not e:
190
  return get_data_error_result(retmsg="Document not found!")
191
+ ELASTICSEARCH.deleteByQuery(Q("match", doc_id=doc.id), idxnm=search.index_name(doc.kb_id))
 
192
 
193
  DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num*-1, doc.chunk_num*-1, 0)
194
  if not DocumentService.delete_by_id(req["doc_id"]):
 
269
  except Exception as e:
270
  return server_error_response(e)
271
 
272
+
273
+ @manager.route('/image/<image_id>', methods=['GET'])
274
+ @login_required
275
+ def get_image(image_id):
276
+ try:
277
+ bkt, nm = image_id.split("-")
278
+ response = flask.make_response(MINIO.get(bkt, nm))
279
+ response.headers.set('Content-Type', 'image/JPEG')
280
+ return response
281
+ except Exception as e:
282
+ return server_error_response(e)
283
+
api/apps/kb_app.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -16,15 +16,15 @@
16
  from flask import request
17
  from flask_login import login_required, current_user
18
 
19
- from web_server.db.services import duplicate_name
20
- from web_server.db.services.user_service import TenantService, UserTenantService
21
- from web_server.utils.api_utils import server_error_response, get_data_error_result, validate_request
22
- from web_server.utils import get_uuid, get_format_time
23
- from web_server.db import StatusEnum, UserTenantRole
24
- from web_server.db.services.kb_service import KnowledgebaseService
25
- from web_server.db.db_models import Knowledgebase
26
- from web_server.settings import stat_logger, RetCode
27
- from web_server.utils.api_utils import get_json_result
28
 
29
 
30
  @manager.route('/create', methods=['post'])
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
16
  from flask import request
17
  from flask_login import login_required, current_user
18
 
19
+ from api.db.services import duplicate_name
20
+ from api.db.services.user_service import TenantService, UserTenantService
21
+ from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
22
+ from api.utils import get_uuid, get_format_time
23
+ from api.db import StatusEnum, UserTenantRole
24
+ from api.db.services.kb_service import KnowledgebaseService
25
+ from api.db.db_models import Knowledgebase
26
+ from api.settings import stat_logger, RetCode
27
+ from api.utils.api_utils import get_json_result
28
 
29
 
30
  @manager.route('/create', methods=['post'])
api/apps/llm_app.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -16,16 +16,16 @@
16
  from flask import request
17
  from flask_login import login_required, current_user
18
 
19
- from web_server.db.services import duplicate_name
20
- from web_server.db.services.llm_service import LLMFactoriesService, TenantLLMService, LLMService
21
- from web_server.db.services.user_service import TenantService, UserTenantService
22
- from web_server.utils.api_utils import server_error_response, get_data_error_result, validate_request
23
- from web_server.utils import get_uuid, get_format_time
24
- from web_server.db import StatusEnum, UserTenantRole
25
- from web_server.db.services.kb_service import KnowledgebaseService
26
- from web_server.db.db_models import Knowledgebase, TenantLLM
27
- from web_server.settings import stat_logger, RetCode
28
- from web_server.utils.api_utils import get_json_result
29
 
30
 
31
  @manager.route('/factories', methods=['GET'])
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
16
  from flask import request
17
  from flask_login import login_required, current_user
18
 
19
+ from api.db.services import duplicate_name
20
+ from api.db.services.llm_service import LLMFactoriesService, TenantLLMService, LLMService
21
+ from api.db.services.user_service import TenantService, UserTenantService
22
+ from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
23
+ from api.utils import get_uuid, get_format_time
24
+ from api.db import StatusEnum, UserTenantRole
25
+ from api.db.services.kb_service import KnowledgebaseService
26
+ from api.db.db_models import Knowledgebase, TenantLLM
27
+ from api.settings import stat_logger, RetCode
28
+ from api.utils.api_utils import get_json_result
29
 
30
 
31
  @manager.route('/factories', methods=['GET'])
api/apps/user_app.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -17,15 +17,15 @@ from flask import request, session, redirect, url_for
17
  from werkzeug.security import generate_password_hash, check_password_hash
18
  from flask_login import login_required, current_user, login_user, logout_user
19
 
20
- from web_server.db.db_models import TenantLLM
21
- from web_server.db.services.llm_service import TenantLLMService
22
- from web_server.utils.api_utils import server_error_response, validate_request
23
- from web_server.utils import get_uuid, get_format_time, decrypt, download_img
24
- from web_server.db import UserTenantRole, LLMType
25
- from web_server.settings import RetCode, GITHUB_OAUTH, CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS
26
- from web_server.db.services.user_service import UserService, TenantService, UserTenantService
27
- from web_server.settings import stat_logger
28
- from web_server.utils.api_utils import get_json_result, cors_reponse
29
 
30
 
31
  @manager.route('/login', methods=['POST', 'GET'])
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
17
  from werkzeug.security import generate_password_hash, check_password_hash
18
  from flask_login import login_required, current_user, login_user, logout_user
19
 
20
+ from api.db.db_models import TenantLLM
21
+ from api.db.services.llm_service import TenantLLMService
22
+ from api.utils.api_utils import server_error_response, validate_request
23
+ from api.utils import get_uuid, get_format_time, decrypt, download_img
24
+ from api.db import UserTenantRole, LLMType
25
+ from api.settings import RetCode, GITHUB_OAUTH, CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS
26
+ from api.db.services.user_service import UserService, TenantService, UserTenantService
27
+ from api.settings import stat_logger
28
+ from api.utils.api_utils import get_json_result, cors_reponse
29
 
30
 
31
  @manager.route('/login', methods=['POST', 'GET'])
api/db/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -51,4 +51,11 @@ class LLMType(StrEnum):
51
  CHAT = 'chat'
52
  EMBEDDING = 'embedding'
53
  SPEECH2TEXT = 'speech2text'
54
- IMAGE2TEXT = 'image2text'
 
 
 
 
 
 
 
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
51
  CHAT = 'chat'
52
  EMBEDDING = 'embedding'
53
  SPEECH2TEXT = 'speech2text'
54
+ IMAGE2TEXT = 'image2text'
55
+
56
+
57
+ class ChatStyle(StrEnum):
58
+ CREATIVE = 'Creative'
59
+ PRECISE = 'Precise'
60
+ EVENLY = 'Evenly'
61
+ CUSTOM = 'Custom'
api/db/db_models.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -29,10 +29,10 @@ from peewee import (
29
  )
30
  from playhouse.pool import PooledMySQLDatabase
31
 
32
- from web_server.db import SerializedType
33
- from web_server.settings import DATABASE, stat_logger, SECRET_KEY
34
- from web_server.utils.log_utils import getLogger
35
- from web_server import utils
36
 
37
  LOGGER = getLogger()
38
 
@@ -467,6 +467,8 @@ class Knowledgebase(DataBaseModel):
467
  doc_num = IntegerField(default=0)
468
  token_num = IntegerField(default=0)
469
  chunk_num = IntegerField(default=0)
 
 
470
 
471
  parser_id = CharField(max_length=32, null=False, help_text="default parser ID")
472
  status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted,1: validate)", default="1")
@@ -516,19 +518,20 @@ class Dialog(DataBaseModel):
516
  prompt_type = CharField(max_length=16, null=False, default="simple", help_text="simple|advanced")
517
  prompt_config = JSONField(null=False, default={"system": "", "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
518
  "parameters": [], "empty_response": "Sorry! 知识库中未找到相关内容!"})
 
519
  status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted,1: validate)", default="1")
520
 
521
  class Meta:
522
  db_table = "dialog"
523
 
524
 
525
- class DialogKb(DataBaseModel):
526
- dialog_id = CharField(max_length=32, null=False, index=True)
527
- kb_id = CharField(max_length=32, null=False)
528
-
529
- class Meta:
530
- db_table = "dialog_kb"
531
- primary_key = CompositeKey('dialog_id', 'kb_id')
532
 
533
 
534
  class Conversation(DataBaseModel):
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
29
  )
30
  from playhouse.pool import PooledMySQLDatabase
31
 
32
+ from api.db import SerializedType
33
+ from api.settings import DATABASE, stat_logger, SECRET_KEY
34
+ from api.utils.log_utils import getLogger
35
+ from api import utils
36
 
37
  LOGGER = getLogger()
38
 
 
467
  doc_num = IntegerField(default=0)
468
  token_num = IntegerField(default=0)
469
  chunk_num = IntegerField(default=0)
470
+ similarity_threshold = FloatField(default=0.4)
471
+ vector_similarity_weight = FloatField(default=0.3)
472
 
473
  parser_id = CharField(max_length=32, null=False, help_text="default parser ID")
474
  status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted,1: validate)", default="1")
 
518
  prompt_type = CharField(max_length=16, null=False, default="simple", help_text="simple|advanced")
519
  prompt_config = JSONField(null=False, default={"system": "", "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
520
  "parameters": [], "empty_response": "Sorry! 知识库中未找到相关内容!"})
521
+ kb_ids = JSONField(null=False, default=[])
522
  status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted,1: validate)", default="1")
523
 
524
  class Meta:
525
  db_table = "dialog"
526
 
527
 
528
+ # class DialogKb(DataBaseModel):
529
+ # dialog_id = CharField(max_length=32, null=False, index=True)
530
+ # kb_id = CharField(max_length=32, null=False)
531
+ #
532
+ # class Meta:
533
+ # db_table = "dialog_kb"
534
+ # primary_key = CompositeKey('dialog_id', 'kb_id')
535
 
536
 
537
  class Conversation(DataBaseModel):
api/db/db_services.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2021 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -19,10 +19,10 @@ import time
19
  from functools import wraps
20
  from shortuuid import ShortUUID
21
 
22
- from web_server.versions import get_rag_version
23
 
24
- from web_server.errors.error_services import *
25
- from web_server.settings import (
26
  GRPC_PORT, HOST, HTTP_PORT,
27
  RANDOM_INSTANCE_ID, stat_logger,
28
  )
 
1
  #
2
+ # Copyright 2021 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
19
  from functools import wraps
20
  from shortuuid import ShortUUID
21
 
22
+ from api.versions import get_rag_version
23
 
24
+ from api.errors.error_services import *
25
+ from api.settings import (
26
  GRPC_PORT, HOST, HTTP_PORT,
27
  RANDOM_INSTANCE_ID, stat_logger,
28
  )
api/db/db_utils.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -17,11 +17,11 @@ import operator
17
  from functools import reduce
18
  from typing import Dict, Type, Union
19
 
20
- from web_server.utils import current_timestamp, timestamp_to_date
21
 
22
- from web_server.db.db_models import DB, DataBaseModel
23
- from web_server.db.runtime_config import RuntimeConfig
24
- from web_server.utils.log_utils import getLogger
25
  from enum import Enum
26
 
27
 
@@ -123,9 +123,3 @@ def query_db(model: Type[DataBaseModel], limit: int = 0, offset: int = 0,
123
  data = data.offset(offset)
124
 
125
  return list(data), count
126
-
127
-
128
- class StatusEnum(Enum):
129
- # 样本可用状态
130
- VALID = "1"
131
- IN_VALID = "0"
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
17
  from functools import reduce
18
  from typing import Dict, Type, Union
19
 
20
+ from api.utils import current_timestamp, timestamp_to_date
21
 
22
+ from api.db.db_models import DB, DataBaseModel
23
+ from api.db.runtime_config import RuntimeConfig
24
+ from api.utils.log_utils import getLogger
25
  from enum import Enum
26
 
27
 
 
123
  data = data.offset(offset)
124
 
125
  return list(data), count
 
 
 
 
 
 
api/db/init_data.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -16,10 +16,10 @@
16
  import time
17
  import uuid
18
 
19
- from web_server.db import LLMType
20
- from web_server.db.db_models import init_database_tables as init_web_db
21
- from web_server.db.services import UserService
22
- from web_server.db.services.llm_service import LLMFactoriesService, LLMService
23
 
24
 
25
  def init_superuser():
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
16
  import time
17
  import uuid
18
 
19
+ from api.db import LLMType
20
+ from api.db.db_models import init_database_tables as init_web_db
21
+ from api.db.services import UserService
22
+ from api.db.services.llm_service import LLMFactoriesService, LLMService
23
 
24
 
25
  def init_superuser():
api/db/operatioins.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -17,5 +17,5 @@
17
  import operator
18
  import time
19
  import typing
20
- from web_server.utils.log_utils import sql_logger
21
  import peewee
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
17
  import operator
18
  import time
19
  import typing
20
+ from api.utils.log_utils import sql_logger
21
  import peewee
api/db/reload_config_base.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
api/db/runtime_config.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -13,7 +13,7 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
- from web_server.versions import get_versions
17
  from .reload_config_base import ReloadConfigBase
18
 
19
 
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ from api.versions import get_versions
17
  from .reload_config_base import ReloadConfigBase
18
 
19
 
api/db/services/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
api/db/services/common_service.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -17,8 +17,8 @@ from datetime import datetime
17
 
18
  import peewee
19
 
20
- from web_server.db.db_models import DB
21
- from web_server.utils import datetime_format
22
 
23
 
24
  class CommonService:
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
17
 
18
  import peewee
19
 
20
+ from api.db.db_models import DB
21
+ from api.utils import datetime_format
22
 
23
 
24
  class CommonService:
api/db/services/dialog_service.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -13,14 +13,8 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
- import peewee
17
- from werkzeug.security import generate_password_hash, check_password_hash
18
-
19
- from web_server.db.db_models import DB, UserTenant
20
- from web_server.db.db_models import Dialog, Conversation, DialogKb
21
- from web_server.db.services.common_service import CommonService
22
- from web_server.utils import get_uuid, get_format_time
23
- from web_server.db.db_utils import StatusEnum
24
 
25
 
26
  class DialogService(CommonService):
@@ -29,7 +23,3 @@ class DialogService(CommonService):
29
 
30
  class ConversationService(CommonService):
31
  model = Conversation
32
-
33
-
34
- class DialogKbService(CommonService):
35
- model = DialogKb
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ from api.db.db_models import Dialog, Conversation
17
+ from api.db.services.common_service import CommonService
 
 
 
 
 
 
18
 
19
 
20
  class DialogService(CommonService):
 
23
 
24
  class ConversationService(CommonService):
25
  model = Conversation
 
 
 
 
api/db/services/document_service.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -15,12 +15,12 @@
15
  #
16
  from peewee import Expression
17
 
18
- from web_server.db import TenantPermission, FileType
19
- from web_server.db.db_models import DB, Knowledgebase, Tenant
20
- from web_server.db.db_models import Document
21
- from web_server.db.services.common_service import CommonService
22
- from web_server.db.services.kb_service import KnowledgebaseService
23
- from web_server.db.db_utils import StatusEnum
24
 
25
 
26
  class DocumentService(CommonService):
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
15
  #
16
  from peewee import Expression
17
 
18
+ from api.db import TenantPermission, FileType
19
+ from api.db.db_models import DB, Knowledgebase, Tenant
20
+ from api.db.db_models import Document
21
+ from api.db.services.common_service import CommonService
22
+ from api.db.services.kb_service import KnowledgebaseService
23
+ from api.db import StatusEnum
24
 
25
 
26
  class DocumentService(CommonService):
api/db/services/kb_service.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -13,15 +13,12 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
- import peewee
17
- from werkzeug.security import generate_password_hash, check_password_hash
18
 
19
- from web_server.db import TenantPermission
20
- from web_server.db.db_models import DB, UserTenant, Tenant
21
- from web_server.db.db_models import Knowledgebase
22
- from web_server.db.services.common_service import CommonService
23
- from web_server.utils import get_uuid, get_format_time
24
- from web_server.db.db_utils import StatusEnum
25
 
26
 
27
  class KnowledgebaseService(CommonService):
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
 
16
 
17
+ from api.db import TenantPermission
18
+ from api.db.db_models import DB, Tenant
19
+ from api.db.db_models import Knowledgebase
20
+ from api.db.services.common_service import CommonService
21
+ from api.db import StatusEnum
 
22
 
23
 
24
  class KnowledgebaseService(CommonService):
api/db/services/knowledgebase_service.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -13,14 +13,8 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
- import peewee
17
- from werkzeug.security import generate_password_hash, check_password_hash
18
-
19
- from web_server.db.db_models import DB, UserTenant
20
- from web_server.db.db_models import Knowledgebase, Document
21
- from web_server.db.services.common_service import CommonService
22
- from web_server.utils import get_uuid, get_format_time
23
- from web_server.db.db_utils import StatusEnum
24
 
25
 
26
  class KnowledgebaseService(CommonService):
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ from api.db.db_models import Knowledgebase, Document
17
+ from api.db.services.common_service import CommonService
 
 
 
 
 
 
18
 
19
 
20
  class KnowledgebaseService(CommonService):
api/db/services/llm_service.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -13,15 +13,12 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
- import peewee
17
- from werkzeug.security import generate_password_hash, check_password_hash
18
-
19
  from rag.llm import EmbeddingModel, CvModel
20
- from web_server.db import LLMType
21
- from web_server.db.db_models import DB, UserTenant
22
- from web_server.db.db_models import LLMFactories, LLM, TenantLLM
23
- from web_server.db.services.common_service import CommonService
24
- from web_server.db.db_utils import StatusEnum
25
 
26
 
27
  class LLMFactoriesService(CommonService):
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
 
 
16
  from rag.llm import EmbeddingModel, CvModel
17
+ from api.db import LLMType
18
+ from api.db.db_models import DB, UserTenant
19
+ from api.db.db_models import LLMFactories, LLM, TenantLLM
20
+ from api.db.services.common_service import CommonService
21
+ from api.db import StatusEnum
22
 
23
 
24
  class LLMFactoriesService(CommonService):
api/db/services/user_service.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -16,12 +16,12 @@
16
  import peewee
17
  from werkzeug.security import generate_password_hash, check_password_hash
18
 
19
- from web_server.db import UserTenantRole
20
- from web_server.db.db_models import DB, UserTenant
21
- from web_server.db.db_models import User, Tenant
22
- from web_server.db.services.common_service import CommonService
23
- from web_server.utils import get_uuid, get_format_time
24
- from web_server.db.db_utils import StatusEnum
25
 
26
 
27
  class UserService(CommonService):
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
16
  import peewee
17
  from werkzeug.security import generate_password_hash, check_password_hash
18
 
19
+ from api.db import UserTenantRole
20
+ from api.db.db_models import DB, UserTenant
21
+ from api.db.db_models import User, Tenant
22
+ from api.db.services.common_service import CommonService
23
+ from api.utils import get_uuid, get_format_time
24
+ from api.db import StatusEnum
25
 
26
 
27
  class UserService(CommonService):
api/errors/error_services.py CHANGED
@@ -1,4 +1,4 @@
1
- from web_server.errors import RagFlowError
2
 
3
  __all__ = ['ServicesError', 'ServiceNotSupported', 'ZooKeeperNotConfigured',
4
  'MissingZooKeeperUsernameOrPassword', 'ZooKeeperBackendError']
 
1
+ from api.errors import RagFlowError
2
 
3
  __all__ = ['ServicesError', 'ServiceNotSupported', 'ZooKeeperNotConfigured',
4
  'MissingZooKeeperUsernameOrPassword', 'ZooKeeperBackendError']
api/errors/general_error.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
api/hook/__init__.py CHANGED
@@ -1,8 +1,8 @@
1
  import importlib
2
 
3
- from web_server.hook.common.parameters import SignatureParameters, AuthenticationParameters, \
4
  SignatureReturn, AuthenticationReturn, PermissionReturn, ClientAuthenticationReturn, ClientAuthenticationParameters
5
- from web_server.settings import HOOK_MODULE, stat_logger,RetCode
6
 
7
 
8
  class HookManager:
 
1
  import importlib
2
 
3
+ from api.hook.common.parameters import SignatureParameters, AuthenticationParameters, \
4
  SignatureReturn, AuthenticationReturn, PermissionReturn, ClientAuthenticationReturn, ClientAuthenticationParameters
5
+ from api.settings import HOOK_MODULE, stat_logger,RetCode
6
 
7
 
8
  class HookManager:
api/hook/api/client_authentication.py CHANGED
@@ -1,10 +1,10 @@
1
  import requests
2
 
3
- from web_server.db.service_registry import ServiceRegistry
4
- from web_server.settings import RegistryServiceName
5
- from web_server.hook import HookManager
6
- from web_server.hook.common.parameters import ClientAuthenticationParameters, ClientAuthenticationReturn
7
- from web_server.settings import HOOK_SERVER_NAME
8
 
9
 
10
  @HookManager.register_client_authentication_hook
 
1
  import requests
2
 
3
+ from api.db.service_registry import ServiceRegistry
4
+ from api.settings import RegistryServiceName
5
+ from api.hook import HookManager
6
+ from api.hook.common.parameters import ClientAuthenticationParameters, ClientAuthenticationReturn
7
+ from api.settings import HOOK_SERVER_NAME
8
 
9
 
10
  @HookManager.register_client_authentication_hook
api/hook/api/permission.py CHANGED
@@ -1,10 +1,10 @@
1
  import requests
2
 
3
- from web_server.db.service_registry import ServiceRegistry
4
- from web_server.settings import RegistryServiceName
5
- from web_server.hook import HookManager
6
- from web_server.hook.common.parameters import PermissionCheckParameters, PermissionReturn
7
- from web_server.settings import HOOK_SERVER_NAME
8
 
9
 
10
  @HookManager.register_permission_check_hook
 
1
  import requests
2
 
3
+ from api.db.service_registry import ServiceRegistry
4
+ from api.settings import RegistryServiceName
5
+ from api.hook import HookManager
6
+ from api.hook.common.parameters import PermissionCheckParameters, PermissionReturn
7
+ from api.settings import HOOK_SERVER_NAME
8
 
9
 
10
  @HookManager.register_permission_check_hook
api/hook/api/site_authentication.py CHANGED
@@ -1,11 +1,11 @@
1
  import requests
2
 
3
- from web_server.db.service_registry import ServiceRegistry
4
- from web_server.settings import RegistryServiceName
5
- from web_server.hook import HookManager
6
- from web_server.hook.common.parameters import SignatureParameters, AuthenticationParameters, AuthenticationReturn,\
7
  SignatureReturn
8
- from web_server.settings import HOOK_SERVER_NAME, PARTY_ID
9
 
10
 
11
  @HookManager.register_site_signature_hook
 
1
  import requests
2
 
3
+ from api.db.service_registry import ServiceRegistry
4
+ from api.settings import RegistryServiceName
5
+ from api.hook import HookManager
6
+ from api.hook.common.parameters import SignatureParameters, AuthenticationParameters, AuthenticationReturn,\
7
  SignatureReturn
8
+ from api.settings import HOOK_SERVER_NAME, PARTY_ID
9
 
10
 
11
  @HookManager.register_site_signature_hook
api/hook/common/parameters.py CHANGED
@@ -1,4 +1,4 @@
1
- from web_server.settings import RetCode
2
 
3
 
4
  class ParametersBase:
 
1
+ from api.settings import RetCode
2
 
3
 
4
  class ParametersBase:
api/ragflow_server.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -23,17 +23,17 @@ import traceback
23
 
24
  from werkzeug.serving import run_simple
25
 
26
- from web_server.apps import app
27
- from web_server.db.runtime_config import RuntimeConfig
28
- from web_server.hook import HookManager
29
- from web_server.settings import (
30
  HOST, HTTP_PORT, access_logger, database_logger, stat_logger,
31
  )
32
- from web_server import utils
33
 
34
- from web_server.db.db_models import init_database_tables as init_web_db
35
- from web_server.db.init_data import init_web_data
36
- from web_server.versions import get_versions
37
 
38
  if __name__ == '__main__':
39
  stat_logger.info(
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
23
 
24
  from werkzeug.serving import run_simple
25
 
26
+ from api.apps import app
27
+ from api.db.runtime_config import RuntimeConfig
28
+ from api.hook import HookManager
29
+ from api.settings import (
30
  HOST, HTTP_PORT, access_logger, database_logger, stat_logger,
31
  )
32
+ from api import utils
33
 
34
+ from api.db.db_models import init_database_tables as init_web_db
35
+ from api.db.init_data import init_web_data
36
+ from api.versions import get_versions
37
 
38
  if __name__ == '__main__':
39
  stat_logger.info(
api/settings.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@ import os
17
 
18
  from enum import IntEnum, Enum
19
 
20
- from web_server.utils import get_base_config,decrypt_database_config
21
- from web_server.utils.file_utils import get_project_base_directory
22
- from web_server.utils.log_utils import LoggerFactory, getLogger
23
 
24
 
25
  # Server
@@ -71,7 +71,7 @@ PROXY_PROTOCOL = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("protocol")
71
  DATABASE = decrypt_database_config()
72
 
73
  # Logger
74
- LoggerFactory.set_directory(os.path.join(get_project_base_directory(), "logs", "web_server"))
75
  # {CRITICAL: 50, FATAL:50, ERROR:40, WARNING:30, WARN:30, INFO:20, DEBUG:10, NOTSET:0}
76
  LoggerFactory.LEVEL = 10
77
 
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
17
 
18
  from enum import IntEnum, Enum
19
 
20
+ from api.utils import get_base_config,decrypt_database_config
21
+ from api.utils.file_utils import get_project_base_directory
22
+ from api.utils.log_utils import LoggerFactory, getLogger
23
 
24
 
25
  # Server
 
71
  DATABASE = decrypt_database_config()
72
 
73
  # Logger
74
+ LoggerFactory.set_directory(os.path.join(get_project_base_directory(), "logs", "api"))
75
  # {CRITICAL: 50, FATAL:50, ERROR:40, WARNING:30, WARN:30, INFO:20, DEBUG:10, NOTSET:0}
76
  LoggerFactory.LEVEL = 10
77
 
api/utils/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
api/utils/api_utils.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -24,16 +24,16 @@ from flask import (
24
  )
25
  from werkzeug.http import HTTP_STATUS_CODES
26
 
27
- from web_server.utils import json_dumps
28
- from web_server.versions import get_rag_version
29
- from web_server.settings import RetCode
30
- from web_server.settings import (
31
  REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC,
32
  stat_logger,CLIENT_AUTHENTICATION, HTTP_APP_KEY, SECRET_KEY
33
  )
34
  import requests
35
  import functools
36
- from web_server.utils import CustomJSONEncoder
37
  from uuid import uuid1
38
  from base64 import b64encode
39
  from hmac import HMAC
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
24
  )
25
  from werkzeug.http import HTTP_STATUS_CODES
26
 
27
+ from api.utils import json_dumps
28
+ from api.versions import get_rag_version
29
+ from api.settings import RetCode
30
+ from api.settings import (
31
  REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC,
32
  stat_logger,CLIENT_AUTHENTICATION, HTTP_APP_KEY, SECRET_KEY
33
  )
34
  import requests
35
  import functools
36
+ from api.utils import CustomJSONEncoder
37
  from uuid import uuid1
38
  from base64 import b64encode
39
  from hmac import HMAC
api/utils/file_utils.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -21,7 +21,7 @@ import re
21
  from cachetools import LRUCache, cached
22
  from ruamel.yaml import YAML
23
 
24
- from web_server.db import FileType
25
 
26
  PROJECT_BASE = os.getenv("RAG_PROJECT_BASE") or os.getenv("RAG_DEPLOY_BASE")
27
  RAG_BASE = os.getenv("RAG_BASE")
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
21
  from cachetools import LRUCache, cached
22
  from ruamel.yaml import YAML
23
 
24
+ from api.db import FileType
25
 
26
  PROJECT_BASE = os.getenv("RAG_PROJECT_BASE") or os.getenv("RAG_DEPLOY_BASE")
27
  RAG_BASE = os.getenv("RAG_BASE")
api/utils/log_utils.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -21,7 +21,7 @@ import inspect
21
  from logging.handlers import TimedRotatingFileHandler
22
  from threading import RLock
23
 
24
- from web_server.utils import file_utils
25
 
26
  class LoggerFactory(object):
27
  TYPE = "FILE"
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
21
  from logging.handlers import TimedRotatingFileHandler
22
  from threading import RLock
23
 
24
+ from api.utils import file_utils
25
 
26
  class LoggerFactory(object):
27
  TYPE = "FILE"
api/utils/t_crypt.py CHANGED
@@ -1,7 +1,7 @@
1
  import base64, os, sys
2
  from Cryptodome.PublicKey import RSA
3
  from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5
4
- from web_server.utils import decrypt, file_utils
5
 
6
  def crypt(line):
7
  file_path = os.path.join(file_utils.get_project_base_directory(), "conf", "public.pem")
 
1
  import base64, os, sys
2
  from Cryptodome.PublicKey import RSA
3
  from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5
4
+ from api.utils import decrypt, file_utils
5
 
6
  def crypt(line):
7
  file_path = os.path.join(file_utils.get_project_base_directory(), "conf", "public.pem")
api/versions.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -18,7 +18,7 @@ import os
18
  import dotenv
19
  import typing
20
 
21
- from web_server.utils.file_utils import get_project_base_directory
22
 
23
 
24
  def get_versions() -> typing.Mapping[str, typing.Any]:
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
18
  import dotenv
19
  import typing
20
 
21
+ from api.utils.file_utils import get_project_base_directory
22
 
23
 
24
  def get_versions() -> typing.Mapping[str, typing.Any]:
rag/llm/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
rag/llm/chat_model.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
rag/llm/cv_model.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
rag/llm/embedding_model.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -60,6 +60,10 @@ class HuEmbedding(Base):
60
  res.extend(self.model.encode(texts[i:i + batch_size]).tolist())
61
  return np.array(res), token_count
62
 
 
 
 
 
63
 
64
  class OpenAIEmbed(Base):
65
  def __init__(self, key, model_name="text-embedding-ada-002"):
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
60
  res.extend(self.model.encode(texts[i:i + batch_size]).tolist())
61
  return np.array(res), token_count
62
 
63
+ def encode_queries(self, text: str):
64
+ token_count = num_tokens_from_string(text)
65
+ return self.model.encode_queries([text]).tolist()[0], token_count
66
+
67
 
68
  class OpenAIEmbed(Base):
69
  def __init__(self, key, model_name="text-embedding-ada-002"):
rag/nlp/huqie.py CHANGED
@@ -9,7 +9,7 @@ import string
9
  import sys
10
  from hanziconv import HanziConv
11
 
12
- from web_server.utils.file_utils import get_project_base_directory
13
 
14
 
15
  class Huqie:
 
9
  import sys
10
  from hanziconv import HanziConv
11
 
12
+ from api.utils.file_utils import get_project_base_directory
13
 
14
 
15
  class Huqie:
rag/nlp/query.py CHANGED
@@ -147,7 +147,7 @@ class EsQueryer:
147
  atks = toDict(atks)
148
  btkss = [toDict(tks) for tks in btkss]
149
  tksim = [self.similarity(atks, btks) for btks in btkss]
150
- return np.array(sims[0]) * vtweight + np.array(tksim) * tkweight
151
 
152
  def similarity(self, qtwt, dtwt):
153
  if isinstance(dtwt, type("")):
 
147
  atks = toDict(atks)
148
  btkss = [toDict(tks) for tks in btkss]
149
  tksim = [self.similarity(atks, btks) for btks in btkss]
150
+ return np.array(sims[0]) * vtweight + np.array(tksim) * tkweight, sims[0], tksim
151
 
152
  def similarity(self, qtwt, dtwt):
153
  if isinstance(dtwt, type("")):
rag/nlp/search.py CHANGED
@@ -15,7 +15,7 @@ def index_name(uid): return f"ragflow_{uid}"
15
 
16
 
17
  class Dealer:
18
- def __init__(self, es, emb_mdl):
19
  self.qryr = query.EsQueryer(es)
20
  self.qryr.flds = [
21
  "title_tks^10",
@@ -23,7 +23,6 @@ class Dealer:
23
  "content_ltks^2",
24
  "content_sm_ltks"]
25
  self.es = es
26
- self.emb_mdl = emb_mdl
27
 
28
  @dataclass
29
  class SearchResult:
@@ -36,23 +35,26 @@ class Dealer:
36
  keywords: Optional[List[str]] = None
37
  group_docs: List[List] = None
38
 
39
- def _vector(self, txt, sim=0.8, topk=10):
40
- qv = self.emb_mdl.encode_queries(txt)
41
  return {
42
  "field": "q_%d_vec"%len(qv),
43
  "k": topk,
44
  "similarity": sim,
45
- "num_candidates": 1000,
46
  "query_vector": qv
47
  }
48
 
49
- def search(self, req, idxnm, tks_num=3):
50
  qst = req.get("question", "")
51
  bqry, keywords = self.qryr.question(qst)
52
  if req.get("kb_ids"):
53
  bqry.filter.append(Q("terms", kb_id=req["kb_ids"]))
54
  if req.get("doc_ids"):
55
  bqry.filter.append(Q("terms", doc_id=req["doc_ids"]))
 
 
 
56
  bqry.boost = 0.05
57
 
58
  s = Search()
@@ -60,7 +62,7 @@ class Dealer:
60
  ps = int(req.get("size", 1000))
61
  src = req.get("fields", ["docnm_kwd", "content_ltks", "kb_id","img_id",
62
  "image_id", "doc_id", "q_512_vec", "q_768_vec",
63
- "q_1024_vec", "q_1536_vec"])
64
 
65
  s = s.query(bqry)[pg * ps:(pg + 1) * ps]
66
  s = s.highlight("content_ltks")
@@ -80,7 +82,8 @@ class Dealer:
80
  s = s.to_dict()
81
  q_vec = []
82
  if req.get("vector"):
83
- s["knn"] = self._vector(qst, req.get("similarity", 0.4), ps)
 
84
  s["knn"]["filter"] = bqry.to_dict()
85
  if "highlight" in s: del s["highlight"]
86
  q_vec = s["knn"]["query_vector"]
@@ -168,7 +171,7 @@ class Dealer:
168
  def trans2floats(txt):
169
  return [float(t) for t in txt.split("\t")]
170
 
171
- def insert_citations(self, ans, top_idx, sres,
172
  vfield="q_vec", cfield="content_ltks"):
173
 
174
  ins_embd = [Dealer.trans2floats(
@@ -179,15 +182,14 @@ class Dealer:
179
  res = ""
180
 
181
  def citeit():
182
- nonlocal s, e, ans, res
183
  if not ins_embd:
184
  return
185
- embd = self.emb_mdl.encode(ans[s: e])
186
  sim = self.qryr.hybrid_similarity(embd,
187
  ins_embd,
188
  huqie.qie(ans[s:e]).split(" "),
189
  ins_tw)
190
- print(ans[s: e], sim)
191
  mx = np.max(sim) * 0.99
192
  if mx < 0.55:
193
  return
@@ -225,20 +227,18 @@ class Dealer:
225
 
226
  return res
227
 
228
- def rerank(self, sres, query, tkweight=0.3, vtweight=0.7,
229
- vfield="q_vec", cfield="content_ltks"):
230
  ins_embd = [
231
  Dealer.trans2floats(
232
- sres.field[i]["q_vec"]) for i in sres.ids]
233
  if not ins_embd:
234
  return []
235
  ins_tw = [sres.field[i][cfield].split(" ") for i in sres.ids]
236
- # return CosineSimilarity([sres.query_vector], ins_embd)[0]
237
- sim = self.qryr.hybrid_similarity(sres.query_vector,
238
  ins_embd,
239
  huqie.qie(query).split(" "),
240
  ins_tw, tkweight, vtweight)
241
- return sim
242
 
243
 
244
 
 
15
 
16
 
17
  class Dealer:
18
+ def __init__(self, es):
19
  self.qryr = query.EsQueryer(es)
20
  self.qryr.flds = [
21
  "title_tks^10",
 
23
  "content_ltks^2",
24
  "content_sm_ltks"]
25
  self.es = es
 
26
 
27
  @dataclass
28
  class SearchResult:
 
35
  keywords: Optional[List[str]] = None
36
  group_docs: List[List] = None
37
 
38
+ def _vector(self, txt, emb_mdl, sim=0.8, topk=10):
39
+ qv, c = emb_mdl.encode_queries(txt)
40
  return {
41
  "field": "q_%d_vec"%len(qv),
42
  "k": topk,
43
  "similarity": sim,
44
+ "num_candidates": topk*2,
45
  "query_vector": qv
46
  }
47
 
48
+ def search(self, req, idxnm, emb_mdl=None):
49
  qst = req.get("question", "")
50
  bqry, keywords = self.qryr.question(qst)
51
  if req.get("kb_ids"):
52
  bqry.filter.append(Q("terms", kb_id=req["kb_ids"]))
53
  if req.get("doc_ids"):
54
  bqry.filter.append(Q("terms", doc_id=req["doc_ids"]))
55
+ if "available_int" in req:
56
+ if req["available_int"] == 0: bqry.filter.append(Q("range", available_int={"lt": 1}))
57
+ else: bqry.filter.append(Q("bool", must_not=Q("range", available_int={"lt": 1})))
58
  bqry.boost = 0.05
59
 
60
  s = Search()
 
62
  ps = int(req.get("size", 1000))
63
  src = req.get("fields", ["docnm_kwd", "content_ltks", "kb_id","img_id",
64
  "image_id", "doc_id", "q_512_vec", "q_768_vec",
65
+ "q_1024_vec", "q_1536_vec", "available_int"])
66
 
67
  s = s.query(bqry)[pg * ps:(pg + 1) * ps]
68
  s = s.highlight("content_ltks")
 
82
  s = s.to_dict()
83
  q_vec = []
84
  if req.get("vector"):
85
+ assert emb_mdl, "No embedding model selected"
86
+ s["knn"] = self._vector(qst, emb_mdl, req.get("similarity", 0.4), ps)
87
  s["knn"]["filter"] = bqry.to_dict()
88
  if "highlight" in s: del s["highlight"]
89
  q_vec = s["knn"]["query_vector"]
 
171
  def trans2floats(txt):
172
  return [float(t) for t in txt.split("\t")]
173
 
174
+ def insert_citations(self, ans, top_idx, sres, emb_mdl,
175
  vfield="q_vec", cfield="content_ltks"):
176
 
177
  ins_embd = [Dealer.trans2floats(
 
182
  res = ""
183
 
184
  def citeit():
185
+ nonlocal s, e, ans, res, emb_mdl
186
  if not ins_embd:
187
  return
188
+ embd = emb_mdl.encode(ans[s: e])
189
  sim = self.qryr.hybrid_similarity(embd,
190
  ins_embd,
191
  huqie.qie(ans[s:e]).split(" "),
192
  ins_tw)
 
193
  mx = np.max(sim) * 0.99
194
  if mx < 0.55:
195
  return
 
227
 
228
  return res
229
 
230
+ def rerank(self, sres, query, tkweight=0.3, vtweight=0.7, cfield="content_ltks"):
 
231
  ins_embd = [
232
  Dealer.trans2floats(
233
+ sres.field[i]["q_%d_vec"%len(sres.query_vector)]) for i in sres.ids]
234
  if not ins_embd:
235
  return []
236
  ins_tw = [sres.field[i][cfield].split(" ") for i in sres.ids]
237
+ sim, tksim, vtsim = self.qryr.hybrid_similarity(sres.query_vector,
 
238
  ins_embd,
239
  huqie.qie(query).split(" "),
240
  ins_tw, tkweight, vtweight)
241
+ return sim, tksim, vtsim
242
 
243
 
244
 
rag/nlp/synonym.py CHANGED
@@ -4,7 +4,7 @@ import time
4
  import logging
5
  import re
6
 
7
- from web_server.utils.file_utils import get_project_base_directory
8
 
9
 
10
  class Dealer:
 
4
  import logging
5
  import re
6
 
7
+ from api.utils.file_utils import get_project_base_directory
8
 
9
 
10
  class Dealer:
rag/nlp/term_weight.py CHANGED
@@ -5,7 +5,7 @@ import re
5
  import os
6
  import numpy as np
7
  from rag.nlp import huqie
8
- from web_server.utils.file_utils import get_project_base_directory
9
 
10
 
11
  class Dealer:
 
5
  import os
6
  import numpy as np
7
  from rag.nlp import huqie
8
+ from api.utils.file_utils import get_project_base_directory
9
 
10
 
11
  class Dealer:
rag/settings.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -14,9 +14,9 @@
14
  # limitations under the License.
15
  #
16
  import os
17
- from web_server.utils import get_base_config,decrypt_database_config
18
- from web_server.utils.file_utils import get_project_base_directory
19
- from web_server.utils.log_utils import LoggerFactory, getLogger
20
 
21
 
22
  # Server
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
14
  # limitations under the License.
15
  #
16
  import os
17
+ from api.utils import get_base_config,decrypt_database_config
18
+ from api.utils.file_utils import get_project_base_directory
19
+ from api.utils.log_utils import LoggerFactory, getLogger
20
 
21
 
22
  # Server
rag/svr/parse_user_docs.py CHANGED
@@ -1,5 +1,5 @@
1
  #
2
- # Copyright 2019 The RAG Flow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
@@ -47,12 +47,12 @@ from rag.nlp.huchunk import (
47
  PptChunker,
48
  TextChunker
49
  )
50
- from web_server.db import LLMType
51
- from web_server.db.services.document_service import DocumentService
52
- from web_server.db.services.llm_service import TenantLLMService
53
- from web_server.settings import database_logger
54
- from web_server.utils import get_format_time
55
- from web_server.utils.file_utils import get_project_base_directory
56
 
57
  BATCH_SIZE = 64
58
 
@@ -257,7 +257,6 @@ def main(comm, mod):
257
  cron_logger.error(str(e))
258
  continue
259
 
260
-
261
  set_progress(r["id"], random.randint(70, 95) / 100.,
262
  "Finished embedding! Start to build index!")
263
  init_kb(r)
 
1
  #
2
+ # Copyright 2019 The InfiniFlow Authors. All Rights Reserved.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
47
  PptChunker,
48
  TextChunker
49
  )
50
+ from api.db import LLMType
51
+ from api.db.services.document_service import DocumentService
52
+ from api.db.services.llm_service import TenantLLMService
53
+ from api.settings import database_logger
54
+ from api.utils import get_format_time
55
+ from api.utils.file_utils import get_project_base_directory
56
 
57
  BATCH_SIZE = 64
58
 
 
257
  cron_logger.error(str(e))
258
  continue
259
 
 
260
  set_progress(r["id"], random.randint(70, 95) / 100.,
261
  "Finished embedding! Start to build index!")
262
  init_kb(r)
rag/utils/es_conn.py CHANGED
@@ -66,7 +66,6 @@ class HuEs:
66
  body=d,
67
  id=id,
68
  refresh=False,
69
- doc_type="_doc",
70
  retry_on_conflict=100)
71
  es_logger.info("Successfully upsert: %s" % id)
72
  T = True
 
66
  body=d,
67
  id=id,
68
  refresh=False,
 
69
  retry_on_conflict=100)
70
  es_logger.info("Successfully upsert: %s" % id)
71
  T = True