liuhua
liuhua
committed on
Commit
·
533089d
1
Parent(s):
4b50c07
Fix bugs in API (#3103)
Browse files
### What problem does this PR solve?
Fix bugs in API
- [x] Bug Fix (non-breaking change which fixes an issue)
Co-authored-by: liuhua <[email protected]>
- api/apps/sdk/chat.py +26 -5
- api/apps/sdk/dataset.py +37 -22
- api/apps/sdk/doc.py +38 -18
- api/apps/sdk/session.py +18 -7
- api/utils/api_utils.py +1 -1
- sdk/python/ragflow/modules/chat.py +1 -1
- sdk/python/ragflow/ragflow.py +3 -3
api/apps/sdk/chat.py
CHANGED
@@ -14,7 +14,7 @@
|
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
from flask import request
|
17 |
-
|
18 |
from api.db import StatusEnum
|
19 |
from api.db.services.dialog_service import DialogService
|
20 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
@@ -40,6 +40,10 @@ def create(tenant_id):
|
|
40 |
kb=kbs[0]
|
41 |
if kb.chunk_num == 0:
|
42 |
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
|
|
|
|
|
|
|
|
43 |
req["kb_ids"] = ids
|
44 |
# llm
|
45 |
llm = req.get("llm")
|
@@ -149,6 +153,8 @@ def update(tenant_id,chat_id):
|
|
149 |
return get_error_data_result(retmsg='You do not own the chat')
|
150 |
req =request.json
|
151 |
ids = req.get("dataset_ids")
|
|
|
|
|
152 |
if "dataset_ids" in req:
|
153 |
if not ids:
|
154 |
return get_error_data_result("`datasets` can't be empty")
|
@@ -160,6 +166,12 @@ def update(tenant_id,chat_id):
|
|
160 |
kb = kbs[0]
|
161 |
if kb.chunk_num == 0:
|
162 |
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
req["kb_ids"] = ids
|
164 |
llm = req.get("llm")
|
165 |
if llm:
|
@@ -225,10 +237,18 @@ def update(tenant_id,chat_id):
|
|
225 |
@token_required
|
226 |
def delete(tenant_id):
|
227 |
req = request.json
|
228 |
-
|
|
|
|
|
|
|
229 |
if not ids:
|
230 |
-
|
231 |
-
|
|
|
|
|
|
|
|
|
|
|
232 |
if not DialogService.query(tenant_id=tenant_id, id=id, status=StatusEnum.VALID.value):
|
233 |
return get_error_data_result(retmsg=f"You don't own the chat {id}")
|
234 |
temp_dict = {"status": StatusEnum.INVALID.value}
|
@@ -260,7 +280,8 @@ def list_chat(tenant_id):
|
|
260 |
"quote": "show_quote",
|
261 |
"system": "prompt",
|
262 |
"rerank_id": "rerank_model",
|
263 |
-
"vector_similarity_weight": "keywords_similarity_weight"
|
|
|
264 |
key_list = ["similarity_threshold", "vector_similarity_weight", "top_n", "rerank_id"]
|
265 |
for res in chats:
|
266 |
for key, value in res["prompt_config"].items():
|
|
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
from flask import request
|
17 |
+
from api.settings import RetCode
|
18 |
from api.db import StatusEnum
|
19 |
from api.db.services.dialog_service import DialogService
|
20 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
|
|
40 |
kb=kbs[0]
|
41 |
if kb.chunk_num == 0:
|
42 |
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
43 |
+
kbs = KnowledgebaseService.get_by_ids(ids)
|
44 |
+
embd_count = list(set(kb.embd_id for kb in kbs))
|
45 |
+
if embd_count != 1:
|
46 |
+
return get_result(retmsg='Datasets use different embedding models."',retcode=RetCode.AUTHENTICATION_ERROR)
|
47 |
req["kb_ids"] = ids
|
48 |
# llm
|
49 |
llm = req.get("llm")
|
|
|
153 |
return get_error_data_result(retmsg='You do not own the chat')
|
154 |
req =request.json
|
155 |
ids = req.get("dataset_ids")
|
156 |
+
if "show_quotation" in req:
|
157 |
+
req["do_refer"]=req.pop("show_quotation")
|
158 |
if "dataset_ids" in req:
|
159 |
if not ids:
|
160 |
return get_error_data_result("`datasets` can't be empty")
|
|
|
166 |
kb = kbs[0]
|
167 |
if kb.chunk_num == 0:
|
168 |
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
169 |
+
kbs = KnowledgebaseService.get_by_ids(ids)
|
170 |
+
embd_count=list(set(kb.embd_id for kb in kbs))
|
171 |
+
if embd_count != 1 :
|
172 |
+
return get_result(
|
173 |
+
retmsg='Datasets use different embedding models."',
|
174 |
+
retcode=RetCode.AUTHENTICATION_ERROR)
|
175 |
req["kb_ids"] = ids
|
176 |
llm = req.get("llm")
|
177 |
if llm:
|
|
|
237 |
@token_required
|
238 |
def delete(tenant_id):
|
239 |
req = request.json
|
240 |
+
if not req:
|
241 |
+
ids=None
|
242 |
+
else:
|
243 |
+
ids=req.get("ids")
|
244 |
if not ids:
|
245 |
+
id_list = []
|
246 |
+
dias=DialogService.query(tenant_id=tenant_id,status=StatusEnum.VALID.value)
|
247 |
+
for dia in dias:
|
248 |
+
id_list.append(dia.id)
|
249 |
+
else:
|
250 |
+
id_list=ids
|
251 |
+
for id in id_list:
|
252 |
if not DialogService.query(tenant_id=tenant_id, id=id, status=StatusEnum.VALID.value):
|
253 |
return get_error_data_result(retmsg=f"You don't own the chat {id}")
|
254 |
temp_dict = {"status": StatusEnum.INVALID.value}
|
|
|
280 |
"quote": "show_quote",
|
281 |
"system": "prompt",
|
282 |
"rerank_id": "rerank_model",
|
283 |
+
"vector_similarity_weight": "keywords_similarity_weight",
|
284 |
+
"do_refer":"show_quotation"}
|
285 |
key_list = ["similarity_threshold", "vector_similarity_weight", "top_n", "rerank_id"]
|
286 |
for res in chats:
|
287 |
for key, value in res["prompt_config"].items():
|
api/apps/sdk/dataset.py
CHANGED
@@ -21,7 +21,7 @@ from api.db.services.document_service import DocumentService
|
|
21 |
from api.db.services.file2document_service import File2DocumentService
|
22 |
from api.db.services.file_service import FileService
|
23 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
24 |
-
from api.db.services.llm_service import TenantLLMService
|
25 |
from api.db.services.user_service import TenantService
|
26 |
from api.settings import RetCode
|
27 |
from api.utils import get_uuid
|
@@ -68,9 +68,12 @@ def create(tenant_id):
|
|
68 |
"BAAI/bge-small-zh-v1.5","jinaai/jina-embeddings-v2-base-en","jinaai/jina-embeddings-v2-small-en",
|
69 |
"nomic-ai/nomic-embed-text-v1.5","sentence-transformers/all-MiniLM-L6-v2","text-embedding-v2",
|
70 |
"text-embedding-v3","maidalun1020/bce-embedding-base_v1"]
|
71 |
-
|
72 |
-
|
73 |
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
|
|
|
|
|
|
74 |
key_mapping = {
|
75 |
"chunk_num": "chunk_count",
|
76 |
"doc_num": "document_count",
|
@@ -92,25 +95,32 @@ def create(tenant_id):
|
|
92 |
@token_required
|
93 |
def delete(tenant_id):
|
94 |
req = request.json
|
95 |
-
|
|
|
|
|
|
|
96 |
if not ids:
|
97 |
-
|
98 |
-
|
99 |
-
|
|
|
|
|
|
|
|
|
100 |
kbs = KnowledgebaseService.query(id=id, tenant_id=tenant_id)
|
101 |
if not kbs:
|
102 |
return get_error_data_result(retmsg=f"You don't own the dataset {id}")
|
103 |
-
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
return get_error_data_result(
|
106 |
-
retmsg="
|
107 |
-
|
108 |
-
FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
|
109 |
-
File2DocumentService.delete_by_document_id(doc.id)
|
110 |
-
if not KnowledgebaseService.delete_by_id(id):
|
111 |
-
return get_error_data_result(
|
112 |
-
retmsg="Delete dataset error.(Database error)")
|
113 |
-
return get_result(retcode=RetCode.SUCCESS)
|
114 |
|
115 |
@manager.route('/datasets/<dataset_id>', methods=['PUT'])
|
116 |
@token_required
|
@@ -139,8 +149,9 @@ def update(tenant_id,dataset_id):
|
|
139 |
retmsg="Can't change `tenant_id`.")
|
140 |
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
141 |
if "parser_config" in req:
|
142 |
-
|
143 |
-
|
|
|
144 |
if "chunk_count" in req:
|
145 |
if req["chunk_count"] != kb.chunk_num:
|
146 |
return get_error_data_result(
|
@@ -157,7 +168,8 @@ def update(tenant_id,dataset_id):
|
|
157 |
retmsg="If `chunk_count` is not 0, `chunk_method` is not changeable.")
|
158 |
req['parser_id'] = req.pop('chunk_method')
|
159 |
if req['parser_id'] != kb.parser_id:
|
160 |
-
req
|
|
|
161 |
if "embedding_model" in req:
|
162 |
if kb.chunk_num != 0 and req['embedding_model'] != kb.embd_id:
|
163 |
return get_error_data_result(
|
@@ -168,9 +180,12 @@ def update(tenant_id,dataset_id):
|
|
168 |
"BAAI/bge-small-zh-v1.5","jinaai/jina-embeddings-v2-base-en","jinaai/jina-embeddings-v2-small-en",
|
169 |
"nomic-ai/nomic-embed-text-v1.5","sentence-transformers/all-MiniLM-L6-v2","text-embedding-v2",
|
170 |
"text-embedding-v3","maidalun1020/bce-embedding-base_v1"]
|
171 |
-
|
172 |
-
|
173 |
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
|
|
|
|
|
|
174 |
req['embd_id'] = req.pop('embedding_model')
|
175 |
if "name" in req:
|
176 |
req["name"] = req["name"].strip()
|
|
|
21 |
from api.db.services.file2document_service import File2DocumentService
|
22 |
from api.db.services.file_service import FileService
|
23 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
24 |
+
from api.db.services.llm_service import TenantLLMService,LLMService
|
25 |
from api.db.services.user_service import TenantService
|
26 |
from api.settings import RetCode
|
27 |
from api.utils import get_uuid
|
|
|
68 |
"BAAI/bge-small-zh-v1.5","jinaai/jina-embeddings-v2-base-en","jinaai/jina-embeddings-v2-small-en",
|
69 |
"nomic-ai/nomic-embed-text-v1.5","sentence-transformers/all-MiniLM-L6-v2","text-embedding-v2",
|
70 |
"text-embedding-v3","maidalun1020/bce-embedding-base_v1"]
|
71 |
+
embd_model=LLMService.query(llm_name=req["embedding_model"],model_type="embedding")
|
72 |
+
if not embd_model:
|
73 |
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
74 |
+
if embd_model:
|
75 |
+
if req["embedding_model"] not in valid_embedding_models and not TenantLLMService.query(tenant_id=tenant_id,model_type="embedding", llm_name=req.get("embedding_model")):
|
76 |
+
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
77 |
key_mapping = {
|
78 |
"chunk_num": "chunk_count",
|
79 |
"doc_num": "document_count",
|
|
|
95 |
@token_required
|
96 |
def delete(tenant_id):
|
97 |
req = request.json
|
98 |
+
if not req:
|
99 |
+
ids=None
|
100 |
+
else:
|
101 |
+
ids=req.get("ids")
|
102 |
if not ids:
|
103 |
+
id_list = []
|
104 |
+
kbs=KnowledgebaseService.query(tenant_id=tenant_id)
|
105 |
+
for kb in kbs:
|
106 |
+
id_list.append(kb.id)
|
107 |
+
else:
|
108 |
+
id_list=ids
|
109 |
+
for id in id_list:
|
110 |
kbs = KnowledgebaseService.query(id=id, tenant_id=tenant_id)
|
111 |
if not kbs:
|
112 |
return get_error_data_result(retmsg=f"You don't own the dataset {id}")
|
113 |
+
for doc in DocumentService.query(kb_id=id):
|
114 |
+
if not DocumentService.remove_document(doc, tenant_id):
|
115 |
+
return get_error_data_result(
|
116 |
+
retmsg="Remove document error.(Database error)")
|
117 |
+
f2d = File2DocumentService.get_by_document_id(doc.id)
|
118 |
+
FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
|
119 |
+
File2DocumentService.delete_by_document_id(doc.id)
|
120 |
+
if not KnowledgebaseService.delete_by_id(id):
|
121 |
return get_error_data_result(
|
122 |
+
retmsg="Delete dataset error.(Database error)")
|
123 |
+
return get_result(retcode=RetCode.SUCCESS)
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
|
125 |
@manager.route('/datasets/<dataset_id>', methods=['PUT'])
|
126 |
@token_required
|
|
|
149 |
retmsg="Can't change `tenant_id`.")
|
150 |
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
151 |
if "parser_config" in req:
|
152 |
+
temp_dict=kb.parser_config
|
153 |
+
temp_dict.update(req["parser_config"])
|
154 |
+
req["parser_config"] = temp_dict
|
155 |
if "chunk_count" in req:
|
156 |
if req["chunk_count"] != kb.chunk_num:
|
157 |
return get_error_data_result(
|
|
|
168 |
retmsg="If `chunk_count` is not 0, `chunk_method` is not changeable.")
|
169 |
req['parser_id'] = req.pop('chunk_method')
|
170 |
if req['parser_id'] != kb.parser_id:
|
171 |
+
if not req.get("parser_config"):
|
172 |
+
req["parser_config"] = get_parser_config(chunk_method, parser_config)
|
173 |
if "embedding_model" in req:
|
174 |
if kb.chunk_num != 0 and req['embedding_model'] != kb.embd_id:
|
175 |
return get_error_data_result(
|
|
|
180 |
"BAAI/bge-small-zh-v1.5","jinaai/jina-embeddings-v2-base-en","jinaai/jina-embeddings-v2-small-en",
|
181 |
"nomic-ai/nomic-embed-text-v1.5","sentence-transformers/all-MiniLM-L6-v2","text-embedding-v2",
|
182 |
"text-embedding-v3","maidalun1020/bce-embedding-base_v1"]
|
183 |
+
embd_model=LLMService.query(llm_name=req["embedding_model"],model_type="embedding")
|
184 |
+
if not embd_model:
|
185 |
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
186 |
+
if embd_model:
|
187 |
+
if req["embedding_model"] not in valid_embedding_models and not TenantLLMService.query(tenant_id=tenant_id,model_type="embedding", llm_name=req.get("embedding_model")):
|
188 |
+
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
189 |
req['embd_id'] = req.pop('embedding_model')
|
190 |
if "name" in req:
|
191 |
req["name"] = req["name"].strip()
|
api/apps/sdk/doc.py
CHANGED
@@ -46,6 +46,9 @@ from rag.utils.es_conn import ELASTICSEARCH
|
|
46 |
from rag.utils.storage_factory import STORAGE_IMPL
|
47 |
import os
|
48 |
|
|
|
|
|
|
|
49 |
|
50 |
@manager.route('/datasets/<dataset_id>/documents', methods=['POST'])
|
51 |
@token_required
|
@@ -58,11 +61,21 @@ def upload(dataset_id, tenant_id):
|
|
58 |
if file_obj.filename == '':
|
59 |
return get_result(
|
60 |
retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
63 |
if not e:
|
64 |
raise LookupError(f"Can't find the dataset with ID {dataset_id}!")
|
65 |
-
err, files
|
66 |
if err:
|
67 |
return get_result(
|
68 |
retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
|
@@ -140,6 +153,7 @@ def update_doc(tenant_id, dataset_id, document_id):
|
|
140 |
if not e:
|
141 |
return get_error_data_result(retmsg="Document not found!")
|
142 |
req["parser_config"] = get_parser_config(req["chunk_method"], req.get("parser_config"))
|
|
|
143 |
if doc.token_num > 0:
|
144 |
e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1,
|
145 |
doc.process_duation * -1)
|
@@ -210,10 +224,10 @@ def list_docs(dataset_id, tenant_id):
|
|
210 |
}
|
211 |
renamed_doc = {}
|
212 |
for key, value in doc.items():
|
213 |
-
if key =="run":
|
214 |
-
renamed_doc["run"]=run_mapping.get(str(value))
|
215 |
new_key = key_mapping.get(key, key)
|
216 |
renamed_doc[new_key] = value
|
|
|
|
|
217 |
renamed_doc_list.append(renamed_doc)
|
218 |
return get_result(data={"total": tol, "docs": renamed_doc_list})
|
219 |
|
@@ -280,14 +294,11 @@ def parse(tenant_id,dataset_id):
|
|
280 |
doc = DocumentService.query(id=id,kb_id=dataset_id)
|
281 |
if not doc:
|
282 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
283 |
-
if doc[0].progress != 0.0:
|
284 |
-
return get_error_data_result("Can't stop parsing document with progress at 0 or 100")
|
285 |
info = {"run": "1", "progress": 0}
|
286 |
info["progress_msg"] = ""
|
287 |
info["chunk_num"] = 0
|
288 |
info["token_num"] = 0
|
289 |
DocumentService.update_by_id(id, info)
|
290 |
-
# if str(req["run"]) == TaskStatus.CANCEL.value:
|
291 |
ELASTICSEARCH.deleteByQuery(
|
292 |
Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
|
293 |
TaskService.filter_delete([Task.doc_id == id])
|
@@ -312,10 +323,8 @@ def stop_parsing(tenant_id,dataset_id):
|
|
312 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
313 |
if doc[0].progress == 100.0 or doc[0].progress == 0.0:
|
314 |
return get_error_data_result("Can't stop parsing document with progress at 0 or 100")
|
315 |
-
info = {"run": "2", "progress": 0}
|
316 |
DocumentService.update_by_id(id, info)
|
317 |
-
# if str(req["run"]) == TaskStatus.CANCEL.value:
|
318 |
-
tenant_id = DocumentService.get_tenant_id(id)
|
319 |
ELASTICSEARCH.deleteByQuery(
|
320 |
Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
|
321 |
return get_result()
|
@@ -355,10 +364,10 @@ def list_chunks(tenant_id,dataset_id,document_id):
|
|
355 |
doc=doc.to_dict()
|
356 |
renamed_doc = {}
|
357 |
for key, value in doc.items():
|
358 |
-
if key == "run":
|
359 |
-
renamed_doc["run"] = run_mapping.get(str(value))
|
360 |
new_key = key_mapping.get(key, key)
|
361 |
renamed_doc[new_key] = value
|
|
|
|
|
362 |
res = {"total": sres.total, "chunks": [], "doc": renamed_doc}
|
363 |
origin_chunks = []
|
364 |
sign = 0
|
@@ -398,12 +407,17 @@ def list_chunks(tenant_id,dataset_id,document_id):
|
|
398 |
"content_with_weight": "content",
|
399 |
"doc_id": "document_id",
|
400 |
"important_kwd": "important_keywords",
|
401 |
-
"img_id": "image_id"
|
|
|
402 |
}
|
403 |
renamed_chunk = {}
|
404 |
for key, value in chunk.items():
|
405 |
new_key = key_mapping.get(key, key)
|
406 |
renamed_chunk[new_key] = value
|
|
|
|
|
|
|
|
|
407 |
res["chunks"].append(renamed_chunk)
|
408 |
return get_result(data=res)
|
409 |
|
@@ -441,7 +455,7 @@ def add_chunk(tenant_id,dataset_id,document_id):
|
|
441 |
embd_id = DocumentService.get_embd_id(document_id)
|
442 |
embd_mdl = TenantLLMService.model_instance(
|
443 |
tenant_id, LLMType.EMBEDDING.value, embd_id)
|
444 |
-
|
445 |
v, c = embd_mdl.encode([doc.name, req["content"]])
|
446 |
v = 0.1 * v[0] + 0.9 * v[1]
|
447 |
d["q_%d_vec" % len(v)] = v.tolist()
|
@@ -459,7 +473,7 @@ def add_chunk(tenant_id,dataset_id,document_id):
|
|
459 |
"kb_id": "dataset_id",
|
460 |
"create_timestamp_flt": "create_timestamp",
|
461 |
"create_time": "create_time",
|
462 |
-
"document_keyword": "document"
|
463 |
}
|
464 |
renamed_chunk = {}
|
465 |
for key, value in d.items():
|
@@ -480,12 +494,18 @@ def rm_chunk(tenant_id,dataset_id,document_id):
|
|
480 |
return get_error_data_result(retmsg=f"You don't own the document {document_id}.")
|
481 |
doc = doc[0]
|
482 |
req = request.json
|
483 |
-
if not req.get("chunk_ids"):
|
484 |
-
return get_error_data_result("`chunk_ids` is required")
|
485 |
query = {
|
486 |
"doc_ids": [doc.id], "page": 1, "size": 1024, "question": "", "sort": True}
|
487 |
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
488 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
489 |
if chunk_id not in sres.ids:
|
490 |
return get_error_data_result(f"Chunk {chunk_id} not found")
|
491 |
if not ELASTICSEARCH.deleteByQuery(
|
|
|
46 |
from rag.utils.storage_factory import STORAGE_IMPL
|
47 |
import os
|
48 |
|
49 |
+
MAXIMUM_OF_UPLOADING_FILES = 256
|
50 |
+
|
51 |
+
|
52 |
|
53 |
@manager.route('/datasets/<dataset_id>/documents', methods=['POST'])
|
54 |
@token_required
|
|
|
61 |
if file_obj.filename == '':
|
62 |
return get_result(
|
63 |
retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
|
64 |
+
# total size
|
65 |
+
total_size = 0
|
66 |
+
for file_obj in file_objs:
|
67 |
+
file_obj.seek(0, os.SEEK_END)
|
68 |
+
total_size += file_obj.tell()
|
69 |
+
file_obj.seek(0)
|
70 |
+
MAX_TOTAL_FILE_SIZE=10*1024*1024
|
71 |
+
if total_size > MAX_TOTAL_FILE_SIZE:
|
72 |
+
return get_result(
|
73 |
+
retmsg=f'Total file size exceeds 10MB limit! ({total_size / (1024 * 1024):.2f} MB)',
|
74 |
+
retcode=RetCode.ARGUMENT_ERROR)
|
75 |
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
76 |
if not e:
|
77 |
raise LookupError(f"Can't find the dataset with ID {dataset_id}!")
|
78 |
+
err, files= FileService.upload_document(kb, file_objs, tenant_id)
|
79 |
if err:
|
80 |
return get_result(
|
81 |
retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
|
|
|
153 |
if not e:
|
154 |
return get_error_data_result(retmsg="Document not found!")
|
155 |
req["parser_config"] = get_parser_config(req["chunk_method"], req.get("parser_config"))
|
156 |
+
DocumentService.update_parser_config(doc.id, req["parser_config"])
|
157 |
if doc.token_num > 0:
|
158 |
e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1,
|
159 |
doc.process_duation * -1)
|
|
|
224 |
}
|
225 |
renamed_doc = {}
|
226 |
for key, value in doc.items():
|
|
|
|
|
227 |
new_key = key_mapping.get(key, key)
|
228 |
renamed_doc[new_key] = value
|
229 |
+
if key =="run":
|
230 |
+
renamed_doc["run"]=run_mapping.get(value)
|
231 |
renamed_doc_list.append(renamed_doc)
|
232 |
return get_result(data={"total": tol, "docs": renamed_doc_list})
|
233 |
|
|
|
294 |
doc = DocumentService.query(id=id,kb_id=dataset_id)
|
295 |
if not doc:
|
296 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
|
|
|
|
297 |
info = {"run": "1", "progress": 0}
|
298 |
info["progress_msg"] = ""
|
299 |
info["chunk_num"] = 0
|
300 |
info["token_num"] = 0
|
301 |
DocumentService.update_by_id(id, info)
|
|
|
302 |
ELASTICSEARCH.deleteByQuery(
|
303 |
Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
|
304 |
TaskService.filter_delete([Task.doc_id == id])
|
|
|
323 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
324 |
if doc[0].progress == 100.0 or doc[0].progress == 0.0:
|
325 |
return get_error_data_result("Can't stop parsing document with progress at 0 or 100")
|
326 |
+
info = {"run": "2", "progress": 0,"chunk_num":0}
|
327 |
DocumentService.update_by_id(id, info)
|
|
|
|
|
328 |
ELASTICSEARCH.deleteByQuery(
|
329 |
Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
|
330 |
return get_result()
|
|
|
364 |
doc=doc.to_dict()
|
365 |
renamed_doc = {}
|
366 |
for key, value in doc.items():
|
|
|
|
|
367 |
new_key = key_mapping.get(key, key)
|
368 |
renamed_doc[new_key] = value
|
369 |
+
if key == "run":
|
370 |
+
renamed_doc["run"] = run_mapping.get(str(value))
|
371 |
res = {"total": sres.total, "chunks": [], "doc": renamed_doc}
|
372 |
origin_chunks = []
|
373 |
sign = 0
|
|
|
407 |
"content_with_weight": "content",
|
408 |
"doc_id": "document_id",
|
409 |
"important_kwd": "important_keywords",
|
410 |
+
"img_id": "image_id",
|
411 |
+
"available_int":"available"
|
412 |
}
|
413 |
renamed_chunk = {}
|
414 |
for key, value in chunk.items():
|
415 |
new_key = key_mapping.get(key, key)
|
416 |
renamed_chunk[new_key] = value
|
417 |
+
if renamed_chunk["available"] == "0":
|
418 |
+
renamed_chunk["available"] = False
|
419 |
+
if renamed_chunk["available"] == "1":
|
420 |
+
renamed_chunk["available"] = True
|
421 |
res["chunks"].append(renamed_chunk)
|
422 |
return get_result(data=res)
|
423 |
|
|
|
455 |
embd_id = DocumentService.get_embd_id(document_id)
|
456 |
embd_mdl = TenantLLMService.model_instance(
|
457 |
tenant_id, LLMType.EMBEDDING.value, embd_id)
|
458 |
+
print(embd_mdl,flush=True)
|
459 |
v, c = embd_mdl.encode([doc.name, req["content"]])
|
460 |
v = 0.1 * v[0] + 0.9 * v[1]
|
461 |
d["q_%d_vec" % len(v)] = v.tolist()
|
|
|
473 |
"kb_id": "dataset_id",
|
474 |
"create_timestamp_flt": "create_timestamp",
|
475 |
"create_time": "create_time",
|
476 |
+
"document_keyword": "document"
|
477 |
}
|
478 |
renamed_chunk = {}
|
479 |
for key, value in d.items():
|
|
|
494 |
return get_error_data_result(retmsg=f"You don't own the document {document_id}.")
|
495 |
doc = doc[0]
|
496 |
req = request.json
|
|
|
|
|
497 |
query = {
|
498 |
"doc_ids": [doc.id], "page": 1, "size": 1024, "question": "", "sort": True}
|
499 |
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
500 |
+
if not req:
|
501 |
+
chunk_ids=None
|
502 |
+
else:
|
503 |
+
chunk_ids=req.get("chunk_ids")
|
504 |
+
if not chunk_ids:
|
505 |
+
chunk_list=sres.ids
|
506 |
+
else:
|
507 |
+
chunk_list=chunk_ids
|
508 |
+
for chunk_id in chunk_list:
|
509 |
if chunk_id not in sres.ids:
|
510 |
return get_error_data_result(f"Chunk {chunk_id} not found")
|
511 |
if not ELASTICSEARCH.deleteByQuery(
|
api/apps/sdk/session.py
CHANGED
@@ -100,7 +100,7 @@ def completion(tenant_id,chat_id):
|
|
100 |
return get_error_data_result(retmsg="Session does not exist")
|
101 |
conv = conv[0]
|
102 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
103 |
-
return get_error_data_result(retmsg="You do not own the
|
104 |
msg = []
|
105 |
question = {
|
106 |
"content": req.get("question"),
|
@@ -168,9 +168,6 @@ def list(chat_id,tenant_id):
|
|
168 |
return get_error_data_result(retmsg=f"You don't own the assistant {chat_id}.")
|
169 |
id = request.args.get("id")
|
170 |
name = request.args.get("name")
|
171 |
-
session = ConversationService.query(id=id,name=name,dialog_id=chat_id)
|
172 |
-
if not session:
|
173 |
-
return get_error_data_result(retmsg="The session doesn't exist")
|
174 |
page_number = int(request.args.get("page", 1))
|
175 |
items_per_page = int(request.args.get("page_size", 1024))
|
176 |
orderby = request.args.get("orderby", "create_time")
|
@@ -183,6 +180,10 @@ def list(chat_id,tenant_id):
|
|
183 |
return get_result(data=[])
|
184 |
for conv in convs:
|
185 |
conv['messages'] = conv.pop("message")
|
|
|
|
|
|
|
|
|
186 |
conv["chat"] = conv.pop("dialog_id")
|
187 |
if conv["reference"]:
|
188 |
messages = conv["messages"]
|
@@ -218,10 +219,20 @@ def list(chat_id,tenant_id):
|
|
218 |
def delete(tenant_id,chat_id):
|
219 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
220 |
return get_error_data_result(retmsg="You don't own the chat")
|
221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
if not ids:
|
223 |
-
|
224 |
-
|
|
|
|
|
|
|
|
|
225 |
conv = ConversationService.query(id=id,dialog_id=chat_id)
|
226 |
if not conv:
|
227 |
return get_error_data_result(retmsg="The chat doesn't own the session")
|
|
|
100 |
return get_error_data_result(retmsg="Session does not exist")
|
101 |
conv = conv[0]
|
102 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
103 |
+
return get_error_data_result(retmsg="You do not own the chat")
|
104 |
msg = []
|
105 |
question = {
|
106 |
"content": req.get("question"),
|
|
|
168 |
return get_error_data_result(retmsg=f"You don't own the assistant {chat_id}.")
|
169 |
id = request.args.get("id")
|
170 |
name = request.args.get("name")
|
|
|
|
|
|
|
171 |
page_number = int(request.args.get("page", 1))
|
172 |
items_per_page = int(request.args.get("page_size", 1024))
|
173 |
orderby = request.args.get("orderby", "create_time")
|
|
|
180 |
return get_result(data=[])
|
181 |
for conv in convs:
|
182 |
conv['messages'] = conv.pop("message")
|
183 |
+
infos = conv["messages"]
|
184 |
+
for info in infos:
|
185 |
+
if "prompt" in info:
|
186 |
+
info.pop("prompt")
|
187 |
conv["chat"] = conv.pop("dialog_id")
|
188 |
if conv["reference"]:
|
189 |
messages = conv["messages"]
|
|
|
219 |
def delete(tenant_id,chat_id):
|
220 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
221 |
return get_error_data_result(retmsg="You don't own the chat")
|
222 |
+
req = request.json
|
223 |
+
convs = ConversationService.query(dialog_id=chat_id)
|
224 |
+
if not req:
|
225 |
+
ids = None
|
226 |
+
else:
|
227 |
+
ids=req.get("ids")
|
228 |
+
|
229 |
if not ids:
|
230 |
+
conv_list = []
|
231 |
+
for conv in convs:
|
232 |
+
conv_list.append(conv.id)
|
233 |
+
else:
|
234 |
+
conv_list=ids
|
235 |
+
for id in conv_list:
|
236 |
conv = ConversationService.query(id=id,dialog_id=chat_id)
|
237 |
if not conv:
|
238 |
return get_error_data_result(retmsg="The chat doesn't own the session")
|
api/utils/api_utils.py
CHANGED
@@ -344,7 +344,7 @@ def get_parser_config(chunk_method,parser_config):
|
|
344 |
return parser_config
|
345 |
if not chunk_method:
|
346 |
chunk_method = "naive"
|
347 |
-
key_mapping={"naive":{"chunk_token_num": 128, "delimiter": "\\n!?;。;!?", "html4excel": False,"layout_recognize": True, "raptor": {"
|
348 |
"qa":{"raptor":{"use_raptor":False}},
|
349 |
"resume":None,
|
350 |
"manual":{"raptor":{"use_raptor":False}},
|
|
|
344 |
return parser_config
|
345 |
if not chunk_method:
|
346 |
chunk_method = "naive"
|
347 |
+
key_mapping={"naive":{"chunk_token_num": 128, "delimiter": "\\n!?;。;!?", "html4excel": False,"layout_recognize": True, "raptor": {"use_raptor": False}},
|
348 |
"qa":{"raptor":{"use_raptor":False}},
|
349 |
"resume":None,
|
350 |
"manual":{"raptor":{"use_raptor":False}},
|
sdk/python/ragflow/modules/chat.py
CHANGED
@@ -68,7 +68,7 @@ class Chat(Base):
|
|
68 |
return result_list
|
69 |
raise Exception(res["message"])
|
70 |
|
71 |
-
def delete_sessions(self,ids):
|
72 |
res = self.rm(f"/chats/{self.id}/sessions", {"ids": ids})
|
73 |
res = res.json()
|
74 |
if res.get("code") != 0:
|
|
|
68 |
return result_list
|
69 |
raise Exception(res["message"])
|
70 |
|
71 |
+
def delete_sessions(self,ids:List[str]=None):
|
72 |
res = self.rm(f"/chats/{self.id}/sessions", {"ids": ids})
|
73 |
res = res.json()
|
74 |
if res.get("code") != 0:
|
sdk/python/ragflow/ragflow.py
CHANGED
@@ -64,7 +64,7 @@ class RAGFlow:
|
|
64 |
return DataSet(self, res["data"])
|
65 |
raise Exception(res["message"])
|
66 |
|
67 |
-
def delete_datasets(self, ids: List[str]):
|
68 |
res = self.delete("/datasets",{"ids": ids})
|
69 |
res=res.json()
|
70 |
if res.get("code") != 0:
|
@@ -135,9 +135,9 @@ class RAGFlow:
|
|
135 |
return Chat(self, res["data"])
|
136 |
raise Exception(res["message"])
|
137 |
|
138 |
-
def delete_chats(self,ids: List[str] = None
|
139 |
res = self.delete('/chats',
|
140 |
-
{"ids":ids
|
141 |
res = res.json()
|
142 |
if res.get("code") != 0:
|
143 |
raise Exception(res["message"])
|
|
|
64 |
return DataSet(self, res["data"])
|
65 |
raise Exception(res["message"])
|
66 |
|
67 |
+
def delete_datasets(self, ids: List[str] = None):
|
68 |
res = self.delete("/datasets",{"ids": ids})
|
69 |
res=res.json()
|
70 |
if res.get("code") != 0:
|
|
|
135 |
return Chat(self, res["data"])
|
136 |
raise Exception(res["message"])
|
137 |
|
138 |
+
def delete_chats(self,ids: List[str] = None) -> bool:
|
139 |
res = self.delete('/chats',
|
140 |
+
{"ids":ids})
|
141 |
res = res.json()
|
142 |
if res.get("code") != 0:
|
143 |
raise Exception(res["message"])
|