Kevin Hu
committed on
Commit
·
1275b47
1
Parent(s):
8069189
Fix out of boundary. (#3786)
Browse files

### What problem does this PR solve?
#3769
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- api/apps/sdk/session.py +24 -22
- deepdoc/parser/pdf_parser.py +1 -1
api/apps/sdk/session.py
CHANGED
@@ -35,7 +35,7 @@ from api.db.services.llm_service import LLMBundle
|
|
35 |
|
36 |
@manager.route('/chats/<chat_id>/sessions', methods=['POST'])
|
37 |
@token_required
|
38 |
-
def create(tenant_id,chat_id):
|
39 |
req = request.json
|
40 |
req["dialog_id"] = chat_id
|
41 |
dia = DialogService.query(tenant_id=tenant_id, id=req["dialog_id"], status=StatusEnum.VALID.value)
|
@@ -77,7 +77,7 @@ def create_agent_session(tenant_id, agent_id):
|
|
77 |
conv = {
|
78 |
"id": get_uuid(),
|
79 |
"dialog_id": cvs.id,
|
80 |
-
"user_id": req.get("usr_id","") if isinstance(req, dict) else "",
|
81 |
"message": [{"role": "assistant", "content": canvas.get_prologue()}],
|
82 |
"source": "agent"
|
83 |
}
|
@@ -88,11 +88,11 @@ def create_agent_session(tenant_id, agent_id):
|
|
88 |
|
89 |
@manager.route('/chats/<chat_id>/sessions/<session_id>', methods=['PUT'])
|
90 |
@token_required
|
91 |
-
def update(tenant_id,chat_id,session_id):
|
92 |
req = request.json
|
93 |
req["dialog_id"] = chat_id
|
94 |
conv_id = session_id
|
95 |
-
conv = ConversationService.query(id=conv_id,dialog_id=chat_id)
|
96 |
if not conv:
|
97 |
return get_error_data_result(message="Session does not exist")
|
98 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
@@ -123,12 +123,12 @@ def completion(tenant_id, chat_id):
|
|
123 |
return get_error_data_result(message="`name` can not be empty.")
|
124 |
ConversationService.save(**conv)
|
125 |
e, conv = ConversationService.get_by_id(conv["id"])
|
126 |
-
session_id=conv.id
|
127 |
else:
|
128 |
session_id = req.get("session_id")
|
129 |
if not req.get("question"):
|
130 |
return get_error_data_result(message="Please input your question.")
|
131 |
-
conv = ConversationService.query(id=session_id,dialog_id=chat_id)
|
132 |
if not conv:
|
133 |
return get_error_data_result(message="Session does not exist")
|
134 |
conv = conv[0]
|
@@ -182,18 +182,18 @@ def completion(tenant_id, chat_id):
|
|
182 |
chunk_list.append(new_chunk)
|
183 |
reference["chunks"] = chunk_list
|
184 |
ans["id"] = message_id
|
185 |
-
ans["session_id"]=session_id
|
186 |
|
187 |
def stream():
|
188 |
nonlocal dia, msg, req, conv
|
189 |
try:
|
190 |
for ans in chat(dia, msg, **req):
|
191 |
fillin_conv(ans)
|
192 |
-
yield "data:" + json.dumps({"code": 0,
|
193 |
ConversationService.update_by_id(conv.id, conv.to_dict())
|
194 |
except Exception as e:
|
195 |
yield "data:" + json.dumps({"code": 500, "message": str(e),
|
196 |
-
"data": {"answer": "**ERROR**: " + str(e),"reference": []}},
|
197 |
ensure_ascii=False) + "\n\n"
|
198 |
yield "data:" + json.dumps({"code": 0, "data": True}, ensure_ascii=False) + "\n\n"
|
199 |
|
@@ -235,7 +235,7 @@ def agent_completion(tenant_id, agent_id):
|
|
235 |
conv = {
|
236 |
"id": session_id,
|
237 |
"dialog_id": cvs.id,
|
238 |
-
"user_id": req.get("user_id",""),
|
239 |
"message": [{"role": "assistant", "content": canvas.get_prologue()}],
|
240 |
"source": "agent"
|
241 |
}
|
@@ -251,9 +251,9 @@ def agent_completion(tenant_id, agent_id):
|
|
251 |
question = req.get("question")
|
252 |
if not question:
|
253 |
return get_error_data_result("`question` is required.")
|
254 |
-
question={
|
255 |
-
"role":"user",
|
256 |
-
"content":question,
|
257 |
"id": str(uuid4())
|
258 |
}
|
259 |
messages.append(question)
|
@@ -375,7 +375,7 @@ def agent_completion(tenant_id, agent_id):
|
|
375 |
|
376 |
@manager.route('/chats/<chat_id>/sessions', methods=['GET'])
|
377 |
@token_required
|
378 |
-
def list_session(chat_id,tenant_id):
|
379 |
if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
|
380 |
return get_error_data_result(message=f"You don't own the assistant {chat_id}.")
|
381 |
id = request.args.get("id")
|
@@ -387,7 +387,7 @@ def list_session(chat_id,tenant_id):
|
|
387 |
desc = False
|
388 |
else:
|
389 |
desc = True
|
390 |
-
convs = ConversationService.get_list(chat_id,page_number,items_per_page,orderby,desc,id,name)
|
391 |
if not convs:
|
392 |
return get_result(data=[])
|
393 |
for conv in convs:
|
@@ -429,7 +429,7 @@ def list_session(chat_id,tenant_id):
|
|
429 |
|
430 |
@manager.route('/chats/<chat_id>/sessions', methods=["DELETE"])
|
431 |
@token_required
|
432 |
-
def delete(tenant_id,chat_id):
|
433 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
434 |
return get_error_data_result(message="You don't own the chat")
|
435 |
req = request.json
|
@@ -437,21 +437,22 @@ def delete(tenant_id,chat_id):
|
|
437 |
if not req:
|
438 |
ids = None
|
439 |
else:
|
440 |
-
ids=req.get("ids")
|
441 |
|
442 |
if not ids:
|
443 |
conv_list = []
|
444 |
for conv in convs:
|
445 |
conv_list.append(conv.id)
|
446 |
else:
|
447 |
-
conv_list=ids
|
448 |
for id in conv_list:
|
449 |
-
conv = ConversationService.query(id=id,dialog_id=chat_id)
|
450 |
if not conv:
|
451 |
return get_error_data_result(message="The chat doesn't own the session")
|
452 |
ConversationService.delete_by_id(id)
|
453 |
return get_result()
|
454 |
|
|
|
455 |
@manager.route('/sessions/ask', methods=['POST'])
|
456 |
@token_required
|
457 |
def ask_about(tenant_id):
|
@@ -460,17 +461,18 @@ def ask_about(tenant_id):
|
|
460 |
return get_error_data_result("`question` is required.")
|
461 |
if not req.get("dataset_ids"):
|
462 |
return get_error_data_result("`dataset_ids` is required.")
|
463 |
-
if not isinstance(req.get("dataset_ids"),list):
|
464 |
return get_error_data_result("`dataset_ids` should be a list.")
|
465 |
-
req["kb_ids"]=req.pop("dataset_ids")
|
466 |
for kb_id in req["kb_ids"]:
|
467 |
-
if not KnowledgebaseService.accessible(kb_id,tenant_id):
|
468 |
return get_error_data_result(f"You don't own the dataset {kb_id}.")
|
469 |
kbs = KnowledgebaseService.query(id=kb_id)
|
470 |
kb = kbs[0]
|
471 |
if kb.chunk_num == 0:
|
472 |
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
473 |
uid = tenant_id
|
|
|
474 |
def stream():
|
475 |
nonlocal req, uid
|
476 |
try:
|
|
|
35 |
|
36 |
@manager.route('/chats/<chat_id>/sessions', methods=['POST'])
|
37 |
@token_required
|
38 |
+
def create(tenant_id, chat_id):
|
39 |
req = request.json
|
40 |
req["dialog_id"] = chat_id
|
41 |
dia = DialogService.query(tenant_id=tenant_id, id=req["dialog_id"], status=StatusEnum.VALID.value)
|
|
|
77 |
conv = {
|
78 |
"id": get_uuid(),
|
79 |
"dialog_id": cvs.id,
|
80 |
+
"user_id": req.get("usr_id", "") if isinstance(req, dict) else "",
|
81 |
"message": [{"role": "assistant", "content": canvas.get_prologue()}],
|
82 |
"source": "agent"
|
83 |
}
|
|
|
88 |
|
89 |
@manager.route('/chats/<chat_id>/sessions/<session_id>', methods=['PUT'])
|
90 |
@token_required
|
91 |
+
def update(tenant_id, chat_id, session_id):
|
92 |
req = request.json
|
93 |
req["dialog_id"] = chat_id
|
94 |
conv_id = session_id
|
95 |
+
conv = ConversationService.query(id=conv_id, dialog_id=chat_id)
|
96 |
if not conv:
|
97 |
return get_error_data_result(message="Session does not exist")
|
98 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
|
|
123 |
return get_error_data_result(message="`name` can not be empty.")
|
124 |
ConversationService.save(**conv)
|
125 |
e, conv = ConversationService.get_by_id(conv["id"])
|
126 |
+
session_id = conv.id
|
127 |
else:
|
128 |
session_id = req.get("session_id")
|
129 |
if not req.get("question"):
|
130 |
return get_error_data_result(message="Please input your question.")
|
131 |
+
conv = ConversationService.query(id=session_id, dialog_id=chat_id)
|
132 |
if not conv:
|
133 |
return get_error_data_result(message="Session does not exist")
|
134 |
conv = conv[0]
|
|
|
182 |
chunk_list.append(new_chunk)
|
183 |
reference["chunks"] = chunk_list
|
184 |
ans["id"] = message_id
|
185 |
+
ans["session_id"] = session_id
|
186 |
|
187 |
def stream():
|
188 |
nonlocal dia, msg, req, conv
|
189 |
try:
|
190 |
for ans in chat(dia, msg, **req):
|
191 |
fillin_conv(ans)
|
192 |
+
yield "data:" + json.dumps({"code": 0, "data": ans}, ensure_ascii=False) + "\n\n"
|
193 |
ConversationService.update_by_id(conv.id, conv.to_dict())
|
194 |
except Exception as e:
|
195 |
yield "data:" + json.dumps({"code": 500, "message": str(e),
|
196 |
+
"data": {"answer": "**ERROR**: " + str(e), "reference": []}},
|
197 |
ensure_ascii=False) + "\n\n"
|
198 |
yield "data:" + json.dumps({"code": 0, "data": True}, ensure_ascii=False) + "\n\n"
|
199 |
|
|
|
235 |
conv = {
|
236 |
"id": session_id,
|
237 |
"dialog_id": cvs.id,
|
238 |
+
"user_id": req.get("user_id", ""),
|
239 |
"message": [{"role": "assistant", "content": canvas.get_prologue()}],
|
240 |
"source": "agent"
|
241 |
}
|
|
|
251 |
question = req.get("question")
|
252 |
if not question:
|
253 |
return get_error_data_result("`question` is required.")
|
254 |
+
question = {
|
255 |
+
"role": "user",
|
256 |
+
"content": question,
|
257 |
"id": str(uuid4())
|
258 |
}
|
259 |
messages.append(question)
|
|
|
375 |
|
376 |
@manager.route('/chats/<chat_id>/sessions', methods=['GET'])
|
377 |
@token_required
|
378 |
+
def list_session(chat_id, tenant_id):
|
379 |
if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
|
380 |
return get_error_data_result(message=f"You don't own the assistant {chat_id}.")
|
381 |
id = request.args.get("id")
|
|
|
387 |
desc = False
|
388 |
else:
|
389 |
desc = True
|
390 |
+
convs = ConversationService.get_list(chat_id, page_number, items_per_page, orderby, desc, id, name)
|
391 |
if not convs:
|
392 |
return get_result(data=[])
|
393 |
for conv in convs:
|
|
|
429 |
|
430 |
@manager.route('/chats/<chat_id>/sessions', methods=["DELETE"])
|
431 |
@token_required
|
432 |
+
def delete(tenant_id, chat_id):
|
433 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
434 |
return get_error_data_result(message="You don't own the chat")
|
435 |
req = request.json
|
|
|
437 |
if not req:
|
438 |
ids = None
|
439 |
else:
|
440 |
+
ids = req.get("ids")
|
441 |
|
442 |
if not ids:
|
443 |
conv_list = []
|
444 |
for conv in convs:
|
445 |
conv_list.append(conv.id)
|
446 |
else:
|
447 |
+
conv_list = ids
|
448 |
for id in conv_list:
|
449 |
+
conv = ConversationService.query(id=id, dialog_id=chat_id)
|
450 |
if not conv:
|
451 |
return get_error_data_result(message="The chat doesn't own the session")
|
452 |
ConversationService.delete_by_id(id)
|
453 |
return get_result()
|
454 |
|
455 |
+
|
456 |
@manager.route('/sessions/ask', methods=['POST'])
|
457 |
@token_required
|
458 |
def ask_about(tenant_id):
|
|
|
461 |
return get_error_data_result("`question` is required.")
|
462 |
if not req.get("dataset_ids"):
|
463 |
return get_error_data_result("`dataset_ids` is required.")
|
464 |
+
if not isinstance(req.get("dataset_ids"), list):
|
465 |
return get_error_data_result("`dataset_ids` should be a list.")
|
466 |
+
req["kb_ids"] = req.pop("dataset_ids")
|
467 |
for kb_id in req["kb_ids"]:
|
468 |
+
if not KnowledgebaseService.accessible(kb_id, tenant_id):
|
469 |
return get_error_data_result(f"You don't own the dataset {kb_id}.")
|
470 |
kbs = KnowledgebaseService.query(id=kb_id)
|
471 |
kb = kbs[0]
|
472 |
if kb.chunk_num == 0:
|
473 |
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
474 |
uid = tenant_id
|
475 |
+
|
476 |
def stream():
|
477 |
nonlocal req, uid
|
478 |
try:
|
deepdoc/parser/pdf_parser.py
CHANGED
@@ -152,7 +152,7 @@ class RAGFlowPdfParser:
|
|
152 |
max(len(up["text"]), len(down["text"])),
|
153 |
len(tks_all) - len(tks_up) - len(tks_down),
|
154 |
len(tks_down) - len(tks_up),
|
155 |
-
tks_down[-1] == tks_up[-1],
|
156 |
max(down["in_row"], up["in_row"]),
|
157 |
abs(down["in_row"] - up["in_row"]),
|
158 |
len(tks_down) == 1 and rag_tokenizer.tag(tks_down[0]).find("n") >= 0,
|
|
|
152 |
max(len(up["text"]), len(down["text"])),
|
153 |
len(tks_all) - len(tks_up) - len(tks_down),
|
154 |
len(tks_down) - len(tks_up),
|
155 |
+
tks_down[-1] == tks_up[-1] if tks_down and tks_up else False,
|
156 |
max(down["in_row"], up["in_row"]),
|
157 |
abs(down["in_row"] - up["in_row"]),
|
158 |
len(tks_down) == 1 and rag_tokenizer.tag(tks_down[0]).find("n") >= 0,
|