Kevin Hu committed on
Commit
1275b47
·
1 Parent(s): 8069189

Fix out-of-bounds index on empty token lists. (#3786)

Browse files

### What problem does this PR solve?

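#3769

In `deepdoc/parser/pdf_parser.py`, the feature `tks_down[-1] == tks_up[-1]` raised an `IndexError` whenever either token list was empty. The comparison is now guarded (`... if tks_down and tks_up else False`), so an empty list yields `False` instead of crashing. The changes in `api/apps/sdk/session.py` are whitespace-only formatting fixes (spaces after commas and around `=`, blank lines between definitions).

### Type of change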

- [x] Bug Fix (non-breaking change which fixes an issue)

api/apps/sdk/session.py CHANGED
@@ -35,7 +35,7 @@ from api.db.services.llm_service import LLMBundle
35
 
36
  @manager.route('/chats/<chat_id>/sessions', methods=['POST'])
37
  @token_required
38
- def create(tenant_id,chat_id):
39
  req = request.json
40
  req["dialog_id"] = chat_id
41
  dia = DialogService.query(tenant_id=tenant_id, id=req["dialog_id"], status=StatusEnum.VALID.value)
@@ -77,7 +77,7 @@ def create_agent_session(tenant_id, agent_id):
77
  conv = {
78
  "id": get_uuid(),
79
  "dialog_id": cvs.id,
80
- "user_id": req.get("usr_id","") if isinstance(req, dict) else "",
81
  "message": [{"role": "assistant", "content": canvas.get_prologue()}],
82
  "source": "agent"
83
  }
@@ -88,11 +88,11 @@ def create_agent_session(tenant_id, agent_id):
88
 
89
  @manager.route('/chats/<chat_id>/sessions/<session_id>', methods=['PUT'])
90
  @token_required
91
- def update(tenant_id,chat_id,session_id):
92
  req = request.json
93
  req["dialog_id"] = chat_id
94
  conv_id = session_id
95
- conv = ConversationService.query(id=conv_id,dialog_id=chat_id)
96
  if not conv:
97
  return get_error_data_result(message="Session does not exist")
98
  if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
@@ -123,12 +123,12 @@ def completion(tenant_id, chat_id):
123
  return get_error_data_result(message="`name` can not be empty.")
124
  ConversationService.save(**conv)
125
  e, conv = ConversationService.get_by_id(conv["id"])
126
- session_id=conv.id
127
  else:
128
  session_id = req.get("session_id")
129
  if not req.get("question"):
130
  return get_error_data_result(message="Please input your question.")
131
- conv = ConversationService.query(id=session_id,dialog_id=chat_id)
132
  if not conv:
133
  return get_error_data_result(message="Session does not exist")
134
  conv = conv[0]
@@ -182,18 +182,18 @@ def completion(tenant_id, chat_id):
182
  chunk_list.append(new_chunk)
183
  reference["chunks"] = chunk_list
184
  ans["id"] = message_id
185
- ans["session_id"]=session_id
186
 
187
  def stream():
188
  nonlocal dia, msg, req, conv
189
  try:
190
  for ans in chat(dia, msg, **req):
191
  fillin_conv(ans)
192
- yield "data:" + json.dumps({"code": 0, "data": ans}, ensure_ascii=False) + "\n\n"
193
  ConversationService.update_by_id(conv.id, conv.to_dict())
194
  except Exception as e:
195
  yield "data:" + json.dumps({"code": 500, "message": str(e),
196
- "data": {"answer": "**ERROR**: " + str(e),"reference": []}},
197
  ensure_ascii=False) + "\n\n"
198
  yield "data:" + json.dumps({"code": 0, "data": True}, ensure_ascii=False) + "\n\n"
199
 
@@ -235,7 +235,7 @@ def agent_completion(tenant_id, agent_id):
235
  conv = {
236
  "id": session_id,
237
  "dialog_id": cvs.id,
238
- "user_id": req.get("user_id",""),
239
  "message": [{"role": "assistant", "content": canvas.get_prologue()}],
240
  "source": "agent"
241
  }
@@ -251,9 +251,9 @@ def agent_completion(tenant_id, agent_id):
251
  question = req.get("question")
252
  if not question:
253
  return get_error_data_result("`question` is required.")
254
- question={
255
- "role":"user",
256
- "content":question,
257
  "id": str(uuid4())
258
  }
259
  messages.append(question)
@@ -375,7 +375,7 @@ def agent_completion(tenant_id, agent_id):
375
 
376
  @manager.route('/chats/<chat_id>/sessions', methods=['GET'])
377
  @token_required
378
- def list_session(chat_id,tenant_id):
379
  if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
380
  return get_error_data_result(message=f"You don't own the assistant {chat_id}.")
381
  id = request.args.get("id")
@@ -387,7 +387,7 @@ def list_session(chat_id,tenant_id):
387
  desc = False
388
  else:
389
  desc = True
390
- convs = ConversationService.get_list(chat_id,page_number,items_per_page,orderby,desc,id,name)
391
  if not convs:
392
  return get_result(data=[])
393
  for conv in convs:
@@ -429,7 +429,7 @@ def list_session(chat_id,tenant_id):
429
 
430
  @manager.route('/chats/<chat_id>/sessions', methods=["DELETE"])
431
  @token_required
432
- def delete(tenant_id,chat_id):
433
  if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
434
  return get_error_data_result(message="You don't own the chat")
435
  req = request.json
@@ -437,21 +437,22 @@ def delete(tenant_id,chat_id):
437
  if not req:
438
  ids = None
439
  else:
440
- ids=req.get("ids")
441
 
442
  if not ids:
443
  conv_list = []
444
  for conv in convs:
445
  conv_list.append(conv.id)
446
  else:
447
- conv_list=ids
448
  for id in conv_list:
449
- conv = ConversationService.query(id=id,dialog_id=chat_id)
450
  if not conv:
451
  return get_error_data_result(message="The chat doesn't own the session")
452
  ConversationService.delete_by_id(id)
453
  return get_result()
454
 
 
455
  @manager.route('/sessions/ask', methods=['POST'])
456
  @token_required
457
  def ask_about(tenant_id):
@@ -460,17 +461,18 @@ def ask_about(tenant_id):
460
  return get_error_data_result("`question` is required.")
461
  if not req.get("dataset_ids"):
462
  return get_error_data_result("`dataset_ids` is required.")
463
- if not isinstance(req.get("dataset_ids"),list):
464
  return get_error_data_result("`dataset_ids` should be a list.")
465
- req["kb_ids"]=req.pop("dataset_ids")
466
  for kb_id in req["kb_ids"]:
467
- if not KnowledgebaseService.accessible(kb_id,tenant_id):
468
  return get_error_data_result(f"You don't own the dataset {kb_id}.")
469
  kbs = KnowledgebaseService.query(id=kb_id)
470
  kb = kbs[0]
471
  if kb.chunk_num == 0:
472
  return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
473
  uid = tenant_id
 
474
  def stream():
475
  nonlocal req, uid
476
  try:
 
35
 
36
  @manager.route('/chats/<chat_id>/sessions', methods=['POST'])
37
  @token_required
38
+ def create(tenant_id, chat_id):
39
  req = request.json
40
  req["dialog_id"] = chat_id
41
  dia = DialogService.query(tenant_id=tenant_id, id=req["dialog_id"], status=StatusEnum.VALID.value)
 
77
  conv = {
78
  "id": get_uuid(),
79
  "dialog_id": cvs.id,
80
+ "user_id": req.get("usr_id", "") if isinstance(req, dict) else "",
81
  "message": [{"role": "assistant", "content": canvas.get_prologue()}],
82
  "source": "agent"
83
  }
 
88
 
89
  @manager.route('/chats/<chat_id>/sessions/<session_id>', methods=['PUT'])
90
  @token_required
91
+ def update(tenant_id, chat_id, session_id):
92
  req = request.json
93
  req["dialog_id"] = chat_id
94
  conv_id = session_id
95
+ conv = ConversationService.query(id=conv_id, dialog_id=chat_id)
96
  if not conv:
97
  return get_error_data_result(message="Session does not exist")
98
  if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
 
123
  return get_error_data_result(message="`name` can not be empty.")
124
  ConversationService.save(**conv)
125
  e, conv = ConversationService.get_by_id(conv["id"])
126
+ session_id = conv.id
127
  else:
128
  session_id = req.get("session_id")
129
  if not req.get("question"):
130
  return get_error_data_result(message="Please input your question.")
131
+ conv = ConversationService.query(id=session_id, dialog_id=chat_id)
132
  if not conv:
133
  return get_error_data_result(message="Session does not exist")
134
  conv = conv[0]
 
182
  chunk_list.append(new_chunk)
183
  reference["chunks"] = chunk_list
184
  ans["id"] = message_id
185
+ ans["session_id"] = session_id
186
 
187
  def stream():
188
  nonlocal dia, msg, req, conv
189
  try:
190
  for ans in chat(dia, msg, **req):
191
  fillin_conv(ans)
192
+ yield "data:" + json.dumps({"code": 0, "data": ans}, ensure_ascii=False) + "\n\n"
193
  ConversationService.update_by_id(conv.id, conv.to_dict())
194
  except Exception as e:
195
  yield "data:" + json.dumps({"code": 500, "message": str(e),
196
+ "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
197
  ensure_ascii=False) + "\n\n"
198
  yield "data:" + json.dumps({"code": 0, "data": True}, ensure_ascii=False) + "\n\n"
199
 
 
235
  conv = {
236
  "id": session_id,
237
  "dialog_id": cvs.id,
238
+ "user_id": req.get("user_id", ""),
239
  "message": [{"role": "assistant", "content": canvas.get_prologue()}],
240
  "source": "agent"
241
  }
 
251
  question = req.get("question")
252
  if not question:
253
  return get_error_data_result("`question` is required.")
254
+ question = {
255
+ "role": "user",
256
+ "content": question,
257
  "id": str(uuid4())
258
  }
259
  messages.append(question)
 
375
 
376
  @manager.route('/chats/<chat_id>/sessions', methods=['GET'])
377
  @token_required
378
+ def list_session(chat_id, tenant_id):
379
  if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
380
  return get_error_data_result(message=f"You don't own the assistant {chat_id}.")
381
  id = request.args.get("id")
 
387
  desc = False
388
  else:
389
  desc = True
390
+ convs = ConversationService.get_list(chat_id, page_number, items_per_page, orderby, desc, id, name)
391
  if not convs:
392
  return get_result(data=[])
393
  for conv in convs:
 
429
 
430
  @manager.route('/chats/<chat_id>/sessions', methods=["DELETE"])
431
  @token_required
432
+ def delete(tenant_id, chat_id):
433
  if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
434
  return get_error_data_result(message="You don't own the chat")
435
  req = request.json
 
437
  if not req:
438
  ids = None
439
  else:
440
+ ids = req.get("ids")
441
 
442
  if not ids:
443
  conv_list = []
444
  for conv in convs:
445
  conv_list.append(conv.id)
446
  else:
447
+ conv_list = ids
448
  for id in conv_list:
449
+ conv = ConversationService.query(id=id, dialog_id=chat_id)
450
  if not conv:
451
  return get_error_data_result(message="The chat doesn't own the session")
452
  ConversationService.delete_by_id(id)
453
  return get_result()
454
 
455
+
456
  @manager.route('/sessions/ask', methods=['POST'])
457
  @token_required
458
  def ask_about(tenant_id):
 
461
  return get_error_data_result("`question` is required.")
462
  if not req.get("dataset_ids"):
463
  return get_error_data_result("`dataset_ids` is required.")
464
+ if not isinstance(req.get("dataset_ids"), list):
465
  return get_error_data_result("`dataset_ids` should be a list.")
466
+ req["kb_ids"] = req.pop("dataset_ids")
467
  for kb_id in req["kb_ids"]:
468
+ if not KnowledgebaseService.accessible(kb_id, tenant_id):
469
  return get_error_data_result(f"You don't own the dataset {kb_id}.")
470
  kbs = KnowledgebaseService.query(id=kb_id)
471
  kb = kbs[0]
472
  if kb.chunk_num == 0:
473
  return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
474
  uid = tenant_id
475
+
476
  def stream():
477
  nonlocal req, uid
478
  try:
deepdoc/parser/pdf_parser.py CHANGED
@@ -152,7 +152,7 @@ class RAGFlowPdfParser:
152
  max(len(up["text"]), len(down["text"])),
153
  len(tks_all) - len(tks_up) - len(tks_down),
154
  len(tks_down) - len(tks_up),
155
- tks_down[-1] == tks_up[-1],
156
  max(down["in_row"], up["in_row"]),
157
  abs(down["in_row"] - up["in_row"]),
158
  len(tks_down) == 1 and rag_tokenizer.tag(tks_down[0]).find("n") >= 0,
 
152
  max(len(up["text"]), len(down["text"])),
153
  len(tks_all) - len(tks_up) - len(tks_down),
154
  len(tks_down) - len(tks_up),
155
+ tks_down[-1] == tks_up[-1] if tks_down and tks_up else False,
156
  max(down["in_row"], up["in_row"]),
157
  abs(down["in_row"] - up["in_row"]),
158
  len(tks_down) == 1 and rag_tokenizer.tag(tks_down[0]).find("n") >= 0,