liuhua liuhua committed on
Commit
533089d
·
1 Parent(s): 4b50c07

Fix bugs in API (#3103)

Browse files

### What problem does this PR solve?

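Fix bugs in the HTTP API and the Python SDK:

- Chats: check that all datasets attached to a chat use the same embedding model (create and update); accept `show_quotation` on update and return it in the chat listing; deleting without `ids` now removes all of the tenant's chats.
- Datasets: validate `embedding_model` against the known embedding models; merge `parser_config` updates into the existing configuration instead of overwriting it with the `None` returned by `dict.update`; deleting without `ids` now removes all of the tenant's datasets.
- Documents and chunks: limit the total upload size to 10 MB; persist `parser_config` when a document is updated; report the chunk `available` flag as a boolean; deleting chunks without `chunk_ids` now removes all chunks of the document.
- Sessions: correct the ownership error message in `completion`; drop the session-existence check from listing; strip `prompt` from listed messages; deleting without `ids` now removes all sessions of the chat.
- `get_parser_config`: fix the `user_raptor` typo in the `naive` defaults (now `use_raptor`).
- Python SDK: `ids` is now optional in `delete_datasets`, `delete_chats`, and `delete_sessions`.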


- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: liuhua <[email protected]>

api/apps/sdk/chat.py CHANGED
@@ -14,7 +14,7 @@
14
  # limitations under the License.
15
  #
16
  from flask import request
17
-
18
  from api.db import StatusEnum
19
  from api.db.services.dialog_service import DialogService
20
  from api.db.services.knowledgebase_service import KnowledgebaseService
@@ -40,6 +40,10 @@ def create(tenant_id):
40
  kb=kbs[0]
41
  if kb.chunk_num == 0:
42
  return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
 
 
 
 
43
  req["kb_ids"] = ids
44
  # llm
45
  llm = req.get("llm")
@@ -149,6 +153,8 @@ def update(tenant_id,chat_id):
149
  return get_error_data_result(retmsg='You do not own the chat')
150
  req =request.json
151
  ids = req.get("dataset_ids")
 
 
152
  if "dataset_ids" in req:
153
  if not ids:
154
  return get_error_data_result("`datasets` can't be empty")
@@ -160,6 +166,12 @@ def update(tenant_id,chat_id):
160
  kb = kbs[0]
161
  if kb.chunk_num == 0:
162
  return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
 
 
 
 
 
 
163
  req["kb_ids"] = ids
164
  llm = req.get("llm")
165
  if llm:
@@ -225,10 +237,18 @@ def update(tenant_id,chat_id):
225
  @token_required
226
  def delete(tenant_id):
227
  req = request.json
228
- ids = req.get("ids")
 
 
 
229
  if not ids:
230
- return get_error_data_result(retmsg="`ids` are required")
231
- for id in ids:
 
 
 
 
 
232
  if not DialogService.query(tenant_id=tenant_id, id=id, status=StatusEnum.VALID.value):
233
  return get_error_data_result(retmsg=f"You don't own the chat {id}")
234
  temp_dict = {"status": StatusEnum.INVALID.value}
@@ -260,7 +280,8 @@ def list_chat(tenant_id):
260
  "quote": "show_quote",
261
  "system": "prompt",
262
  "rerank_id": "rerank_model",
263
- "vector_similarity_weight": "keywords_similarity_weight"}
 
264
  key_list = ["similarity_threshold", "vector_similarity_weight", "top_n", "rerank_id"]
265
  for res in chats:
266
  for key, value in res["prompt_config"].items():
 
14
  # limitations under the License.
15
  #
16
  from flask import request
17
+ from api.settings import RetCode
18
  from api.db import StatusEnum
19
  from api.db.services.dialog_service import DialogService
20
  from api.db.services.knowledgebase_service import KnowledgebaseService
 
40
  kb=kbs[0]
41
  if kb.chunk_num == 0:
42
  return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
43
+ kbs = KnowledgebaseService.get_by_ids(ids)
44
+ embd_count = list(set(kb.embd_id for kb in kbs))
45
+ if embd_count != 1:
46
+ return get_result(retmsg='Datasets use different embedding models."',retcode=RetCode.AUTHENTICATION_ERROR)
47
  req["kb_ids"] = ids
48
  # llm
49
  llm = req.get("llm")
 
153
  return get_error_data_result(retmsg='You do not own the chat')
154
  req =request.json
155
  ids = req.get("dataset_ids")
156
+ if "show_quotation" in req:
157
+ req["do_refer"]=req.pop("show_quotation")
158
  if "dataset_ids" in req:
159
  if not ids:
160
  return get_error_data_result("`datasets` can't be empty")
 
166
  kb = kbs[0]
167
  if kb.chunk_num == 0:
168
  return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
169
+ kbs = KnowledgebaseService.get_by_ids(ids)
170
+ embd_count=list(set(kb.embd_id for kb in kbs))
171
+ if embd_count != 1 :
172
+ return get_result(
173
+ retmsg='Datasets use different embedding models."',
174
+ retcode=RetCode.AUTHENTICATION_ERROR)
175
  req["kb_ids"] = ids
176
  llm = req.get("llm")
177
  if llm:
 
237
  @token_required
238
  def delete(tenant_id):
239
  req = request.json
240
+ if not req:
241
+ ids=None
242
+ else:
243
+ ids=req.get("ids")
244
  if not ids:
245
+ id_list = []
246
+ dias=DialogService.query(tenant_id=tenant_id,status=StatusEnum.VALID.value)
247
+ for dia in dias:
248
+ id_list.append(dia.id)
249
+ else:
250
+ id_list=ids
251
+ for id in id_list:
252
  if not DialogService.query(tenant_id=tenant_id, id=id, status=StatusEnum.VALID.value):
253
  return get_error_data_result(retmsg=f"You don't own the chat {id}")
254
  temp_dict = {"status": StatusEnum.INVALID.value}
 
280
  "quote": "show_quote",
281
  "system": "prompt",
282
  "rerank_id": "rerank_model",
283
+ "vector_similarity_weight": "keywords_similarity_weight",
284
+ "do_refer":"show_quotation"}
285
  key_list = ["similarity_threshold", "vector_similarity_weight", "top_n", "rerank_id"]
286
  for res in chats:
287
  for key, value in res["prompt_config"].items():
api/apps/sdk/dataset.py CHANGED
@@ -21,7 +21,7 @@ from api.db.services.document_service import DocumentService
21
  from api.db.services.file2document_service import File2DocumentService
22
  from api.db.services.file_service import FileService
23
  from api.db.services.knowledgebase_service import KnowledgebaseService
24
- from api.db.services.llm_service import TenantLLMService
25
  from api.db.services.user_service import TenantService
26
  from api.settings import RetCode
27
  from api.utils import get_uuid
@@ -68,9 +68,12 @@ def create(tenant_id):
68
  "BAAI/bge-small-zh-v1.5","jinaai/jina-embeddings-v2-base-en","jinaai/jina-embeddings-v2-small-en",
69
  "nomic-ai/nomic-embed-text-v1.5","sentence-transformers/all-MiniLM-L6-v2","text-embedding-v2",
70
  "text-embedding-v3","maidalun1020/bce-embedding-base_v1"]
71
- if not TenantLLMService.query(tenant_id=tenant_id,model_type="embedding", llm_name=req.get("embedding_model"))\
72
- and req.get("embedding_model") not in valid_embedding_models:
73
  return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
 
 
 
74
  key_mapping = {
75
  "chunk_num": "chunk_count",
76
  "doc_num": "document_count",
@@ -92,25 +95,32 @@ def create(tenant_id):
92
  @token_required
93
  def delete(tenant_id):
94
  req = request.json
95
- ids = req.get("ids")
 
 
 
96
  if not ids:
97
- return get_error_data_result(
98
- retmsg="ids are required")
99
- for id in ids:
 
 
 
 
100
  kbs = KnowledgebaseService.query(id=id, tenant_id=tenant_id)
101
  if not kbs:
102
  return get_error_data_result(retmsg=f"You don't own the dataset {id}")
103
- for doc in DocumentService.query(kb_id=id):
104
- if not DocumentService.remove_document(doc, tenant_id):
 
 
 
 
 
 
105
  return get_error_data_result(
106
- retmsg="Remove document error.(Database error)")
107
- f2d = File2DocumentService.get_by_document_id(doc.id)
108
- FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
109
- File2DocumentService.delete_by_document_id(doc.id)
110
- if not KnowledgebaseService.delete_by_id(id):
111
- return get_error_data_result(
112
- retmsg="Delete dataset error.(Database error)")
113
- return get_result(retcode=RetCode.SUCCESS)
114
 
115
  @manager.route('/datasets/<dataset_id>', methods=['PUT'])
116
  @token_required
@@ -139,8 +149,9 @@ def update(tenant_id,dataset_id):
139
  retmsg="Can't change `tenant_id`.")
140
  e, kb = KnowledgebaseService.get_by_id(dataset_id)
141
  if "parser_config" in req:
142
- print(kb.parser_config,flush=True)
143
- req["parser_config"]=kb.parser_config.update(req["parser_config"])
 
144
  if "chunk_count" in req:
145
  if req["chunk_count"] != kb.chunk_num:
146
  return get_error_data_result(
@@ -157,7 +168,8 @@ def update(tenant_id,dataset_id):
157
  retmsg="If `chunk_count` is not 0, `chunk_method` is not changeable.")
158
  req['parser_id'] = req.pop('chunk_method')
159
  if req['parser_id'] != kb.parser_id:
160
- req["parser_config"] = get_parser_config(chunk_method, parser_config)
 
161
  if "embedding_model" in req:
162
  if kb.chunk_num != 0 and req['embedding_model'] != kb.embd_id:
163
  return get_error_data_result(
@@ -168,9 +180,12 @@ def update(tenant_id,dataset_id):
168
  "BAAI/bge-small-zh-v1.5","jinaai/jina-embeddings-v2-base-en","jinaai/jina-embeddings-v2-small-en",
169
  "nomic-ai/nomic-embed-text-v1.5","sentence-transformers/all-MiniLM-L6-v2","text-embedding-v2",
170
  "text-embedding-v3","maidalun1020/bce-embedding-base_v1"]
171
- if not TenantLLMService.query(tenant_id=tenant_id,model_type="embedding", llm_name=req.get("embedding_model"))\
172
- and req.get("embedding_model") not in valid_embedding_models:
173
  return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
 
 
 
174
  req['embd_id'] = req.pop('embedding_model')
175
  if "name" in req:
176
  req["name"] = req["name"].strip()
 
21
  from api.db.services.file2document_service import File2DocumentService
22
  from api.db.services.file_service import FileService
23
  from api.db.services.knowledgebase_service import KnowledgebaseService
24
+ from api.db.services.llm_service import TenantLLMService,LLMService
25
  from api.db.services.user_service import TenantService
26
  from api.settings import RetCode
27
  from api.utils import get_uuid
 
68
  "BAAI/bge-small-zh-v1.5","jinaai/jina-embeddings-v2-base-en","jinaai/jina-embeddings-v2-small-en",
69
  "nomic-ai/nomic-embed-text-v1.5","sentence-transformers/all-MiniLM-L6-v2","text-embedding-v2",
70
  "text-embedding-v3","maidalun1020/bce-embedding-base_v1"]
71
+ embd_model=LLMService.query(llm_name=req["embedding_model"],model_type="embedding")
72
+ if not embd_model:
73
  return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
74
+ if embd_model:
75
+ if req["embedding_model"] not in valid_embedding_models and not TenantLLMService.query(tenant_id=tenant_id,model_type="embedding", llm_name=req.get("embedding_model")):
76
+ return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
77
  key_mapping = {
78
  "chunk_num": "chunk_count",
79
  "doc_num": "document_count",
 
95
  @token_required
96
  def delete(tenant_id):
97
  req = request.json
98
+ if not req:
99
+ ids=None
100
+ else:
101
+ ids=req.get("ids")
102
  if not ids:
103
+ id_list = []
104
+ kbs=KnowledgebaseService.query(tenant_id=tenant_id)
105
+ for kb in kbs:
106
+ id_list.append(kb.id)
107
+ else:
108
+ id_list=ids
109
+ for id in id_list:
110
  kbs = KnowledgebaseService.query(id=id, tenant_id=tenant_id)
111
  if not kbs:
112
  return get_error_data_result(retmsg=f"You don't own the dataset {id}")
113
+ for doc in DocumentService.query(kb_id=id):
114
+ if not DocumentService.remove_document(doc, tenant_id):
115
+ return get_error_data_result(
116
+ retmsg="Remove document error.(Database error)")
117
+ f2d = File2DocumentService.get_by_document_id(doc.id)
118
+ FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
119
+ File2DocumentService.delete_by_document_id(doc.id)
120
+ if not KnowledgebaseService.delete_by_id(id):
121
  return get_error_data_result(
122
+ retmsg="Delete dataset error.(Database error)")
123
+ return get_result(retcode=RetCode.SUCCESS)
 
 
 
 
 
 
124
 
125
  @manager.route('/datasets/<dataset_id>', methods=['PUT'])
126
  @token_required
 
149
  retmsg="Can't change `tenant_id`.")
150
  e, kb = KnowledgebaseService.get_by_id(dataset_id)
151
  if "parser_config" in req:
152
+ temp_dict=kb.parser_config
153
+ temp_dict.update(req["parser_config"])
154
+ req["parser_config"] = temp_dict
155
  if "chunk_count" in req:
156
  if req["chunk_count"] != kb.chunk_num:
157
  return get_error_data_result(
 
168
  retmsg="If `chunk_count` is not 0, `chunk_method` is not changeable.")
169
  req['parser_id'] = req.pop('chunk_method')
170
  if req['parser_id'] != kb.parser_id:
171
+ if not req.get("parser_config"):
172
+ req["parser_config"] = get_parser_config(chunk_method, parser_config)
173
  if "embedding_model" in req:
174
  if kb.chunk_num != 0 and req['embedding_model'] != kb.embd_id:
175
  return get_error_data_result(
 
180
  "BAAI/bge-small-zh-v1.5","jinaai/jina-embeddings-v2-base-en","jinaai/jina-embeddings-v2-small-en",
181
  "nomic-ai/nomic-embed-text-v1.5","sentence-transformers/all-MiniLM-L6-v2","text-embedding-v2",
182
  "text-embedding-v3","maidalun1020/bce-embedding-base_v1"]
183
+ embd_model=LLMService.query(llm_name=req["embedding_model"],model_type="embedding")
184
+ if not embd_model:
185
  return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
186
+ if embd_model:
187
+ if req["embedding_model"] not in valid_embedding_models and not TenantLLMService.query(tenant_id=tenant_id,model_type="embedding", llm_name=req.get("embedding_model")):
188
+ return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
189
  req['embd_id'] = req.pop('embedding_model')
190
  if "name" in req:
191
  req["name"] = req["name"].strip()
api/apps/sdk/doc.py CHANGED
@@ -46,6 +46,9 @@ from rag.utils.es_conn import ELASTICSEARCH
46
  from rag.utils.storage_factory import STORAGE_IMPL
47
  import os
48
 
 
 
 
49
 
50
  @manager.route('/datasets/<dataset_id>/documents', methods=['POST'])
51
  @token_required
@@ -58,11 +61,21 @@ def upload(dataset_id, tenant_id):
58
  if file_obj.filename == '':
59
  return get_result(
60
  retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
61
-
 
 
 
 
 
 
 
 
 
 
62
  e, kb = KnowledgebaseService.get_by_id(dataset_id)
63
  if not e:
64
  raise LookupError(f"Can't find the dataset with ID {dataset_id}!")
65
- err, files = FileService.upload_document(kb, file_objs, tenant_id)
66
  if err:
67
  return get_result(
68
  retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
@@ -140,6 +153,7 @@ def update_doc(tenant_id, dataset_id, document_id):
140
  if not e:
141
  return get_error_data_result(retmsg="Document not found!")
142
  req["parser_config"] = get_parser_config(req["chunk_method"], req.get("parser_config"))
 
143
  if doc.token_num > 0:
144
  e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1,
145
  doc.process_duation * -1)
@@ -210,10 +224,10 @@ def list_docs(dataset_id, tenant_id):
210
  }
211
  renamed_doc = {}
212
  for key, value in doc.items():
213
- if key =="run":
214
- renamed_doc["run"]=run_mapping.get(str(value))
215
  new_key = key_mapping.get(key, key)
216
  renamed_doc[new_key] = value
 
 
217
  renamed_doc_list.append(renamed_doc)
218
  return get_result(data={"total": tol, "docs": renamed_doc_list})
219
 
@@ -280,14 +294,11 @@ def parse(tenant_id,dataset_id):
280
  doc = DocumentService.query(id=id,kb_id=dataset_id)
281
  if not doc:
282
  return get_error_data_result(retmsg=f"You don't own the document {id}.")
283
- if doc[0].progress != 0.0:
284
- return get_error_data_result("Can't stop parsing document with progress at 0 or 100")
285
  info = {"run": "1", "progress": 0}
286
  info["progress_msg"] = ""
287
  info["chunk_num"] = 0
288
  info["token_num"] = 0
289
  DocumentService.update_by_id(id, info)
290
- # if str(req["run"]) == TaskStatus.CANCEL.value:
291
  ELASTICSEARCH.deleteByQuery(
292
  Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
293
  TaskService.filter_delete([Task.doc_id == id])
@@ -312,10 +323,8 @@ def stop_parsing(tenant_id,dataset_id):
312
  return get_error_data_result(retmsg=f"You don't own the document {id}.")
313
  if doc[0].progress == 100.0 or doc[0].progress == 0.0:
314
  return get_error_data_result("Can't stop parsing document with progress at 0 or 100")
315
- info = {"run": "2", "progress": 0}
316
  DocumentService.update_by_id(id, info)
317
- # if str(req["run"]) == TaskStatus.CANCEL.value:
318
- tenant_id = DocumentService.get_tenant_id(id)
319
  ELASTICSEARCH.deleteByQuery(
320
  Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
321
  return get_result()
@@ -355,10 +364,10 @@ def list_chunks(tenant_id,dataset_id,document_id):
355
  doc=doc.to_dict()
356
  renamed_doc = {}
357
  for key, value in doc.items():
358
- if key == "run":
359
- renamed_doc["run"] = run_mapping.get(str(value))
360
  new_key = key_mapping.get(key, key)
361
  renamed_doc[new_key] = value
 
 
362
  res = {"total": sres.total, "chunks": [], "doc": renamed_doc}
363
  origin_chunks = []
364
  sign = 0
@@ -398,12 +407,17 @@ def list_chunks(tenant_id,dataset_id,document_id):
398
  "content_with_weight": "content",
399
  "doc_id": "document_id",
400
  "important_kwd": "important_keywords",
401
- "img_id": "image_id"
 
402
  }
403
  renamed_chunk = {}
404
  for key, value in chunk.items():
405
  new_key = key_mapping.get(key, key)
406
  renamed_chunk[new_key] = value
 
 
 
 
407
  res["chunks"].append(renamed_chunk)
408
  return get_result(data=res)
409
 
@@ -441,7 +455,7 @@ def add_chunk(tenant_id,dataset_id,document_id):
441
  embd_id = DocumentService.get_embd_id(document_id)
442
  embd_mdl = TenantLLMService.model_instance(
443
  tenant_id, LLMType.EMBEDDING.value, embd_id)
444
-
445
  v, c = embd_mdl.encode([doc.name, req["content"]])
446
  v = 0.1 * v[0] + 0.9 * v[1]
447
  d["q_%d_vec" % len(v)] = v.tolist()
@@ -459,7 +473,7 @@ def add_chunk(tenant_id,dataset_id,document_id):
459
  "kb_id": "dataset_id",
460
  "create_timestamp_flt": "create_timestamp",
461
  "create_time": "create_time",
462
- "document_keyword": "document",
463
  }
464
  renamed_chunk = {}
465
  for key, value in d.items():
@@ -480,12 +494,18 @@ def rm_chunk(tenant_id,dataset_id,document_id):
480
  return get_error_data_result(retmsg=f"You don't own the document {document_id}.")
481
  doc = doc[0]
482
  req = request.json
483
- if not req.get("chunk_ids"):
484
- return get_error_data_result("`chunk_ids` is required")
485
  query = {
486
  "doc_ids": [doc.id], "page": 1, "size": 1024, "question": "", "sort": True}
487
  sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
488
- for chunk_id in req.get("chunk_ids"):
 
 
 
 
 
 
 
 
489
  if chunk_id not in sres.ids:
490
  return get_error_data_result(f"Chunk {chunk_id} not found")
491
  if not ELASTICSEARCH.deleteByQuery(
 
46
  from rag.utils.storage_factory import STORAGE_IMPL
47
  import os
48
 
49
+ MAXIMUM_OF_UPLOADING_FILES = 256
50
+
51
+
52
 
53
  @manager.route('/datasets/<dataset_id>/documents', methods=['POST'])
54
  @token_required
 
61
  if file_obj.filename == '':
62
  return get_result(
63
  retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
64
+ # total size
65
+ total_size = 0
66
+ for file_obj in file_objs:
67
+ file_obj.seek(0, os.SEEK_END)
68
+ total_size += file_obj.tell()
69
+ file_obj.seek(0)
70
+ MAX_TOTAL_FILE_SIZE=10*1024*1024
71
+ if total_size > MAX_TOTAL_FILE_SIZE:
72
+ return get_result(
73
+ retmsg=f'Total file size exceeds 10MB limit! ({total_size / (1024 * 1024):.2f} MB)',
74
+ retcode=RetCode.ARGUMENT_ERROR)
75
  e, kb = KnowledgebaseService.get_by_id(dataset_id)
76
  if not e:
77
  raise LookupError(f"Can't find the dataset with ID {dataset_id}!")
78
+ err, files= FileService.upload_document(kb, file_objs, tenant_id)
79
  if err:
80
  return get_result(
81
  retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
 
153
  if not e:
154
  return get_error_data_result(retmsg="Document not found!")
155
  req["parser_config"] = get_parser_config(req["chunk_method"], req.get("parser_config"))
156
+ DocumentService.update_parser_config(doc.id, req["parser_config"])
157
  if doc.token_num > 0:
158
  e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1,
159
  doc.process_duation * -1)
 
224
  }
225
  renamed_doc = {}
226
  for key, value in doc.items():
 
 
227
  new_key = key_mapping.get(key, key)
228
  renamed_doc[new_key] = value
229
+ if key =="run":
230
+ renamed_doc["run"]=run_mapping.get(value)
231
  renamed_doc_list.append(renamed_doc)
232
  return get_result(data={"total": tol, "docs": renamed_doc_list})
233
 
 
294
  doc = DocumentService.query(id=id,kb_id=dataset_id)
295
  if not doc:
296
  return get_error_data_result(retmsg=f"You don't own the document {id}.")
 
 
297
  info = {"run": "1", "progress": 0}
298
  info["progress_msg"] = ""
299
  info["chunk_num"] = 0
300
  info["token_num"] = 0
301
  DocumentService.update_by_id(id, info)
 
302
  ELASTICSEARCH.deleteByQuery(
303
  Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
304
  TaskService.filter_delete([Task.doc_id == id])
 
323
  return get_error_data_result(retmsg=f"You don't own the document {id}.")
324
  if doc[0].progress == 100.0 or doc[0].progress == 0.0:
325
  return get_error_data_result("Can't stop parsing document with progress at 0 or 100")
326
+ info = {"run": "2", "progress": 0,"chunk_num":0}
327
  DocumentService.update_by_id(id, info)
 
 
328
  ELASTICSEARCH.deleteByQuery(
329
  Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
330
  return get_result()
 
364
  doc=doc.to_dict()
365
  renamed_doc = {}
366
  for key, value in doc.items():
 
 
367
  new_key = key_mapping.get(key, key)
368
  renamed_doc[new_key] = value
369
+ if key == "run":
370
+ renamed_doc["run"] = run_mapping.get(str(value))
371
  res = {"total": sres.total, "chunks": [], "doc": renamed_doc}
372
  origin_chunks = []
373
  sign = 0
 
407
  "content_with_weight": "content",
408
  "doc_id": "document_id",
409
  "important_kwd": "important_keywords",
410
+ "img_id": "image_id",
411
+ "available_int":"available"
412
  }
413
  renamed_chunk = {}
414
  for key, value in chunk.items():
415
  new_key = key_mapping.get(key, key)
416
  renamed_chunk[new_key] = value
417
+ if renamed_chunk["available"] == "0":
418
+ renamed_chunk["available"] = False
419
+ if renamed_chunk["available"] == "1":
420
+ renamed_chunk["available"] = True
421
  res["chunks"].append(renamed_chunk)
422
  return get_result(data=res)
423
 
 
455
  embd_id = DocumentService.get_embd_id(document_id)
456
  embd_mdl = TenantLLMService.model_instance(
457
  tenant_id, LLMType.EMBEDDING.value, embd_id)
458
+ print(embd_mdl,flush=True)
459
  v, c = embd_mdl.encode([doc.name, req["content"]])
460
  v = 0.1 * v[0] + 0.9 * v[1]
461
  d["q_%d_vec" % len(v)] = v.tolist()
 
473
  "kb_id": "dataset_id",
474
  "create_timestamp_flt": "create_timestamp",
475
  "create_time": "create_time",
476
+ "document_keyword": "document"
477
  }
478
  renamed_chunk = {}
479
  for key, value in d.items():
 
494
  return get_error_data_result(retmsg=f"You don't own the document {document_id}.")
495
  doc = doc[0]
496
  req = request.json
 
 
497
  query = {
498
  "doc_ids": [doc.id], "page": 1, "size": 1024, "question": "", "sort": True}
499
  sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
500
+ if not req:
501
+ chunk_ids=None
502
+ else:
503
+ chunk_ids=req.get("chunk_ids")
504
+ if not chunk_ids:
505
+ chunk_list=sres.ids
506
+ else:
507
+ chunk_list=chunk_ids
508
+ for chunk_id in chunk_list:
509
  if chunk_id not in sres.ids:
510
  return get_error_data_result(f"Chunk {chunk_id} not found")
511
  if not ELASTICSEARCH.deleteByQuery(
api/apps/sdk/session.py CHANGED
@@ -100,7 +100,7 @@ def completion(tenant_id,chat_id):
100
  return get_error_data_result(retmsg="Session does not exist")
101
  conv = conv[0]
102
  if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
103
- return get_error_data_result(retmsg="You do not own the session")
104
  msg = []
105
  question = {
106
  "content": req.get("question"),
@@ -168,9 +168,6 @@ def list(chat_id,tenant_id):
168
  return get_error_data_result(retmsg=f"You don't own the assistant {chat_id}.")
169
  id = request.args.get("id")
170
  name = request.args.get("name")
171
- session = ConversationService.query(id=id,name=name,dialog_id=chat_id)
172
- if not session:
173
- return get_error_data_result(retmsg="The session doesn't exist")
174
  page_number = int(request.args.get("page", 1))
175
  items_per_page = int(request.args.get("page_size", 1024))
176
  orderby = request.args.get("orderby", "create_time")
@@ -183,6 +180,10 @@ def list(chat_id,tenant_id):
183
  return get_result(data=[])
184
  for conv in convs:
185
  conv['messages'] = conv.pop("message")
 
 
 
 
186
  conv["chat"] = conv.pop("dialog_id")
187
  if conv["reference"]:
188
  messages = conv["messages"]
@@ -218,10 +219,20 @@ def list(chat_id,tenant_id):
218
  def delete(tenant_id,chat_id):
219
  if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
220
  return get_error_data_result(retmsg="You don't own the chat")
221
- ids = request.json.get("ids")
 
 
 
 
 
 
222
  if not ids:
223
- return get_error_data_result(retmsg="`ids` is required in deleting operation")
224
- for id in ids:
 
 
 
 
225
  conv = ConversationService.query(id=id,dialog_id=chat_id)
226
  if not conv:
227
  return get_error_data_result(retmsg="The chat doesn't own the session")
 
100
  return get_error_data_result(retmsg="Session does not exist")
101
  conv = conv[0]
102
  if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
103
+ return get_error_data_result(retmsg="You do not own the chat")
104
  msg = []
105
  question = {
106
  "content": req.get("question"),
 
168
  return get_error_data_result(retmsg=f"You don't own the assistant {chat_id}.")
169
  id = request.args.get("id")
170
  name = request.args.get("name")
 
 
 
171
  page_number = int(request.args.get("page", 1))
172
  items_per_page = int(request.args.get("page_size", 1024))
173
  orderby = request.args.get("orderby", "create_time")
 
180
  return get_result(data=[])
181
  for conv in convs:
182
  conv['messages'] = conv.pop("message")
183
+ infos = conv["messages"]
184
+ for info in infos:
185
+ if "prompt" in info:
186
+ info.pop("prompt")
187
  conv["chat"] = conv.pop("dialog_id")
188
  if conv["reference"]:
189
  messages = conv["messages"]
 
219
  def delete(tenant_id,chat_id):
220
  if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
221
  return get_error_data_result(retmsg="You don't own the chat")
222
+ req = request.json
223
+ convs = ConversationService.query(dialog_id=chat_id)
224
+ if not req:
225
+ ids = None
226
+ else:
227
+ ids=req.get("ids")
228
+
229
  if not ids:
230
+ conv_list = []
231
+ for conv in convs:
232
+ conv_list.append(conv.id)
233
+ else:
234
+ conv_list=ids
235
+ for id in conv_list:
236
  conv = ConversationService.query(id=id,dialog_id=chat_id)
237
  if not conv:
238
  return get_error_data_result(retmsg="The chat doesn't own the session")
api/utils/api_utils.py CHANGED
@@ -344,7 +344,7 @@ def get_parser_config(chunk_method,parser_config):
344
  return parser_config
345
  if not chunk_method:
346
  chunk_method = "naive"
347
- key_mapping={"naive":{"chunk_token_num": 128, "delimiter": "\\n!?;。;!?", "html4excel": False,"layout_recognize": True, "raptor": {"user_raptor": False}},
348
  "qa":{"raptor":{"use_raptor":False}},
349
  "resume":None,
350
  "manual":{"raptor":{"use_raptor":False}},
 
344
  return parser_config
345
  if not chunk_method:
346
  chunk_method = "naive"
347
+ key_mapping={"naive":{"chunk_token_num": 128, "delimiter": "\\n!?;。;!?", "html4excel": False,"layout_recognize": True, "raptor": {"use_raptor": False}},
348
  "qa":{"raptor":{"use_raptor":False}},
349
  "resume":None,
350
  "manual":{"raptor":{"use_raptor":False}},
sdk/python/ragflow/modules/chat.py CHANGED
@@ -68,7 +68,7 @@ class Chat(Base):
68
  return result_list
69
  raise Exception(res["message"])
70
 
71
- def delete_sessions(self,ids):
72
  res = self.rm(f"/chats/{self.id}/sessions", {"ids": ids})
73
  res = res.json()
74
  if res.get("code") != 0:
 
68
  return result_list
69
  raise Exception(res["message"])
70
 
71
+ def delete_sessions(self,ids:List[str]=None):
72
  res = self.rm(f"/chats/{self.id}/sessions", {"ids": ids})
73
  res = res.json()
74
  if res.get("code") != 0:
sdk/python/ragflow/ragflow.py CHANGED
@@ -64,7 +64,7 @@ class RAGFlow:
64
  return DataSet(self, res["data"])
65
  raise Exception(res["message"])
66
 
67
- def delete_datasets(self, ids: List[str]):
68
  res = self.delete("/datasets",{"ids": ids})
69
  res=res.json()
70
  if res.get("code") != 0:
@@ -135,9 +135,9 @@ class RAGFlow:
135
  return Chat(self, res["data"])
136
  raise Exception(res["message"])
137
 
138
- def delete_chats(self,ids: List[str] = None,names: List[str] = None ) -> bool:
139
  res = self.delete('/chats',
140
- {"ids":ids, "names":names})
141
  res = res.json()
142
  if res.get("code") != 0:
143
  raise Exception(res["message"])
 
64
  return DataSet(self, res["data"])
65
  raise Exception(res["message"])
66
 
67
+ def delete_datasets(self, ids: List[str] = None):
68
  res = self.delete("/datasets",{"ids": ids})
69
  res=res.json()
70
  if res.get("code") != 0:
 
135
  return Chat(self, res["data"])
136
  raise Exception(res["message"])
137
 
138
+ def delete_chats(self,ids: List[str] = None) -> bool:
139
  res = self.delete('/chats',
140
+ {"ids":ids})
141
  res = res.json()
142
  if res.get("code") != 0:
143
  raise Exception(res["message"])