cecilia-uu committed
Commit ee39a70
1 Parent(s): acf53dd

API: create update_doc method (#1341)


### What problem does this PR solve?

Adds an API method for updating documents: renaming a file, enabling/disabling it, and changing its chunk template type.


### Type of change

- [x] New Feature (non-breaking change which adds functionality)
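
As a quick way to exercise the change (not part of the commit itself), the new endpoint can be driven through the SDK method `update_file` added below; the API key, host address, and file path here are placeholders:

from ragflow.ragflow import RAGFlow
from api.settings import RetCode

ragflow = RAGFlow("<API_KEY>", "http://127.0.0.1:9380")  # placeholder key and host

# Create a dataset and upload one document to rename.
created_res = ragflow.create_dataset("update_doc_demo")
dataset_id = created_res["data"]["dataset_id"]
uploaded_res = ragflow.upload_local_file(dataset_id, ["test_data/test.txt"])
doc_id = uploaded_res["data"][0]["id"]

# update_file issues PUT /api/v1/dataset/{dataset_id}/documents/{document_id};
# the server accepts any subset of "name", "enable" and "template_type",
# and a new name must keep the original extension (.txt here).
update_res = ragflow.update_file(dataset_id, doc_id, name="renamed.txt")
assert update_res["code"] == RetCode.SUCCESS and update_res["message"] == "Success"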

api/apps/dataset_api.py CHANGED
@@ -14,6 +14,7 @@
 # limitations under the License.

 import os
+import pathlib
 import re
 import warnings

@@ -42,12 +43,12 @@ MAXIMUM_OF_UPLOADING_FILES = 256

 # ------------------------------ create a dataset ---------------------------------------

-@manager.route('/', methods=['POST'])
+@manager.route("/", methods=["POST"])
 @login_required  # use login
 @validate_request("name")  # check name key
 def create_dataset():
     # Check if Authorization header is present
-    authorization_token = request.headers.get('Authorization')
+    authorization_token = request.headers.get("Authorization")
     if not authorization_token:
         return construct_json_result(code=RetCode.AUTHENTICATION_ERROR, message="Authorization header is missing.")

@@ -79,14 +80,15 @@ def create_dataset():
     # In case that the length of the name exceeds the limit
     dataset_name_length = len(dataset_name)
     if dataset_name_length > NAME_LENGTH_LIMIT:
-        return construct_json_result(code=RetCode.DATA_ERROR,
-                                     message=f"Dataset name: {dataset_name} with length {dataset_name_length} exceeds {NAME_LENGTH_LIMIT}!")
+        return construct_json_result(
+            code=RetCode.DATA_ERROR,
+            message=f"Dataset name: {dataset_name} with length {dataset_name_length} exceeds {NAME_LENGTH_LIMIT}!")

     # In case that there are other fields in the data-binary
     if len(request_body.keys()) > 1:
         name_list = []
         for key_name in request_body.keys():
-            if key_name != 'name':
+            if key_name != "name":
                 name_list.append(key_name)
         return construct_json_result(code=RetCode.DATA_ERROR,
                                      message=f"fields: {name_list}, are not allowed in request body.")
@@ -115,7 +117,7 @@ def create_dataset():

 # -----------------------------list datasets-------------------------------------------------------

-@manager.route('/', methods=['GET'])
+@manager.route("/", methods=["GET"])
 @login_required
 def list_datasets():
     offset = request.args.get("offset", 0)
@@ -134,7 +136,7 @@ def list_datasets():

 # ---------------------------------delete a dataset ----------------------------

-@manager.route('/<dataset_id>', methods=['DELETE'])
+@manager.route("/<dataset_id>", methods=["DELETE"])
 @login_required
 def remove_dataset(dataset_id):
     try:
@@ -142,7 +144,7 @@ def remove_dataset(dataset_id):

         # according to the id, searching for the dataset
         if not datasets:
-            return construct_json_result(message=f'The dataset cannot be found for your current account.',
+            return construct_json_result(message=f"The dataset cannot be found for your current account.",
                                          code=RetCode.OPERATING_ERROR)

         # Iterating the documents inside the dataset
@@ -168,7 +170,7 @@ def remove_dataset(dataset_id):

 # ------------------------------ get details of a dataset ----------------------------------------

-@manager.route('/<dataset_id>', methods=['GET'])
+@manager.route("/<dataset_id>", methods=["GET"])
 @login_required
 def get_dataset(dataset_id):
     try:
@@ -181,7 +183,7 @@ def get_dataset(dataset_id):

 # ------------------------------ update a dataset --------------------------------------------

-@manager.route('/<dataset_id>', methods=['PUT'])
+@manager.route("/<dataset_id>", methods=["PUT"])
 @login_required
 def update_dataset(dataset_id):
     req = request.json
@@ -192,7 +194,7 @@ def update_dataset(dataset_id):
                                      "you want to update!")
     # check whether the dataset can be found
     if not KnowledgebaseService.query(created_by=current_user.id, id=dataset_id):
-        return construct_json_result(message=f'Only the owner of knowledgebase is authorized for this operation!',
+        return construct_json_result(message=f"Only the owner of knowledgebase is authorized for this operation!",
                                      code=RetCode.OPERATING_ERROR)

     exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
@@ -200,7 +202,7 @@ def update_dataset(dataset_id):
     if not exist:
         return construct_json_result(code=RetCode.DATA_ERROR, message="This dataset cannot be found!")

-    if 'name' in req:
+    if "name" in req:
         name = req["name"].strip()
         # check whether there is duplicate name
         if name.lower() != dataset.name.lower() \
@@ -215,9 +217,9 @@ def update_dataset(dataset_id):

     # 2 parameters: embedding id and chunk method
     # only if chunk_num is 0, the user can update the embedding id
-    if req.get('embedding_model_id'):
+    if req.get("embedding_model_id"):
         if chunk_num == 0:
-            dataset_updating_data['embd_id'] = req['embedding_model_id']
+            dataset_updating_data["embd_id"] = req["embedding_model_id"]
         else:
             construct_json_result(code=RetCode.DATA_ERROR, message="You have already parsed the document in this "
                                                                    "dataset, so you cannot change the embedding "
@@ -232,18 +234,18 @@ def update_dataset(dataset_id):
                                                                    "change the chunk method.")
     # convert the photo parameter to avatar
     if req.get("photo"):
-        dataset_updating_data['avatar'] = req["photo"]
+        dataset_updating_data["avatar"] = req["photo"]

     # layout_recognize
-    if 'layout_recognize' in req:
-        if 'parser_config' not in dataset_updating_data:
+    if "layout_recognize" in req:
+        if "parser_config" not in dataset_updating_data:
             dataset_updating_data['parser_config'] = {}
         dataset_updating_data['parser_config']['layout_recognize'] = req['layout_recognize']

     # TODO: updating use_raptor needs to construct a class

     # 6 parameters
-    for key in ['name', 'language', 'description', 'permission', 'id', 'token_num']:
+    for key in ["name", "language", "description", "permission", "id", "token_num"]:
         if key in req:
             dataset_updating_data[key] = req.get(key)

@@ -265,16 +267,16 @@ def update_dataset(dataset_id):
 # --------------------------------content management ----------------------------------------------

 # ----------------------------upload files-----------------------------------------------------
-@manager.route('/<dataset_id>/documents/', methods=['POST'])
+@manager.route("/<dataset_id>/documents/", methods=["POST"])
 @login_required
 def upload_documents(dataset_id):
     # no files
     if not request.files:
         return construct_json_result(
-            message='There is no file!', code=RetCode.ARGUMENT_ERROR)
+            message="There is no file!", code=RetCode.ARGUMENT_ERROR)

     # the number of uploading files exceeds the limit
-    file_objs = request.files.getlist('file')
+    file_objs = request.files.getlist("file")
     num_file_objs = len(file_objs)

     if num_file_objs > MAXIMUM_OF_UPLOADING_FILES:
@@ -288,7 +290,7 @@ def upload_documents(dataset_id):
         # no name
         if not file_name:
             return construct_json_result(
-                message='There is a file without name!', code=RetCode.ARGUMENT_ERROR)
+                message="There is a file without name!", code=RetCode.ARGUMENT_ERROR)

         # TODO: support the remote files
         if 'http' in file_name:
@@ -316,7 +318,7 @@ def upload_documents(dataset_id):

     # grab all the errs
     err = []
-    MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
+    MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
     uploaded_docs_json = []
     for file in file_objs:
         try:
@@ -373,7 +375,7 @@ def upload_documents(dataset_id):


 # ----------------------------delete a file-----------------------------------------------------
-@manager.route('/<dataset_id>/documents/<document_id>', methods=['DELETE'])
+@manager.route("/<dataset_id>/documents/<document_id>", methods=["DELETE"])
 @login_required
 def delete_document(document_id, dataset_id):  # string
     # get the root folder
@@ -433,7 +435,7 @@ def delete_document(document_id, dataset_id):  # string
 def list_documents(dataset_id):
     if not dataset_id:
         return construct_json_result(
-            data=False, message='Lack of "dataset_id"', code=RetCode.ARGUMENT_ERROR)
+            data=False, message="Lack of 'dataset_id'", code=RetCode.ARGUMENT_ERROR)

     # searching keywords
     keywords = request.args.get("keywords", "")
@@ -450,9 +452,109 @@ def list_documents(dataset_id):
     except Exception as e:
         return construct_error_response(e)

-# ----------------------------download a file-----------------------------------------------------
-
-# ----------------------------enable rename-----------------------------------------------------
+# ----------------------------update: enable rename-----------------------------------------------------
+@manager.route("/<dataset_id>/documents/<document_id>", methods=["PUT"])
+@login_required
+def update_document(dataset_id, document_id):
+    req = request.json
+    try:
+        legal_parameters = set()
+        legal_parameters.add("name")
+        legal_parameters.add("enable")
+        legal_parameters.add("template_type")
+
+        for key in req.keys():
+            if key not in legal_parameters:
+                return construct_json_result(code=RetCode.ARGUMENT_ERROR, message=f"{key} is an illegal parameter.")
+
+        # The request body cannot be empty
+        if not req:
+            return construct_json_result(
+                code=RetCode.DATA_ERROR,
+                message="Please input at least one parameter that you want to update!")
+
+        # Check whether there is this dataset
+        exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
+        if not exist:
+            return construct_json_result(code=RetCode.DATA_ERROR, message=f"This dataset {dataset_id} cannot be found!")
+
+        # The document does not exist
+        exist, document = DocumentService.get_by_id(document_id)
+        if not exist:
+            return construct_json_result(message=f"This document {document_id} cannot be found!",
+                                         code=RetCode.ARGUMENT_ERROR)
+
+        # Deal with the different keys
+        updating_data = {}
+        if "name" in req:
+            new_name = req["name"]
+            updating_data["name"] = new_name
+            # Check whether the new_name is suitable
+            # 1. no name value
+            if not new_name:
+                return construct_json_result(code=RetCode.DATA_ERROR, message="There is no new name.")
+
+            # 2. In case that there's space in the head or the tail
+            new_name = new_name.strip()
+
+            # 3. Check whether the new_name has the same extension of file as before
+            if pathlib.Path(new_name.lower()).suffix != pathlib.Path(
+                    document.name.lower()).suffix:
+                return construct_json_result(
+                    data=False,
+                    message="The extension of file cannot be changed",
+                    code=RetCode.ARGUMENT_ERROR)
+
+            # 4. Check whether the new name has already been occupied by other file
+            for d in DocumentService.query(name=new_name, kb_id=document.kb_id):
+                if d.name == new_name:
+                    return construct_json_result(
+                        message="Duplicated document name in the same dataset.",
+                        code=RetCode.ARGUMENT_ERROR)
+
+        if "enable" in req:
+            enable_value = req["enable"]
+            if is_illegal_value_for_enum(enable_value, StatusEnum):
+                return construct_json_result(message=f"Illegal value {enable_value} for 'enable' field.",
+                                             code=RetCode.DATA_ERROR)
+            updating_data["status"] = enable_value
+
+        # TODO: Chunk-method - update parameters inside the json object parser_config
+        if "template_type" in req:
+            type_value = req["template_type"]
+            if is_illegal_value_for_enum(type_value, ParserType):
+                return construct_json_result(message=f"Illegal value {type_value} for 'template_type' field.",
+                                             code=RetCode.DATA_ERROR)
+            updating_data["parser_id"] = req["template_type"]
+
+        # The process of updating
+        if not DocumentService.update_by_id(document_id, updating_data):
+            return construct_json_result(
+                code=RetCode.OPERATING_ERROR,
+                message="Failed to update document in the database! "
+                        "Please check the status of RAGFlow server and try again!")
+
+        # name part: file service
+        if "name" in req:
+            # Get file by document id
+            file_information = File2DocumentService.get_by_document_id(document_id)
+            if file_information:
+                exist, file = FileService.get_by_id(file_information[0].file_id)
+                FileService.update_by_id(file.id, {"name": req["name"]})
+
+        exist, document = DocumentService.get_by_id(document_id)
+
+        # Success
+        return construct_json_result(data=document.to_json(), message="Success", code=RetCode.SUCCESS)
+    except Exception as e:
+        return construct_error_response(e)
+
+
+# Helper method to judge whether it's an illegal value
+def is_illegal_value_for_enum(value, enum_class):
+    return value not in enum_class.__members__.values()
+
+# ----------------------------download a file-----------------------------------------------------

 # ----------------------------start parsing-----------------------------------------------------
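A note on the `is_illegal_value_for_enum` helper above: `enum_class.__members__.values()` yields the Enum members themselves, not their raw values, so whether a plain string such as "1" counts as legal depends on how `StatusEnum` and `ParserType` compare with strings. A self-contained sketch (the `Status` enum here is a simplified stand-in, not RAGFlow's real definition):

from enum import Enum

class Status(str, Enum):  # the str mixin makes members compare equal to their string values
    VALID = "1"
    INVALID = "0"

def is_illegal_value_for_enum(value, enum_class):
    return value not in enum_class.__members__.values()

print(is_illegal_value_for_enum("1", Status))           # False: "1" == Status.VALID
print(is_illegal_value_for_enum("enabled", Status))     # True: no member matches
print(is_illegal_value_for_enum(Status.VALID, Status))  # False for members themselves
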
api/utils/api_utils.py CHANGED
@@ -144,7 +144,7 @@ def server_error_response(e):
     if len(e.args) > 1:
         return get_json_result(
             retcode=RetCode.EXCEPTION_ERROR, retmsg=repr(e.args[0]), data=e.args[1])
-    if repr(e).find("index_not_found_exception") >=0:
+    if repr(e).find("index_not_found_exception") >= 0:
         return get_json_result(retcode=RetCode.EXCEPTION_ERROR, retmsg="No chunk found, please upload file and parse it.")

     return get_json_result(retcode=RetCode.EXCEPTION_ERROR, retmsg=repr(e))
sdk/python/ragflow/ragflow.py CHANGED
@@ -23,11 +23,11 @@ from api.settings import RetCode

 class RAGFlow:
     def __init__(self, user_key, base_url, version='v1'):
-        '''
+        """
         api_url: http://<host_address>/api/v1
         dataset_url: http://<host_address>/api/v1/dataset
         document_url: http://<host_address>/api/v1/dataset/{dataset_id}/documents
-        '''
+        """
         self.user_key = user_key
         self.api_url = f"{base_url}/api/{version}"
         self.dataset_url = f"{self.api_url}/dataset"
@@ -50,9 +50,9 @@ class RAGFlow:

     def find_dataset_id_by_name(self, dataset_name):
         res = requests.get(self.dataset_url, headers=self.authorization_header)
-        for dataset in res.json()['data']:
-            if dataset['name'] == dataset_name:
-                return dataset['id']
+        for dataset in res.json()["data"]:
+            if dataset["name"] == dataset_name:
+                return dataset["id"]
         return None

     def list_dataset(self, offset=0, count=-1, orderby="create_time", desc=True):
@@ -78,7 +78,7 @@ class RAGFlow:
         response = requests.put(endpoint, json=params, headers=self.authorization_header)
         return response.json()

-    # -------------------- content management -----------------------------------------------------
+    # ------------------------------- CONTENT MANAGEMENT -----------------------------------------------------

     # ----------------------------upload local files-----------------------------------------------------
     def upload_local_file(self, dataset_id, file_paths):
@@ -86,15 +86,15 @@ class RAGFlow:

         for file_path in file_paths:
             if not isinstance(file_path, str):
-                return {'code': RetCode.ARGUMENT_ERROR, 'message': f"{file_path} is not string."}
-            if 'http' in file_path:
-                return {'code': RetCode.ARGUMENT_ERROR, 'message': "Remote files have not unsupported."}
+                return {"code": RetCode.ARGUMENT_ERROR, "message": f"{file_path} is not string."}
+            if "http" in file_path:
+                return {"code": RetCode.ARGUMENT_ERROR, "message": "Remote files have not unsupported."}
             if os.path.isfile(file_path):
-                files.append(('file', open(file_path, 'rb')))
+                files.append(("file", open(file_path, "rb")))
             else:
-                return {'code': RetCode.DATA_ERROR, 'message': f"The file {file_path} does not exist"}
+                return {"code": RetCode.DATA_ERROR, "message": f"The file {file_path} does not exist"}

-        res = requests.request('POST', url=f"{self.dataset_url}/{dataset_id}/documents", files=files,
+        res = requests.request("POST", url=f"{self.dataset_url}/{dataset_id}/documents", files=files,
                                headers=self.authorization_header)

         result_dict = json.loads(res.text)
@@ -119,9 +119,13 @@ class RAGFlow:
         res = requests.get(endpoint, params=params, headers=self.authorization_header)
         return res.json()

-    # ----------------------------download a file-----------------------------------------------------
+    # ----------------------------update files: enable, rename, template_type-------------------------------------------
+    def update_file(self, dataset_id, document_id, **params):
+        endpoint = f"{self.dataset_url}/{dataset_id}/documents/{document_id}"
+        response = requests.put(endpoint, json=params, headers=self.authorization_header)
+        return response.json()

-    # ----------------------------enable rename-----------------------------------------------------
+    # ----------------------------download a file-----------------------------------------------------

     # ----------------------------start parsing-----------------------------------------------------
@@ -137,8 +141,6 @@ class RAGFlow:

     # ----------------------------insert a new chunk-----------------------------------------------------

-    # ----------------------------upload a file-----------------------------------------------------
-
     # ----------------------------get a specific chunk-----------------------------------------------------

     # ----------------------------retrieval test-----------------------------------------------------
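
For clients that do not use the SDK, the same update can be issued with `requests` directly; this mirrors what `update_file` does internally (the URL layout is taken from the SDK above, while the host, header value, IDs, and template type are placeholders):

import requests

base_url = "http://127.0.0.1:9380"        # placeholder host
headers = {"Authorization": "<API_KEY>"}  # construct the same authorization header the SDK sends
dataset_id, document_id = "<dataset_id>", "<document_id>"

endpoint = f"{base_url}/api/v1/dataset/{dataset_id}/documents/{document_id}"
# Any subset of "name", "enable" and "template_type" may go in the JSON body;
# "naive" is only an example value for the template type.
res = requests.put(endpoint, json={"template_type": "naive"}, headers=headers).json()
print(res["code"], res["message"])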
sdk/python/test/test_document.py CHANGED
@@ -34,10 +34,10 @@ class TestFile(TestSdk):
34
  """
35
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
36
  created_res = ragflow.create_dataset("test_upload_two_files")
37
- dataset_id = created_res['data']['dataset_id']
38
  file_paths = ["test_data/test.txt", "test_data/test1.txt"]
39
  res = ragflow.upload_local_file(dataset_id, file_paths)
40
- assert res['code'] == RetCode.SUCCESS and res['message'] == 'success'
41
 
42
  def test_upload_one_file(self):
43
  """
@@ -45,10 +45,10 @@ class TestFile(TestSdk):
45
  """
46
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
47
  created_res = ragflow.create_dataset("test_upload_one_file")
48
- dataset_id = created_res['data']['dataset_id']
49
  file_paths = ["test_data/test.txt"]
50
  res = ragflow.upload_local_file(dataset_id, file_paths)
51
- assert res['code'] == RetCode.SUCCESS and res['message'] == 'success'
52
 
53
  def test_upload_nonexistent_files(self):
54
  """
@@ -56,10 +56,10 @@ class TestFile(TestSdk):
56
  """
57
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
58
  created_res = ragflow.create_dataset("test_upload_nonexistent_files")
59
- dataset_id = created_res['data']['dataset_id']
60
  file_paths = ["test_data/imagination.txt"]
61
  res = ragflow.upload_local_file(dataset_id, file_paths)
62
- assert res['code'] == RetCode.DATA_ERROR and "does not exist" in res['message']
63
 
64
  def test_upload_file_if_dataset_does_not_exist(self):
65
  """
@@ -68,7 +68,7 @@ class TestFile(TestSdk):
68
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
69
  file_paths = ["test_data/test.txt"]
70
  res = ragflow.upload_local_file("111", file_paths)
71
- assert res['code'] == RetCode.DATA_ERROR and res['message'] == "Can't find this dataset"
72
 
73
  def test_upload_file_without_name(self):
74
  """
@@ -76,10 +76,10 @@ class TestFile(TestSdk):
76
  """
77
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
78
  created_res = ragflow.create_dataset("test_upload_file_without_name")
79
- dataset_id = created_res['data']['dataset_id']
80
  file_paths = ["test_data/.txt"]
81
  res = ragflow.upload_local_file(dataset_id, file_paths)
82
- assert res['code'] == RetCode.SUCCESS
83
 
84
  def test_upload_file_without_name1(self):
85
  """
@@ -87,10 +87,10 @@ class TestFile(TestSdk):
87
  """
88
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
89
  created_res = ragflow.create_dataset("test_upload_file_without_name")
90
- dataset_id = created_res['data']['dataset_id']
91
  file_paths = ["test_data/.txt", "test_data/empty.txt"]
92
  res = ragflow.upload_local_file(dataset_id, file_paths)
93
- assert res['code'] == RetCode.SUCCESS
94
 
95
  def test_upload_files_exceeding_the_number_limit(self):
96
  """
@@ -98,12 +98,12 @@ class TestFile(TestSdk):
98
  """
99
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
100
  created_res = ragflow.create_dataset("test_upload_files_exceeding_the_number_limit")
101
- dataset_id = created_res['data']['dataset_id']
102
  file_paths = ["test_data/test.txt", "test_data/test1.txt"] * 256
103
  res = ragflow.upload_local_file(dataset_id, file_paths)
104
- assert (res['message'] ==
105
- 'You try to upload 512 files, which exceeds the maximum number of uploading files: 256'
106
- and res['code'] == RetCode.DATA_ERROR)
107
 
108
  def test_upload_files_without_files(self):
109
  """
@@ -111,10 +111,10 @@ class TestFile(TestSdk):
111
  """
112
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
113
  created_res = ragflow.create_dataset("test_upload_files_without_files")
114
- dataset_id = created_res['data']['dataset_id']
115
  file_paths = [None]
116
  res = ragflow.upload_local_file(dataset_id, file_paths)
117
- assert (res['message'] == 'None is not string.' and res['code'] == RetCode.ARGUMENT_ERROR)
118
 
119
  def test_upload_files_with_two_files_with_same_name(self):
120
  """
@@ -122,10 +122,10 @@ class TestFile(TestSdk):
122
  """
123
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
124
  created_res = ragflow.create_dataset("test_upload_files_with_two_files_with_same_name")
125
- dataset_id = created_res['data']['dataset_id']
126
- file_paths = ['test_data/test.txt'] * 2
127
  res = ragflow.upload_local_file(dataset_id, file_paths)
128
- assert (res['message'] == 'success' and res['code'] == RetCode.SUCCESS)
129
 
130
  def test_upload_files_with_file_paths(self):
131
  """
@@ -133,10 +133,10 @@ class TestFile(TestSdk):
133
  """
134
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
135
  created_res = ragflow.create_dataset("test_upload_files_with_file_paths")
136
- dataset_id = created_res['data']['dataset_id']
137
- file_paths = ['test_data/']
138
  res = ragflow.upload_local_file(dataset_id, file_paths)
139
- assert (res['message'] == 'The file test_data/ does not exist' and res['code'] == RetCode.DATA_ERROR)
140
 
141
  def test_upload_files_with_remote_file_path(self):
142
  """
@@ -144,10 +144,10 @@ class TestFile(TestSdk):
144
  """
145
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
146
  created_res = ragflow.create_dataset("test_upload_files_with_remote_file_path")
147
- dataset_id = created_res['data']['dataset_id']
148
- file_paths = ['https://github.com/genostack/ragflow']
149
  res = ragflow.upload_local_file(dataset_id, file_paths)
150
- assert res['code'] == RetCode.ARGUMENT_ERROR and res['message'] == 'Remote files have not unsupported.'
151
 
152
  # ----------------------------delete a file-----------------------------------------------------
153
  def test_delete_one_file(self):
@@ -156,16 +156,16 @@ class TestFile(TestSdk):
156
  """
157
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
158
  created_res = ragflow.create_dataset("test_delete_one_file")
159
- dataset_id = created_res['data']['dataset_id']
160
  file_paths = ["test_data/test.txt"]
161
  res = ragflow.upload_local_file(dataset_id, file_paths)
162
  # get the doc_id
163
- data = res['data'][0]
164
- doc_id = data['id']
165
  # delete the files
166
  deleted_res = ragflow.delete_files(doc_id, dataset_id)
167
  # assert value
168
- assert deleted_res['code'] == RetCode.SUCCESS and deleted_res['data'] is True
169
 
170
  def test_delete_document_with_not_existing_document(self):
171
  """
@@ -173,9 +173,9 @@ class TestFile(TestSdk):
173
  """
174
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
175
  created_res = ragflow.create_dataset("test_delete_document_with_not_existing_document")
176
- dataset_id = created_res['data']['dataset_id']
177
  res = ragflow.delete_files("111", dataset_id)
178
- assert res['code'] == RetCode.DATA_ERROR and res['message'] == 'Document 111 not found!'
179
 
180
  def test_delete_document_with_creating_100_documents_and_deleting_100_documents(self):
181
  """
@@ -184,18 +184,18 @@ class TestFile(TestSdk):
184
  # upload 100 docs
185
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
186
  created_res = ragflow.create_dataset("test_delete_one_file")
187
- dataset_id = created_res['data']['dataset_id']
188
  file_paths = ["test_data/test.txt"] * 100
189
  res = ragflow.upload_local_file(dataset_id, file_paths)
190
 
191
  # get the doc_id
192
- data = res['data']
193
  for d in data:
194
- doc_id = d['id']
195
  # delete the files
196
  deleted_res = ragflow.delete_files(doc_id, dataset_id)
197
  # assert value
198
- assert deleted_res['code'] == RetCode.SUCCESS and deleted_res['data'] is True
199
 
200
  def test_delete_document_from_nonexistent_dataset(self):
201
  """
@@ -203,17 +203,17 @@ class TestFile(TestSdk):
203
  """
204
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
205
  created_res = ragflow.create_dataset("test_delete_one_file")
206
- dataset_id = created_res['data']['dataset_id']
207
  file_paths = ["test_data/test.txt"]
208
  res = ragflow.upload_local_file(dataset_id, file_paths)
209
  # get the doc_id
210
- data = res['data'][0]
211
- doc_id = data['id']
212
  # delete the files
213
  deleted_res = ragflow.delete_files(doc_id, "000")
214
  # assert value
215
- assert (deleted_res['code'] == RetCode.ARGUMENT_ERROR and deleted_res['message'] ==
216
- f'The document {doc_id} is not in the dataset: 000, but in the dataset: {dataset_id}.')
217
 
218
  def test_delete_document_which_is_located_in_other_dataset(self):
219
  """
@@ -222,20 +222,20 @@ class TestFile(TestSdk):
222
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
223
  # upload a document
224
  created_res = ragflow.create_dataset("test_delete_document_which_is_located_in_other_dataset")
225
- created_res_id = created_res['data']['dataset_id']
226
  file_paths = ["test_data/test.txt"]
227
  res = ragflow.upload_local_file(created_res_id, file_paths)
228
  # other dataset
229
  other_res = ragflow.create_dataset("other_dataset")
230
- other_dataset_id = other_res['data']['dataset_id']
231
  # get the doc_id
232
- data = res['data'][0]
233
- doc_id = data['id']
234
  # delete the files from the other dataset
235
  deleted_res = ragflow.delete_files(doc_id, other_dataset_id)
236
  # assert value
237
- assert (deleted_res['code'] == RetCode.ARGUMENT_ERROR and deleted_res['message'] ==
238
- f'The document {doc_id} is not in the dataset: {other_dataset_id}, but in the dataset: {created_res_id}.')
239
 
240
  # ----------------------------list files-----------------------------------------------------
241
  def test_list_documents_with_success(self):
@@ -245,12 +245,12 @@ class TestFile(TestSdk):
245
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
246
  # upload a document
247
  created_res = ragflow.create_dataset("test_list_documents_with_success")
248
- created_res_id = created_res['data']['dataset_id']
249
  file_paths = ["test_data/test.txt"]
250
  ragflow.upload_local_file(created_res_id, file_paths)
251
  # Call the list_document method
252
  response = ragflow.list_files(created_res_id)
253
- assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 1
254
 
255
  def test_list_documents_with_checking_size(self):
256
  """
@@ -259,12 +259,12 @@ class TestFile(TestSdk):
259
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
260
  # upload 10 documents
261
  created_res = ragflow.create_dataset("test_list_documents_with_checking_size")
262
- created_res_id = created_res['data']['dataset_id']
263
  file_paths = ["test_data/test.txt"] * 10
264
  ragflow.upload_local_file(created_res_id, file_paths)
265
  # Call the list_document method
266
  response = ragflow.list_files(created_res_id)
267
- assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 10
268
 
269
  def test_list_documents_with_getting_empty_result(self):
270
  """
@@ -273,10 +273,10 @@ class TestFile(TestSdk):
273
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
274
  # upload 0 documents
275
  created_res = ragflow.create_dataset("test_list_documents_with_getting_empty_result")
276
- created_res_id = created_res['data']['dataset_id']
277
  # Call the list_document method
278
  response = ragflow.list_files(created_res_id)
279
- assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 0
280
 
281
  def test_list_documents_with_creating_100_documents(self):
282
  """
@@ -285,12 +285,12 @@ class TestFile(TestSdk):
285
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
286
  # upload 100 documents
287
  created_res = ragflow.create_dataset("test_list_documents_with_creating_100_documents")
288
- created_res_id = created_res['data']['dataset_id']
289
  file_paths = ["test_data/test.txt"] * 100
290
  ragflow.upload_local_file(created_res_id, file_paths)
291
  # Call the list_document method
292
  response = ragflow.list_files(created_res_id)
293
- assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 100
294
 
295
  def test_list_document_with_failure(self):
296
  """
@@ -298,9 +298,9 @@ class TestFile(TestSdk):
298
  """
299
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
300
  created_res = ragflow.create_dataset("test_list_document_with_failure")
301
- created_res_id = created_res['data']['dataset_id']
302
  response = ragflow.list_files(created_res_id, offset=-1, count=-1)
303
- assert "IndexError" in response['message'] and response['code'] == RetCode.EXCEPTION_ERROR
304
 
305
  def test_list_document_with_verifying_offset_and_count(self):
306
  """
@@ -308,13 +308,13 @@ class TestFile(TestSdk):
308
  """
309
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
310
  created_res = ragflow.create_dataset("test_list_document_with_verifying_offset_and_count")
311
- created_res_id = created_res['data']['dataset_id']
312
  file_paths = ["test_data/test.txt", "test_data/empty.txt"] * 10
313
  ragflow.upload_local_file(created_res_id, file_paths)
314
  # Call the list_document method
315
  response = ragflow.list_files(created_res_id, offset=2, count=10)
316
 
317
- assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 10
318
 
319
  def test_list_document_with_verifying_keywords(self):
320
  """
@@ -322,13 +322,13 @@ class TestFile(TestSdk):
322
  """
323
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
324
  created_res = ragflow.create_dataset("test_list_document_with_verifying_keywords")
325
- created_res_id = created_res['data']['dataset_id']
326
  file_paths = ["test_data/test.txt", "test_data/empty.txt"]
327
  ragflow.upload_local_file(created_res_id, file_paths)
328
  # Call the list_document method
329
  response = ragflow.list_files(created_res_id, keywords="empty")
330
 
331
- assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 1
332
 
333
  def test_list_document_with_verifying_order_by_and_descend(self):
334
  """
@@ -336,17 +336,17 @@ class TestFile(TestSdk):
336
  """
337
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
338
  created_res = ragflow.create_dataset("test_list_document_with_verifying_order_by_and_descend")
339
- created_res_id = created_res['data']['dataset_id']
340
  file_paths = ["test_data/test.txt", "test_data/empty.txt"]
341
  ragflow.upload_local_file(created_res_id, file_paths)
342
  # Call the list_document method
343
  response = ragflow.list_files(created_res_id)
344
- assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 2
345
- docs = response['data']['docs']
346
  # reverse
347
  i = 1
348
  for doc in docs:
349
- assert doc['name'] in file_paths[i]
350
  i -= 1
351
 
352
  def test_list_document_with_verifying_order_by_and_ascend(self):
@@ -355,24 +355,277 @@ class TestFile(TestSdk):
355
  """
356
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
357
  created_res = ragflow.create_dataset("test_list_document_with_verifying_order_by_and_ascend")
358
- created_res_id = created_res['data']['dataset_id']
359
  file_paths = ["test_data/test.txt", "test_data/test1.txt", "test_data/empty.txt"]
360
  ragflow.upload_local_file(created_res_id, file_paths)
361
  # Call the list_document method
362
  response = ragflow.list_files(created_res_id, descend=False)
363
- assert response['code'] == RetCode.SUCCESS and len(response['data']['docs']) == 3
364
 
365
- docs = response['data']['docs']
366
 
367
  i = 0
368
  for doc in docs:
369
- assert doc['name'] in file_paths[i]
370
  i += 1
371
 
372
- # TODO: have to set the limitation of the number of documents
373
- # ----------------------------download a file-----------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
 
375
- # ----------------------------enable rename-----------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
 
377
  # ----------------------------start parsing-----------------------------------------------------
378
 
 
34
  """
35
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
36
  created_res = ragflow.create_dataset("test_upload_two_files")
37
+ dataset_id = created_res["data"]["dataset_id"]
38
  file_paths = ["test_data/test.txt", "test_data/test1.txt"]
39
  res = ragflow.upload_local_file(dataset_id, file_paths)
40
+ assert res["code"] == RetCode.SUCCESS and res["message"] == "success"
41
 
42
  def test_upload_one_file(self):
43
  """
 
45
  """
46
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
47
  created_res = ragflow.create_dataset("test_upload_one_file")
48
+ dataset_id = created_res["data"]["dataset_id"]
49
  file_paths = ["test_data/test.txt"]
50
  res = ragflow.upload_local_file(dataset_id, file_paths)
51
+ assert res["code"] == RetCode.SUCCESS and res["message"] == "success"
52
 
53
  def test_upload_nonexistent_files(self):
54
  """
 
56
  """
57
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
58
  created_res = ragflow.create_dataset("test_upload_nonexistent_files")
59
+ dataset_id = created_res["data"]["dataset_id"]
60
  file_paths = ["test_data/imagination.txt"]
61
  res = ragflow.upload_local_file(dataset_id, file_paths)
62
+ assert res["code"] == RetCode.DATA_ERROR and "does not exist" in res["message"]
63
 
64
  def test_upload_file_if_dataset_does_not_exist(self):
65
  """
 
68
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
69
  file_paths = ["test_data/test.txt"]
70
  res = ragflow.upload_local_file("111", file_paths)
71
+ assert res["code"] == RetCode.DATA_ERROR and res["message"] == "Can't find this dataset"
72
 
73
  def test_upload_file_without_name(self):
74
  """
 
76
  """
77
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
78
  created_res = ragflow.create_dataset("test_upload_file_without_name")
79
+ dataset_id = created_res["data"]["dataset_id"]
80
  file_paths = ["test_data/.txt"]
81
  res = ragflow.upload_local_file(dataset_id, file_paths)
82
+ assert res["code"] == RetCode.SUCCESS
83
 
84
  def test_upload_file_without_name1(self):
85
  """
 
87
  """
88
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
89
  created_res = ragflow.create_dataset("test_upload_file_without_name")
90
+ dataset_id = created_res["data"]["dataset_id"]
91
  file_paths = ["test_data/.txt", "test_data/empty.txt"]
92
  res = ragflow.upload_local_file(dataset_id, file_paths)
93
+ assert res["code"] == RetCode.SUCCESS
94
 
95
  def test_upload_files_exceeding_the_number_limit(self):
96
  """
 
98
  """
99
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
100
  created_res = ragflow.create_dataset("test_upload_files_exceeding_the_number_limit")
101
+ dataset_id = created_res["data"]["dataset_id"]
102
  file_paths = ["test_data/test.txt", "test_data/test1.txt"] * 256
103
  res = ragflow.upload_local_file(dataset_id, file_paths)
104
+ assert (res["message"] ==
105
+ "You try to upload 512 files, which exceeds the maximum number of uploading files: 256"
106
+ and res["code"] == RetCode.DATA_ERROR)
107
 
108
  def test_upload_files_without_files(self):
109
  """
 
111
  """
112
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
113
  created_res = ragflow.create_dataset("test_upload_files_without_files")
114
+ dataset_id = created_res["data"]["dataset_id"]
115
  file_paths = [None]
116
  res = ragflow.upload_local_file(dataset_id, file_paths)
117
+ assert (res["message"] == "None is not string." and res["code"] == RetCode.ARGUMENT_ERROR)
118
 
119
  def test_upload_files_with_two_files_with_same_name(self):
120
  """
 
122
  """
123
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
124
  created_res = ragflow.create_dataset("test_upload_files_with_two_files_with_same_name")
125
+ dataset_id = created_res["data"]["dataset_id"]
126
+ file_paths = ["test_data/test.txt"] * 2
127
  res = ragflow.upload_local_file(dataset_id, file_paths)
128
+ assert (res["message"] == "success" and res["code"] == RetCode.SUCCESS)
129
 
130
  def test_upload_files_with_file_paths(self):
131
  """
 
133
  """
134
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
135
  created_res = ragflow.create_dataset("test_upload_files_with_file_paths")
136
+ dataset_id = created_res["data"]["dataset_id"]
137
+ file_paths = ["test_data/"]
138
  res = ragflow.upload_local_file(dataset_id, file_paths)
139
+ assert (res["message"] == "The file test_data/ does not exist" and res["code"] == RetCode.DATA_ERROR)
140
 
141
  def test_upload_files_with_remote_file_path(self):
142
  """
 
144
  """
145
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
146
  created_res = ragflow.create_dataset("test_upload_files_with_remote_file_path")
147
+ dataset_id = created_res["data"]["dataset_id"]
148
+ file_paths = ["https://github.com/genostack/ragflow"]
149
  res = ragflow.upload_local_file(dataset_id, file_paths)
150
+ assert res["code"] == RetCode.ARGUMENT_ERROR and res["message"] == "Remote files have not unsupported."
151
 
152
  # ----------------------------delete a file-----------------------------------------------------
153
  def test_delete_one_file(self):
 
156
  """
157
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
158
  created_res = ragflow.create_dataset("test_delete_one_file")
159
+ dataset_id = created_res["data"]["dataset_id"]
160
  file_paths = ["test_data/test.txt"]
161
  res = ragflow.upload_local_file(dataset_id, file_paths)
162
  # get the doc_id
163
+ data = res["data"][0]
164
+ doc_id = data["id"]
165
  # delete the files
166
  deleted_res = ragflow.delete_files(doc_id, dataset_id)
167
  # assert value
168
+ assert deleted_res["code"] == RetCode.SUCCESS and deleted_res["data"] is True
169
 
170
  def test_delete_document_with_not_existing_document(self):
171
  """
 
173
  """
174
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
175
  created_res = ragflow.create_dataset("test_delete_document_with_not_existing_document")
176
+ dataset_id = created_res["data"]["dataset_id"]
177
  res = ragflow.delete_files("111", dataset_id)
178
+ assert res["code"] == RetCode.DATA_ERROR and res["message"] == "Document 111 not found!"
179
 
180
  def test_delete_document_with_creating_100_documents_and_deleting_100_documents(self):
181
  """
 
184
  # upload 100 docs
185
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
186
  created_res = ragflow.create_dataset("test_delete_one_file")
187
+ dataset_id = created_res["data"]["dataset_id"]
188
  file_paths = ["test_data/test.txt"] * 100
189
  res = ragflow.upload_local_file(dataset_id, file_paths)
190
 
191
  # get the doc_id
192
+ data = res["data"]
193
  for d in data:
194
+ doc_id = d["id"]
195
  # delete the files
196
  deleted_res = ragflow.delete_files(doc_id, dataset_id)
197
  # assert value
198
+ assert deleted_res["code"] == RetCode.SUCCESS and deleted_res["data"] is True
199
 
200
  def test_delete_document_from_nonexistent_dataset(self):
201
  """
 
203
  """
204
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
205
  created_res = ragflow.create_dataset("test_delete_one_file")
206
+ dataset_id = created_res["data"]["dataset_id"]
207
  file_paths = ["test_data/test.txt"]
208
  res = ragflow.upload_local_file(dataset_id, file_paths)
209
  # get the doc_id
210
+ data = res["data"][0]
211
+ doc_id = data["id"]
212
  # delete the files
213
  deleted_res = ragflow.delete_files(doc_id, "000")
214
  # assert value
215
+ assert (deleted_res["code"] == RetCode.ARGUMENT_ERROR and deleted_res["message"] ==
216
+ f"The document {doc_id} is not in the dataset: 000, but in the dataset: {dataset_id}.")
217
 
218
  def test_delete_document_which_is_located_in_other_dataset(self):
219
  """
 
222
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
223
  # upload a document
224
  created_res = ragflow.create_dataset("test_delete_document_which_is_located_in_other_dataset")
225
+ created_res_id = created_res["data"]["dataset_id"]
226
  file_paths = ["test_data/test.txt"]
227
  res = ragflow.upload_local_file(created_res_id, file_paths)
228
  # other dataset
229
  other_res = ragflow.create_dataset("other_dataset")
230
+ other_dataset_id = other_res["data"]["dataset_id"]
231
  # get the doc_id
232
+ data = res["data"][0]
233
+ doc_id = data["id"]
234
  # delete the files from the other dataset
235
  deleted_res = ragflow.delete_files(doc_id, other_dataset_id)
236
  # assert value
237
+ assert (deleted_res["code"] == RetCode.ARGUMENT_ERROR and deleted_res["message"] ==
238
+ f"The document {doc_id} is not in the dataset: {other_dataset_id}, but in the dataset: {created_res_id}.")
239
 
240
  # ----------------------------list files-----------------------------------------------------
241
  def test_list_documents_with_success(self):
 
245
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
246
  # upload a document
247
  created_res = ragflow.create_dataset("test_list_documents_with_success")
248
+ created_res_id = created_res["data"]["dataset_id"]
249
  file_paths = ["test_data/test.txt"]
250
  ragflow.upload_local_file(created_res_id, file_paths)
251
  # Call the list_document method
252
  response = ragflow.list_files(created_res_id)
253
+ assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 1
254
 
255
  def test_list_documents_with_checking_size(self):
256
  """
 
259
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
260
  # upload 10 documents
261
  created_res = ragflow.create_dataset("test_list_documents_with_checking_size")
262
+ created_res_id = created_res["data"]["dataset_id"]
263
  file_paths = ["test_data/test.txt"] * 10
264
  ragflow.upload_local_file(created_res_id, file_paths)
265
  # Call the list_document method
266
  response = ragflow.list_files(created_res_id)
267
+ assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 10
268
 
269
  def test_list_documents_with_getting_empty_result(self):
270
  """
 
273
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
274
  # upload 0 documents
275
  created_res = ragflow.create_dataset("test_list_documents_with_getting_empty_result")
276
+ created_res_id = created_res["data"]["dataset_id"]
277
  # Call the list_document method
278
  response = ragflow.list_files(created_res_id)
279
+ assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 0
280
 
281
  def test_list_documents_with_creating_100_documents(self):
282
  """
 
285
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
286
  # upload 100 documents
287
  created_res = ragflow.create_dataset("test_list_documents_with_creating_100_documents")
288
+ created_res_id = created_res["data"]["dataset_id"]
289
  file_paths = ["test_data/test.txt"] * 100
290
  ragflow.upload_local_file(created_res_id, file_paths)
291
  # Call the list_document method
292
  response = ragflow.list_files(created_res_id)
293
+ assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 100
294
 
295
  def test_list_document_with_failure(self):
296
  """
 
298
  """
299
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
300
  created_res = ragflow.create_dataset("test_list_document_with_failure")
301
+ created_res_id = created_res["data"]["dataset_id"]
302
  response = ragflow.list_files(created_res_id, offset=-1, count=-1)
303
+ assert "IndexError" in response["message"] and response["code"] == RetCode.EXCEPTION_ERROR
304
 
305
  def test_list_document_with_verifying_offset_and_count(self):
306
  """
 
308
  """
309
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
310
  created_res = ragflow.create_dataset("test_list_document_with_verifying_offset_and_count")
311
+ created_res_id = created_res["data"]["dataset_id"]
312
  file_paths = ["test_data/test.txt", "test_data/empty.txt"] * 10
313
  ragflow.upload_local_file(created_res_id, file_paths)
314
  # Call the list_document method
315
  response = ragflow.list_files(created_res_id, offset=2, count=10)
316
 
317
+ assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 10
318
 
319
  def test_list_document_with_verifying_keywords(self):
320
  """
 
322
  """
323
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
324
  created_res = ragflow.create_dataset("test_list_document_with_verifying_keywords")
325
+ created_res_id = created_res["data"]["dataset_id"]
326
  file_paths = ["test_data/test.txt", "test_data/empty.txt"]
327
  ragflow.upload_local_file(created_res_id, file_paths)
328
  # Call the list_document method
329
  response = ragflow.list_files(created_res_id, keywords="empty")
330
 
331
+ assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 1
332
 
333
  def test_list_document_with_verifying_order_by_and_descend(self):
334
  """
 
336
  """
337
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
338
  created_res = ragflow.create_dataset("test_list_document_with_verifying_order_by_and_descend")
339
+ created_res_id = created_res["data"]["dataset_id"]
340
  file_paths = ["test_data/test.txt", "test_data/empty.txt"]
341
  ragflow.upload_local_file(created_res_id, file_paths)
342
  # Call the list_document method
343
  response = ragflow.list_files(created_res_id)
344
+ assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 2
345
+ docs = response["data"]["docs"]
346
  # reverse
347
  i = 1
348
  for doc in docs:
349
+ assert doc["name"] in file_paths[i]
350
  i -= 1
351
 
352
  def test_list_document_with_verifying_order_by_and_ascend(self):
 
355
  """
356
  ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
357
  created_res = ragflow.create_dataset("test_list_document_with_verifying_order_by_and_ascend")
358
+ created_res_id = created_res["data"]["dataset_id"]
359
  file_paths = ["test_data/test.txt", "test_data/test1.txt", "test_data/empty.txt"]
360
  ragflow.upload_local_file(created_res_id, file_paths)
361
  # Call the list_document method
362
  response = ragflow.list_files(created_res_id, descend=False)
363
+ assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 3
364
 
365
+ docs = response["data"]["docs"]
366
 
367
  i = 0
368
  for doc in docs:
369
+ assert doc["name"] in file_paths[i]
370
  i += 1
371
 
372
+ # ----------------------------update files: enable, rename, template_type-------------------------------------------
373
+
374
+ def test_update_nonexistent_document(self):
375
+ """
376
+ Test updating a document which does not exist.
377
+ """
378
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
379
+ created_res = ragflow.create_dataset("test_update_nonexistent_document")
380
+ created_res_id = created_res["data"]["dataset_id"]
381
+ params = {
382
+ "name": "new_name"
383
+ }
384
+ res = ragflow.update_file(created_res_id, "weird_doc_id", **params)
385
+ assert res["code"] == RetCode.ARGUMENT_ERROR and res["message"] == f"This document weird_doc_id cannot be found!"
386
+
387
+ def test_update_document_without_parameters(self):
388
+ """
389
+ Test updating a document without giving parameters.
390
+ """
391
+ # create a dataset
392
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
393
+ created_res = ragflow.create_dataset("test_update_document_without_parameters")
394
+ created_res_id = created_res["data"]["dataset_id"]
395
+ # upload files
396
+ file_paths = ["test_data/test.txt"]
397
+ uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
398
+ # get the doc_id
399
+ data = uploading_res["data"][0]
400
+ doc_id = data["id"]
401
+ # update file
402
+ params = {
403
+ }
404
+ update_res = ragflow.update_file(created_res_id, doc_id, **params)
405
+ assert (update_res["code"] == RetCode.DATA_ERROR and
406
+ update_res["message"] == "Please input at least one parameter that you want to update!")
407
+
+ def test_update_document_in_nonexistent_dataset(self):
+ """
+ Test updating a document in a nonexistent dataset.
+ """
+ # create a dataset
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ created_res = ragflow.create_dataset("test_update_document_in_nonexistent_dataset")
+ created_res_id = created_res["data"]["dataset_id"]
+ # upload files
+ file_paths = ["test_data/test.txt"]
+ uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+ # get the doc_id
+ data = uploading_res["data"][0]
+ doc_id = data["id"]
+ # update file
+ params = {
+ "name": "new_name"
+ }
+ update_res = ragflow.update_file("fake_dataset_id", doc_id, **params)
+ assert (update_res["code"] == RetCode.DATA_ERROR and
+ update_res["message"] == "This dataset fake_dataset_id cannot be found!")
+
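Taken together, the three failure tests pin down the error contract of update_file: an empty parameter set and a missing dataset both yield DATA_ERROR, while a missing document yields ARGUMENT_ERROR. One plausible ordering of those checks, sketched as a plain function (an assumption drawn from the asserted codes and messages, not the PR's actual handler):

# Sketch of the implied validation contract; string codes stand in for RetCode.
def validate_update(dataset, document, dataset_id, document_id, params):
    if not params:
        return "DATA_ERROR", "Please input at least one parameter that you want to update!"
    if dataset is None:
        return "DATA_ERROR", f"This dataset {dataset_id} cannot be found!"
    if document is None:
        return "ARGUMENT_ERROR", f"This document {document_id} cannot be found!"
    return "SUCCESS", "Success"

The tests never combine two failure conditions in one request, so the relative order of these checks is not actually pinned down; only the per-condition code and message are.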
+ def test_update_document_with_different_extension_name(self):
+ """
+ Test the updating of a document with an extension name that differs from its original.
+ """
+ # create a dataset
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ created_res = ragflow.create_dataset("test_update_document_with_different_extension_name")
+ created_res_id = created_res["data"]["dataset_id"]
+ # upload files
+ file_paths = ["test_data/test.txt"]
+ uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+ # get the doc_id
+ data = uploading_res["data"][0]
+ doc_id = data["id"]
+ # update file
+ params = {
+ "name": "new_name.doc"
+ }
+ update_res = ragflow.update_file(created_res_id, doc_id, **params)
+ assert (update_res["code"] == RetCode.ARGUMENT_ERROR and
+ update_res["message"] == "The extension of file cannot be changed")
+
+ def test_update_document_with_duplicate_name(self):
+ """
+ Test the updating of a document with a duplicate name.
+ """
+ # create a dataset
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ created_res = ragflow.create_dataset("test_update_document_with_duplicate_name")
+ created_res_id = created_res["data"]["dataset_id"]
+ # upload files
+ file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+ uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+ # get the doc_id
+ data = uploading_res["data"][0]
+ doc_id = data["id"]
+ # update file
+ params = {
+ "name": "test.txt"
+ }
+ update_res = ragflow.update_file(created_res_id, doc_id, **params)
+ assert (update_res["code"] == RetCode.ARGUMENT_ERROR and
+ update_res["message"] == "Duplicated document name in the same dataset.")
+
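Together with the empty-name test further down, these rename tests imply three rules for the "name" field: the new name must be non-empty, the file extension must not change, and the name must be unique within the dataset. A compact sketch of those checks, hypothetical and mirroring only the asserted messages:

import os

# Hypothetical rename validation; string codes stand in for RetCode.
def validate_rename(new_name, old_name, existing_names):
    if not new_name:
        return "DATA_ERROR", "There is no new name."
    if os.path.splitext(new_name)[1] != os.path.splitext(old_name)[1]:
        return "ARGUMENT_ERROR", "The extension of file cannot be changed"
    if new_name in existing_names:
        return "ARGUMENT_ERROR", "Duplicated document name in the same dataset."
    return "SUCCESS", "Success"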
+ def test_update_document_with_updating_its_name_with_success(self):
+ """
+ Test the updating of a document's name with success.
+ """
+ # create a dataset
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ created_res = ragflow.create_dataset("test_update_document_with_updating_its_name_with_success")
+ created_res_id = created_res["data"]["dataset_id"]
+ # upload files
+ file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+ uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+ # get the doc_id
+ data = uploading_res["data"][0]
+ doc_id = data["id"]
+ # update file
+ params = {
+ "name": "new_name.txt"
+ }
+ update_res = ragflow.update_file(created_res_id, doc_id, **params)
+ assert (update_res["code"] == RetCode.SUCCESS and
+ update_res["message"] == "Success" and update_res["data"]["name"] == "new_name.txt")
+
+ def test_update_document_with_updating_its_template_type_with_success(self):
+ """
+ Test the updating of a document's template type with success.
+ """
+ # create a dataset
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ created_res = ragflow.create_dataset("test_update_document_with_updating_its_template_type_with_success")
+ created_res_id = created_res["data"]["dataset_id"]
+ # upload files
+ file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+ uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+ # get the doc_id
+ data = uploading_res["data"][0]
+ doc_id = data["id"]
+ # update file
+ params = {
+ "template_type": "laws"
+ }
+ update_res = ragflow.update_file(created_res_id, doc_id, **params)
+ assert (update_res["code"] == RetCode.SUCCESS and
+ update_res["message"] == "Success" and update_res["data"]["parser_id"] == "laws")
+
+ def test_update_document_with_updating_its_enable_value_with_success(self):
+ """
+ Test the updating of a document's enable value with success.
+ """
+ # create a dataset
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ created_res = ragflow.create_dataset("test_update_document_with_updating_its_enable_value_with_success")
+ created_res_id = created_res["data"]["dataset_id"]
+ # upload files
+ file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+ uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+ # get the doc_id
+ data = uploading_res["data"][0]
+ doc_id = data["id"]
+ # update file
+ params = {
+ "enable": "0"
+ }
+ update_res = ragflow.update_file(created_res_id, doc_id, **params)
+ assert (update_res["code"] == RetCode.SUCCESS and
+ update_res["message"] == "Success" and update_res["data"]["status"] == "0")
+
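The three success tests above cover the fields this section exercises: "name", "template_type", and "enable". Condensed into one walkthrough using the same SDK calls the tests use (one field per call, since the tests never combine fields; the dataset name here is illustrative, and RAGFlow, API_KEY, HOST_ADDRESS and RetCode come from the test module):

# Sketch, not part of the PR: the happy path for each updatable field.
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
created_res = ragflow.create_dataset("update_file_walkthrough")
dataset_id = created_res["data"]["dataset_id"]
doc_id = ragflow.upload_local_file(dataset_id, ["test_data/test.txt"])["data"][0]["id"]

assert ragflow.update_file(dataset_id, doc_id, name="new_name.txt")["code"] == RetCode.SUCCESS
assert ragflow.update_file(dataset_id, doc_id, template_type="laws")["code"] == RetCode.SUCCESS
assert ragflow.update_file(dataset_id, doc_id, enable="0")["code"] == RetCode.SUCCESS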
+ def test_update_document_with_updating_illegal_parameter(self):
+ """
+ Test updating a document with an illegal parameter.
+ """
+ # create a dataset
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ created_res = ragflow.create_dataset("test_update_document_with_updating_illegal_parameter")
+ created_res_id = created_res["data"]["dataset_id"]
+ # upload files
+ file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+ uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+ # get the doc_id
+ data = uploading_res["data"][0]
+ doc_id = data["id"]
+ # update file
+ params = {
+ "illegal_parameter": "0"
+ }
+ update_res = ragflow.update_file(created_res_id, doc_id, **params)
+ assert (update_res["code"] == RetCode.ARGUMENT_ERROR and
+ update_res["message"] == "illegal_parameter is an illegal parameter.")
 
+ def test_update_document_without_giving_its_name_value(self):
+ """
+ Test updating a document's name without giving a new name value.
+ """
+ # create a dataset
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ created_res = ragflow.create_dataset("test_update_document_without_giving_its_name_value")
+ created_res_id = created_res["data"]["dataset_id"]
+ # upload files
+ file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+ uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+ # get the doc_id
+ data = uploading_res["data"][0]
+ doc_id = data["id"]
+ # update file
+ params = {
+ "name": ""
+ }
+ update_res = ragflow.update_file(created_res_id, doc_id, **params)
+ assert (update_res["code"] == RetCode.DATA_ERROR and
+ update_res["message"] == "There is no new name.")
+
+ def test_update_document_with_giving_illegal_value_for_enable(self):
+ """
+ Test updating a document with an illegal value for the 'enable' field.
+ """
+ # create a dataset
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ created_res = ragflow.create_dataset("test_update_document_with_giving_illegal_value_for_enable")
+ created_res_id = created_res["data"]["dataset_id"]
+ # upload files
+ file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+ uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+ # get the doc_id
+ data = uploading_res["data"][0]
+ doc_id = data["id"]
+ # update file
+ params = {
+ "enable": "?"
+ }
+ update_res = ragflow.update_file(created_res_id, doc_id, **params)
+ assert (update_res["code"] == RetCode.DATA_ERROR and
+ update_res["message"] == "Illegal value ? for 'enable' field.")
+
+ def test_update_document_with_giving_illegal_value_for_type(self):
+ """
+ Test updating a document with an illegal value for the 'template_type' field.
+ """
+ # create a dataset
+ ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+ created_res = ragflow.create_dataset("test_update_document_with_giving_illegal_value_for_type")
+ created_res_id = created_res["data"]["dataset_id"]
+ # upload files
+ file_paths = ["test_data/test.txt", "test_data/test1.txt"]
+ uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
+ # get the doc_id
+ data = uploading_res["data"][0]
+ doc_id = data["id"]
+ # update file
+ params = {
+ "template_type": "?"
+ }
+ update_res = ragflow.update_file(created_res_id, doc_id, **params)
+ assert (update_res["code"] == RetCode.DATA_ERROR and
+ update_res["message"] == "Illegal value ? for 'template_type' field.")
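The last two tests pin down per-field value validation: a value outside the accepted set yields DATA_ERROR with an "Illegal value ..." message. A sketch of that check follows; the accepted value sets here are assumptions, since the tests only prove that "0" and "laws" pass while "?" fails:

# Assumed value sets (only "0"/"laws" accepted and "?" rejected are test-proven).
VALID_ENABLE_VALUES = {"0", "1"}
VALID_TEMPLATE_TYPES = {"naive", "qa", "laws"}  # illustrative subset

def validate_field_value(field, value):
    allowed = VALID_ENABLE_VALUES if field == "enable" else VALID_TEMPLATE_TYPES
    if value not in allowed:
        return "DATA_ERROR", f"Illegal value {value} for '{field}' field."
    return "SUCCESS", "Success"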
+ # ----------------------------download a file-----------------------------------------------------
 
  # ----------------------------start parsing-----------------------------------------------------