cecilia-uu committed on
Commit
4b9f80a
·
1 Parent(s): 9ae711a

API: show status of parsing (#1504)

Browse files

### What problem does this PR solve?

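Adds an API endpoint, a matching Python SDK method, and tests for showing the parsing status (progress and status) of a document.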

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

api/apps/dataset_api.py CHANGED
@@ -750,7 +750,27 @@ def get_message_during_parsing_document(id, message):
750
  # ----------------------------stop parsing-----------------------------------------------------
751
 
752
  # ----------------------------show the status of the file-----------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
753
 
 
 
 
754
  # ----------------------------list the chunks of the file-----------------------------------------------------
755
 
756
  # -- --------------------------delete the chunk-----------------------------------------------------
 
750
  # ----------------------------stop parsing-----------------------------------------------------
751
 
752
  # ----------------------------show the status of the file-----------------------------------------------------
753
@manager.route("/<dataset_id>/documents/<document_id>/status", methods=["GET"])
@login_required
def show_parsing_status(dataset_id, document_id):
    """Report the parsing progress and status of one document.

    Args:
        dataset_id: ID of the dataset taken from the URL; it must exist.
        document_id: ID of the document whose status is requested.

    Returns:
        The value built by ``construct_json_result``. On success its data
        holds the document's ``progress`` and ``status`` fields. When the
        dataset or the document cannot be found, it carries
        ``RetCode.DATA_ERROR`` and an explanatory message. Any exception
        raised along the way is converted with ``construct_error_response``.
    """
    try:
        # The dataset is validated first so that an unknown dataset is
        # reported even when the document ID happens to be valid.
        dataset_found, _ = KnowledgebaseService.get_by_id(dataset_id)
        if not dataset_found:
            return construct_json_result(code=RetCode.DATA_ERROR,
                                         message=f"This dataset: '{dataset_id}' cannot be found!")

        # Look the document up once and reuse the returned object, instead of
        # querying a second time after the existence check.
        document_found, doc = DocumentService.get_by_id(document_id)
        if not document_found:
            return construct_json_result(code=RetCode.DATA_ERROR,
                                         message=f"This document: '{document_id}' is not a valid document.")

        # NOTE(review): nothing here verifies that the document belongs to
        # `dataset_id` -- confirm whether that should be enforced.
        doc_attributes = doc.to_dict()
        return construct_json_result(
            data={"progress": doc_attributes["progress"], "status": doc_attributes["status"]},
            code=RetCode.SUCCESS)
    except Exception as e:
        return construct_error_response(e)
774
  # ----------------------------list the chunks of the file-----------------------------------------------------
775
 
776
  # -- --------------------------delete the chunk-----------------------------------------------------
sdk/python/ragflow/ragflow.py CHANGED
@@ -159,7 +159,11 @@ class RAGFlow:
159
  # ----------------------------stop parsing-----------------------------------------------------
160
 
161
  # ----------------------------show the status of the file-----------------------------------------------------
 
 
 
162
 
 
163
  # ----------------------------list the chunks of the file-----------------------------------------------------
164
 
165
  # ----------------------------delete the chunk-----------------------------------------------------
 
159
  # ----------------------------stop parsing-----------------------------------------------------
160
 
161
  # ----------------------------show the status of the file-----------------------------------------------------
162
def show_parsing_status(self, dataset_id, document_id):
    """Fetch the parsing status of a document from the server.

    Sends a GET request to the document's ``status`` endpoint under
    ``self.dataset_url``, authenticated with ``self.authorization_header``,
    and returns the decoded JSON body of the response.
    """
    status_url = f"{self.dataset_url}/{dataset_id}/documents/{document_id}/status"
    response = requests.get(status_url, headers=self.authorization_header)
    return response.json()
167
  # ----------------------------list the chunks of the file-----------------------------------------------------
168
 
169
  # ----------------------------delete the chunk-----------------------------------------------------
sdk/python/test/test_document.py CHANGED
@@ -953,7 +953,52 @@ class TestFile(TestSdk):
953
  # ----------------------------stop parsing-----------------------------------------------------
954
 
955
  # ----------------------------show the status of the file-----------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
956
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
957
  # ----------------------------list the chunks of the file-----------------------------------------------------
958
 
959
  # ----------------------------delete the chunk-----------------------------------------------------
 
953
  # ----------------------------stop parsing-----------------------------------------------------
954
 
955
  # ----------------------------show the status of the file-----------------------------------------------------
956
def test_show_status_with_success(self):
    """
    Test showing the status of a document after its parsing has been started.
    """
    ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
    # create a dataset and keep its id
    dataset_id = ragflow.create_dataset("test_show_status_with_success")["data"]["dataset_id"]
    # upload one file and take the id of the resulting document
    uploading_res = ragflow.upload_local_file(dataset_id, ["test_data/lol.txt"])
    doc_id = uploading_res["data"][0]["id"]
    # start parsing the file
    parsing_res = ragflow.start_parsing_document(dataset_id, doc_id)
    assert parsing_res["code"] == RetCode.SUCCESS
    assert parsing_res["message"] == ""
    # show status
    status_res = ragflow.show_parsing_status(dataset_id, doc_id)
    assert status_res["code"] == RetCode.SUCCESS
    assert status_res["data"]["status"] == "1"
973
+
974
def test_show_status_nonexistent_document(self):
    """
    Test that asking for the status of a document which does not exist
    is reported as a data error.
    """
    ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
    # create a dataset so that only the document id is invalid
    dataset_id = ragflow.create_dataset("test_show_status_nonexistent_document")["data"]["dataset_id"]
    res = ragflow.show_parsing_status(dataset_id, "imagination")
    assert res["code"] == RetCode.DATA_ERROR
    assert res["message"] == "This document: 'imagination' is not a valid document."
984
 
985
def test_show_status_document_in_nonexistent_dataset(self):
    """
    Test that asking for the status of a document under a dataset id which
    does not exist is reported as a data error.
    """
    ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
    # create a dataset to hold the uploaded document
    dataset_id = ragflow.create_dataset("test_show_status_document_in_nonexistent_dataset")["data"]["dataset_id"]
    # upload one file and take the id of the resulting document
    uploading_res = ragflow.upload_local_file(dataset_id, ["test_data/test.txt"])
    doc_id = uploading_res["data"][0]["id"]
    # show status, using a dataset id that does not exist
    res = ragflow.show_parsing_status("imagination", doc_id)
    assert res["code"] == RetCode.DATA_ERROR
    assert res["message"] == "This dataset: 'imagination' cannot be found!"
1002
  # ----------------------------list the chunks of the file-----------------------------------------------------
1003
 
1004
  # ----------------------------delete the chunk-----------------------------------------------------