Spaces:

retopara
/

ragflow

Build error

App Files Files Community

ragflow/sdk/python/test/test_frontend_api/test_chunk.py

zhichyu

Fix errors detected by Ruff (#3918)

0404a52 3 months ago

raw

history blame

2.45 kB

	#
	# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	from common import create_dataset, list_dataset, rm_dataset, upload_file
	from common import list_document, get_docs_info, parse_docs
	from time import sleep
	from timeit import default_timer as timer


	def test_parse_txt_document(get_auth):
	    """Upload a text file to a dataset, parse it, and wait for parsing to finish.

	    Steps:
	      1. Create a dataset named "test_parse_txt_document".
	      2. Page through ``list_dataset`` and collect every dataset id.
	      3. Upload ``ragflow_test.txt`` to the last dataset listed.
	      4. Start parsing all documents of that dataset and poll
	         ``get_docs_info`` once per second until every document reports
	         ``progress == 1``.
	      5. Delete every dataset collected in step 2 (also on failure).

	    Args:
	        get_auth: Forwarded unchanged to every helper from ``common``;
	            presumably a pytest fixture yielding the auth token -- confirm
	            against conftest.
	    """
	    # Upper bound for the polling loop so a document that never reaches
	    # progress == 1 fails the test instead of hanging the whole run.
	    parse_timeout_s = 600
	    # NOTE(review): 150 is presumably the page size used by list_dataset;
	    # a page shorter than this is treated as the last one -- confirm.
	    page_size = 150

	    # create dataset
	    res = create_dataset(get_auth, "test_parse_txt_document")
	    assert res.get("code") == 0, f"{res.get('message')}"

	    # list dataset
	    page_number = 1
	    dataset_list = []
	    while True:
	        res = list_dataset(get_auth, page_number)
	        data = res.get("data").get("kbs")
	        dataset_list.extend(item.get("id") for item in data)
	        if len(dataset_list) < page_number * page_size:
	            break
	        page_number += 1

	    try:
	        assert dataset_list, "no dataset listed after creation"
	        # The upload targets the last dataset returned by the listing.
	        # NOTE(review): this is not necessarily the dataset created above;
	        # verify the listing order or take the id from the create response.
	        dataset_id = dataset_list[-1]

	        filename = 'ragflow_test.txt'
	        res = upload_file(get_auth, dataset_id, f"../test_sdk_api/test_data/{filename}")
	        assert res.get("code") == 0, f"{res.get('message')}"

	        res = list_document(get_auth, dataset_id)
	        doc_id_list = [doc['id'] for doc in res['data']['docs']]
	        # Without this check an empty listing would make the wait loop below
	        # succeed immediately and the test would pass without parsing anything.
	        assert doc_id_list, "no document listed after upload"
	        print(doc_id_list)
	        doc_count = len(doc_id_list)

	        parse_docs(get_auth, doc_id_list)

	        start_ts = timer()
	        while True:
	            res = get_docs_info(get_auth, doc_id_list)
	            finished_count = sum(
	                1 for doc_info in res['data'] if doc_info['progress'] == 1
	            )
	            if finished_count == doc_count:
	                break
	            elapsed = timer() - start_ts
	            assert elapsed < parse_timeout_s, (
	                f"parsing not finished after {elapsed:.1f}s: "
	                f"{finished_count}/{doc_count} documents done"
	            )
	            sleep(1)
	        print('time cost {:.1f}s'.format(timer() - start_ts))
	    finally:
	        # delete dataset -- in ``finally`` so a failed step above does not
	        # leave datasets behind for subsequent test runs.
	        for listed_id in dataset_list:
	            res = rm_dataset(get_auth, listed_id)
	            assert res.get("code") == 0, f"{res.get('message')}"
	        print(f"{len(dataset_list)} datasets are deleted")