from time import sleep
from timeit import default_timer as timer

from common import create_dataset, list_dataset, rm_dataset, upload_file
from common import list_document, get_docs_info, parse_docs


def test_parse_txt_document(get_auth):
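    # Create a dataset dedicated to this test run.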
    res = create_dataset(get_auth, "test_parse_txt_document")
    assert res.get("code") == 0, f"{res.get('message')}"
|
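    # Walk every page of the dataset listing and collect all dataset IDs;
    # the loop assumes the API returns at most 150 datasets per page.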
    page_number = 1
    dataset_list = []
    dataset_id = None
    while True:
        res = list_dataset(get_auth, page_number)
        data = res.get("data").get("kbs")
        for item in data:
            dataset_id = item.get("id")
            dataset_list.append(dataset_id)
        if len(dataset_list) < page_number * 150:
            # Fewer IDs than a full page's worth: this was the last page.
            break
        page_number += 1
|
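    # Upload the sample text file into the last dataset returned above.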
    filename = "ragflow_test.txt"
    res = upload_file(get_auth, dataset_id, f"../test_sdk_api/test_data/{filename}")
    assert res.get("code") == 0, f"{res.get('message')}"
|
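    # Gather the IDs of every document in that dataset.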
    res = list_document(get_auth, dataset_id)
    doc_id_list = []
    for doc in res["data"]["docs"]:
        doc_id_list.append(doc["id"])
|
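    # Record document info, then kick off parsing for every document.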
    res = get_docs_info(get_auth, doc_id_list)
    print(doc_id_list)
    doc_count = len(doc_id_list)
    res = parse_docs(get_auth, doc_id_list)
|
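    # Poll document info until every document reports progress == 1,
    # i.e. parsing has finished. There is no timeout here, so a parse
    # that never completes will hang the test.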
    start_ts = timer()
    while True:
        res = get_docs_info(get_auth, doc_id_list)
        finished_count = 0
        for doc_info in res["data"]:
            if doc_info["progress"] == 1:
                finished_count += 1
        if finished_count == doc_count:
            break
        sleep(1)
    print(f"time cost {timer() - start_ts:.1f}s")
|
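    # Clean up: delete every dataset collected above.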
    for dataset_id in dataset_list:
        res = rm_dataset(get_auth, dataset_id)
        assert res.get("code") == 0, f"{res.get('message')}"
    print(f"{len(dataset_list)} datasets are deleted")