GYH
committed on
Commit
·
83bd6a2
1
Parent(s):
e346b5a
Updated document upload method (#777)
Browse files
### What problem does this PR solve?
api_app.py
/document/upload
Adds two optional (non-mandatory) form parameters:
parser_id:
[naive,qa,resume,manual,table,paper,book,laws,presentation,picture,one]
run: 1
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- api/apps/api_app.py +38 -5
api/apps/api_app.py
CHANGED
@@ -31,11 +31,11 @@ from api.settings import RetCode
|
|
31 |
from api.utils import get_uuid, current_timestamp, datetime_format
|
32 |
from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request
|
33 |
from itsdangerous import URLSafeTimedSerializer
|
34 |
-
|
35 |
from api.utils.file_utils import filename_type, thumbnail
|
36 |
from rag.utils.minio_conn import MINIO
|
37 |
-
|
38 |
-
|
39 |
def generate_confirmation_token(tenent_id):
|
40 |
serializer = URLSafeTimedSerializer(tenent_id)
|
41 |
return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]
|
@@ -229,6 +229,7 @@ def upload():
|
|
229 |
return get_json_result(
|
230 |
data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
|
231 |
|
|
|
232 |
file = request.files['file']
|
233 |
if file.filename == '':
|
234 |
return get_json_result(
|
@@ -252,6 +253,7 @@ def upload():
|
|
252 |
location += "_"
|
253 |
blob = request.files['file'].read()
|
254 |
MINIO.put(kb_id, location, blob)
|
|
|
255 |
doc = {
|
256 |
"id": get_uuid(),
|
257 |
"kb_id": kb.id,
|
@@ -264,11 +266,42 @@ def upload():
|
|
264 |
"size": len(blob),
|
265 |
"thumbnail": thumbnail(filename, blob)
|
266 |
}
|
|
|
|
|
|
|
|
|
|
|
267 |
if doc["type"] == FileType.VISUAL:
|
268 |
doc["parser_id"] = ParserType.PICTURE.value
|
269 |
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
270 |
doc["parser_id"] = ParserType.PRESENTATION.value
|
271 |
-
|
272 |
-
|
|
|
273 |
except Exception as e:
|
274 |
return server_error_response(e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
from api.utils import get_uuid, current_timestamp, datetime_format
|
32 |
from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request
|
33 |
from itsdangerous import URLSafeTimedSerializer
|
34 |
+
from api.db.services.task_service import TaskService, queue_tasks
|
35 |
from api.utils.file_utils import filename_type, thumbnail
|
36 |
from rag.utils.minio_conn import MINIO
|
37 |
+
from api.db.db_models import Task
|
38 |
+
from api.db.services.file2document_service import File2DocumentService
|
39 |
def generate_confirmation_token(tenent_id):
|
40 |
serializer = URLSafeTimedSerializer(tenent_id)
|
41 |
return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]
|
|
|
229 |
return get_json_result(
|
230 |
data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
|
231 |
|
232 |
+
|
233 |
file = request.files['file']
|
234 |
if file.filename == '':
|
235 |
return get_json_result(
|
|
|
253 |
location += "_"
|
254 |
blob = request.files['file'].read()
|
255 |
MINIO.put(kb_id, location, blob)
|
256 |
+
|
257 |
doc = {
|
258 |
"id": get_uuid(),
|
259 |
"kb_id": kb.id,
|
|
|
266 |
"size": len(blob),
|
267 |
"thumbnail": thumbnail(filename, blob)
|
268 |
}
|
269 |
+
|
270 |
+
form_data=request.form
|
271 |
+
if "parser_id" in form_data.keys():
|
272 |
+
if request.form.get("parser_id").strip() in list(vars(ParserType).values())[1:-3]:
|
273 |
+
doc["parser_id"] = request.form.get("parser_id").strip()
|
274 |
if doc["type"] == FileType.VISUAL:
|
275 |
doc["parser_id"] = ParserType.PICTURE.value
|
276 |
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
277 |
doc["parser_id"] = ParserType.PRESENTATION.value
|
278 |
+
|
279 |
+
doc_result = DocumentService.insert(doc)
|
280 |
+
|
281 |
except Exception as e:
|
282 |
return server_error_response(e)
|
283 |
+
|
284 |
+
if "run" in form_data.keys():
|
285 |
+
if request.form.get("run").strip() == "1":
|
286 |
+
try:
|
287 |
+
info = {"run": 1, "progress": 0}
|
288 |
+
info["progress_msg"] = ""
|
289 |
+
info["chunk_num"] = 0
|
290 |
+
info["token_num"] = 0
|
291 |
+
DocumentService.update_by_id(doc["id"], info)
|
292 |
+
# if str(req["run"]) == TaskStatus.CANCEL.value:
|
293 |
+
tenant_id = DocumentService.get_tenant_id(doc["id"])
|
294 |
+
if not tenant_id:
|
295 |
+
return get_data_error_result(retmsg="Tenant not found!")
|
296 |
+
|
297 |
+
#e, doc = DocumentService.get_by_id(doc["id"])
|
298 |
+
TaskService.filter_delete([Task.doc_id == doc["id"]])
|
299 |
+
e, doc = DocumentService.get_by_id(doc["id"])
|
300 |
+
doc = doc.to_dict()
|
301 |
+
doc["tenant_id"] = tenant_id
|
302 |
+
bucket, name = File2DocumentService.get_minio_address(doc_id=doc["id"])
|
303 |
+
queue_tasks(doc, bucket, name)
|
304 |
+
except Exception as e:
|
305 |
+
return server_error_response(e)
|
306 |
+
|
307 |
+
return get_json_result(data=doc_result.to_json())
|