GYH committed on
Commit
83bd6a2
·
1 Parent(s): e346b5a

Updated document upload method (#777)

Browse files

### What problem does this PR solve?

api_app.py
/document/upload
Add two optional (non-mandatory) form parameters:
parser_id:
[naive,qa,resume,manual,table,paper,book,laws,presentation,picture,one]
run: 1 (when set to 1, the uploaded document is queued for parsing right after upload)

### Type of change
- [x] New Feature (non-breaking change which adds functionality)

Files changed (1) hide show
  1. api/apps/api_app.py +38 -5
api/apps/api_app.py CHANGED
@@ -31,11 +31,11 @@ from api.settings import RetCode
31
  from api.utils import get_uuid, current_timestamp, datetime_format
32
  from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request
33
  from itsdangerous import URLSafeTimedSerializer
34
-
35
  from api.utils.file_utils import filename_type, thumbnail
36
  from rag.utils.minio_conn import MINIO
37
-
38
-
39
  def generate_confirmation_token(tenent_id):
40
  serializer = URLSafeTimedSerializer(tenent_id)
41
  return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]
@@ -229,6 +229,7 @@ def upload():
229
  return get_json_result(
230
  data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
231
 
 
232
  file = request.files['file']
233
  if file.filename == '':
234
  return get_json_result(
@@ -252,6 +253,7 @@ def upload():
252
  location += "_"
253
  blob = request.files['file'].read()
254
  MINIO.put(kb_id, location, blob)
 
255
  doc = {
256
  "id": get_uuid(),
257
  "kb_id": kb.id,
@@ -264,11 +266,42 @@ def upload():
264
  "size": len(blob),
265
  "thumbnail": thumbnail(filename, blob)
266
  }
 
 
 
 
 
267
  if doc["type"] == FileType.VISUAL:
268
  doc["parser_id"] = ParserType.PICTURE.value
269
  if re.search(r"\.(ppt|pptx|pages)$", filename):
270
  doc["parser_id"] = ParserType.PRESENTATION.value
271
- doc = DocumentService.insert(doc)
272
- return get_json_result(data=doc.to_json())
 
273
  except Exception as e:
274
  return server_error_response(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  from api.utils import get_uuid, current_timestamp, datetime_format
32
  from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request
33
  from itsdangerous import URLSafeTimedSerializer
34
+ from api.db.services.task_service import TaskService, queue_tasks
35
  from api.utils.file_utils import filename_type, thumbnail
36
  from rag.utils.minio_conn import MINIO
37
+ from api.db.db_models import Task
38
+ from api.db.services.file2document_service import File2DocumentService
39
  def generate_confirmation_token(tenent_id):
40
  serializer = URLSafeTimedSerializer(tenent_id)
41
  return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]
 
229
  return get_json_result(
230
  data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
231
 
232
+
233
  file = request.files['file']
234
  if file.filename == '':
235
  return get_json_result(
 
253
  location += "_"
254
  blob = request.files['file'].read()
255
  MINIO.put(kb_id, location, blob)
256
+
257
  doc = {
258
  "id": get_uuid(),
259
  "kb_id": kb.id,
 
266
  "size": len(blob),
267
  "thumbnail": thumbnail(filename, blob)
268
  }
269
+
270
+ form_data=request.form
271
+ if "parser_id" in form_data.keys():
272
+ if request.form.get("parser_id").strip() in list(vars(ParserType).values())[1:-3]:
273
+ doc["parser_id"] = request.form.get("parser_id").strip()
274
  if doc["type"] == FileType.VISUAL:
275
  doc["parser_id"] = ParserType.PICTURE.value
276
  if re.search(r"\.(ppt|pptx|pages)$", filename):
277
  doc["parser_id"] = ParserType.PRESENTATION.value
278
+
279
+ doc_result = DocumentService.insert(doc)
280
+
281
  except Exception as e:
282
  return server_error_response(e)
283
+
284
+ if "run" in form_data.keys():
285
+ if request.form.get("run").strip() == "1":
286
+ try:
287
+ info = {"run": 1, "progress": 0}
288
+ info["progress_msg"] = ""
289
+ info["chunk_num"] = 0
290
+ info["token_num"] = 0
291
+ DocumentService.update_by_id(doc["id"], info)
292
+ # if str(req["run"]) == TaskStatus.CANCEL.value:
293
+ tenant_id = DocumentService.get_tenant_id(doc["id"])
294
+ if not tenant_id:
295
+ return get_data_error_result(retmsg="Tenant not found!")
296
+
297
+ #e, doc = DocumentService.get_by_id(doc["id"])
298
+ TaskService.filter_delete([Task.doc_id == doc["id"]])
299
+ e, doc = DocumentService.get_by_id(doc["id"])
300
+ doc = doc.to_dict()
301
+ doc["tenant_id"] = tenant_id
302
+ bucket, name = File2DocumentService.get_minio_address(doc_id=doc["id"])
303
+ queue_tasks(doc, bucket, name)
304
+ except Exception as e:
305
+ return server_error_response(e)
306
+
307
+ return get_json_result(data=doc_result.to_json())