KevinHuSh committed on
Commit
abeee5e
·
1 Parent(s): 9b6b3f7

refine document upload (#602)

Browse files

### What problem does this PR solve?

#567

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Files changed (2) hide show
  1. api/apps/document_app.py +52 -48
  2. docker/entrypoint.sh +2 -1
api/apps/document_app.py CHANGED
@@ -51,55 +51,59 @@ def upload():
51
  if 'file' not in request.files:
52
  return get_json_result(
53
  data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
54
- file = request.files['file']
55
- if file.filename == '':
56
- return get_json_result(
57
- data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
58
 
59
- try:
60
- e, kb = KnowledgebaseService.get_by_id(kb_id)
61
- if not e:
62
- return get_data_error_result(
63
- retmsg="Can't find this knowledgebase!")
64
- MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
65
- if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
66
- return get_data_error_result(
67
- retmsg="Exceed the maximum file number of a free user!")
68
-
69
- filename = duplicate_name(
70
- DocumentService.query,
71
- name=file.filename,
72
- kb_id=kb.id)
73
- filetype = filename_type(filename)
74
- if filetype == FileType.OTHER.value:
75
- return get_data_error_result(
76
- retmsg="This type of file has not been supported yet!")
77
-
78
- location = filename
79
- while MINIO.obj_exist(kb_id, location):
80
- location += "_"
81
- blob = request.files['file'].read()
82
- MINIO.put(kb_id, location, blob)
83
- doc = {
84
- "id": get_uuid(),
85
- "kb_id": kb.id,
86
- "parser_id": kb.parser_id,
87
- "parser_config": kb.parser_config,
88
- "created_by": current_user.id,
89
- "type": filetype,
90
- "name": filename,
91
- "location": location,
92
- "size": len(blob),
93
- "thumbnail": thumbnail(filename, blob)
94
- }
95
- if doc["type"] == FileType.VISUAL:
96
- doc["parser_id"] = ParserType.PICTURE.value
97
- if re.search(r"\.(ppt|pptx|pages)$", filename):
98
- doc["parser_id"] = ParserType.PRESENTATION.value
99
- doc = DocumentService.insert(doc)
100
- return get_json_result(data=doc.to_json())
101
- except Exception as e:
102
- return server_error_response(e)
 
 
 
 
 
 
 
 
103
 
104
 
105
  @manager.route('/create', methods=['POST'])
 
51
  if 'file' not in request.files:
52
  return get_json_result(
53
  data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
 
 
 
 
54
 
55
+ file_objs = request.files.getlist('file')
56
+ for file_obj in file_objs:
57
+ if file_obj.filename == '':
58
+ return get_json_result(
59
+ data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
60
+
61
+ err = []
62
+ for file in file_objs:
63
+ try:
64
+ e, kb = KnowledgebaseService.get_by_id(kb_id)
65
+ if not e:
66
+ raise LookupError("Can't find this knowledgebase!")
67
+ MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
68
+ if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
69
+ raise RuntimeError("Exceed the maximum file number of a free user!")
70
+
71
+ filename = duplicate_name(
72
+ DocumentService.query,
73
+ name=file.filename,
74
+ kb_id=kb.id)
75
+ filetype = filename_type(filename)
76
+ if filetype == FileType.OTHER.value:
77
+ raise RuntimeError("This type of file has not been supported yet!")
78
+
79
+ location = filename
80
+ while MINIO.obj_exist(kb_id, location):
81
+ location += "_"
82
+ blob = file.read()
83
+ MINIO.put(kb_id, location, blob)
84
+ doc = {
85
+ "id": get_uuid(),
86
+ "kb_id": kb.id,
87
+ "parser_id": kb.parser_id,
88
+ "parser_config": kb.parser_config,
89
+ "created_by": current_user.id,
90
+ "type": filetype,
91
+ "name": filename,
92
+ "location": location,
93
+ "size": len(blob),
94
+ "thumbnail": thumbnail(filename, blob)
95
+ }
96
+ if doc["type"] == FileType.VISUAL:
97
+ doc["parser_id"] = ParserType.PICTURE.value
98
+ if re.search(r"\.(ppt|pptx|pages)$", filename):
99
+ doc["parser_id"] = ParserType.PRESENTATION.value
100
+ DocumentService.insert(doc)
101
+ except Exception as e:
102
+ err.append(file.filename + ": " + str(e))
103
+ if err:
104
+ return get_json_result(
105
+ data=False, retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
106
+ return get_json_result(data=True)
107
 
108
 
109
  @manager.route('/create', methods=['POST'])
docker/entrypoint.sh CHANGED
@@ -34,6 +34,7 @@ do
34
  task_exe $i $WS &
35
  done
36
 
 
37
  $PY api/ragflow_server.py
38
-
39
  wait;
 
34
  task_exe $i $WS &
35
  done
36
 
37
+ while [ 1 -eq 1 ];do
38
  $PY api/ragflow_server.py
39
+ done
40
  wait;