Kevin Hu
committed on
Commit
·
3ec35b2
1
Parent(s):
e195b4d
force eml file to be parsed by EMAIL (#2615)
Browse files
### What problem does this PR solve?
#2613
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- api/apps/dataset_api.py +2 -0
- api/apps/document_app.py +2 -0
api/apps/dataset_api.py
CHANGED
@@ -381,6 +381,8 @@ def upload_documents(dataset_id):
|
|
381 |
doc["parser_id"] = ParserType.AUDIO.value
|
382 |
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
383 |
doc["parser_id"] = ParserType.PRESENTATION.value
|
|
|
|
|
384 |
DocumentService.insert(doc)
|
385 |
|
386 |
FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
|
|
|
381 |
doc["parser_id"] = ParserType.AUDIO.value
|
382 |
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
383 |
doc["parser_id"] = ParserType.PRESENTATION.value
|
384 |
+
if re.search(r"\.(eml)$", filename):
|
385 |
+
doc["parser_id"] = ParserType.EMAIL.value
|
386 |
DocumentService.insert(doc)
|
387 |
|
388 |
FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
|
api/apps/document_app.py
CHANGED
@@ -139,6 +139,8 @@ def web_crawl():
|
|
139 |
doc["parser_id"] = ParserType.AUDIO.value
|
140 |
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
141 |
doc["parser_id"] = ParserType.PRESENTATION.value
|
|
|
|
|
142 |
DocumentService.insert(doc)
|
143 |
FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
|
144 |
except Exception as e:
|
|
|
139 |
doc["parser_id"] = ParserType.AUDIO.value
|
140 |
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
141 |
doc["parser_id"] = ParserType.PRESENTATION.value
|
142 |
+
if re.search(r"\.(eml)$", filename):
|
143 |
+
doc["parser_id"] = ParserType.EMAIL.value
|
144 |
DocumentService.insert(doc)
|
145 |
FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
|
146 |
except Exception as e:
|