fix: resolve invalid file extension
Browse files
- docsifer/router.py +1 -1
- docsifer/service.py +7 -5
- requirements.txt +1 -1
docsifer/router.py
CHANGED
@@ -68,7 +68,7 @@ async def convert_document(
|
|
68 |
if file is not None:
|
69 |
with tempfile.TemporaryDirectory() as tmpdir:
|
70 |
contents = await file.read()
|
71 |
-
guessed_ext = mimetypes.guess_extension(file.content_type) or ""
|
72 |
new_name = f"{Path(file.filename).stem}{guessed_ext}"
|
73 |
temp_path = Path(tmpdir) / new_name
|
74 |
temp_path.write_bytes(contents)
|
|
|
68 |
if file is not None:
|
69 |
with tempfile.TemporaryDirectory() as tmpdir:
|
70 |
contents = await file.read()
|
71 |
+
guessed_ext = mimetypes.guess_extension(file.content_type) or ".tmp"
|
72 |
new_name = f"{Path(file.filename).stem}{guessed_ext}"
|
73 |
temp_path = Path(tmpdir) / new_name
|
74 |
temp_path.write_bytes(contents)
|
docsifer/service.py
CHANGED
@@ -4,7 +4,8 @@ from __future__ import annotations
|
|
4 |
|
5 |
import logging
|
6 |
import tempfile
|
7 |
-
import
|
|
|
8 |
from pathlib import Path
|
9 |
from typing import Optional, Dict, Tuple, Any
|
10 |
|
@@ -109,13 +110,14 @@ class DocsiferService:
|
|
109 |
|
110 |
# Use a temp directory so MarkItDown sees the real file extension
|
111 |
with tempfile.TemporaryDirectory() as tmpdir:
|
112 |
-
|
113 |
-
if
|
114 |
logger.warning(f"Could not detect file type for: {src}")
|
115 |
new_filename = src.name
|
116 |
else:
|
117 |
-
logger.debug(f"Detected
|
118 |
-
|
|
|
119 |
tmp_path = Path(tmpdir) / new_filename
|
120 |
tmp_path.write_bytes(src.read_bytes())
|
121 |
|
|
|
4 |
|
5 |
import logging
|
6 |
import tempfile
|
7 |
+
import magic
|
8 |
+
import mimetypes
|
9 |
from pathlib import Path
|
10 |
from typing import Optional, Dict, Tuple, Any
|
11 |
|
|
|
110 |
|
111 |
# Use a temp directory so MarkItDown sees the real file extension
|
112 |
with tempfile.TemporaryDirectory() as tmpdir:
|
113 |
+
mime_type = magic.from_file(str(src), mime=True)
|
114 |
+
if not mime_type:
|
115 |
logger.warning(f"Could not detect file type for: {src}")
|
116 |
new_filename = src.name
|
117 |
else:
|
118 |
+
logger.debug(f"Detected MIME type '{mime_type}' for: {src}")
|
119 |
+
guessed_ext = mimetypes.guess_extension(mime_type) or ""
|
120 |
+
new_filename = f"{src.stem}{guessed_ext}"
|
121 |
tmp_path = Path(tmpdir) / new_filename
|
122 |
tmp_path.write_bytes(src.read_bytes())
|
123 |
|
requirements.txt
CHANGED
@@ -11,4 +11,4 @@ pyquery==2.0.1
|
|
11 |
tiktoken==0.8.0
|
12 |
scuid
|
13 |
aiohttp==3.11.11
|
14 |
-
|
|
|
11 |
tiktoken==0.8.0
|
12 |
scuid
|
13 |
aiohttp==3.11.11
|
14 |
+
python-magic==0.4.27
|