fix: revert original file extension
Browse files
- docsifer/service.py +7 -1
- requirements.txt +5 -7
docsifer/service.py
CHANGED
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|
4 |
|
5 |
import logging
|
6 |
import tempfile
|
|
|
7 |
from pathlib import Path
|
8 |
from typing import Optional, Dict, Tuple, Any
|
9 |
|
@@ -107,7 +108,12 @@ class DocsiferService:
|
|
107 |
|
108 |
# Use a temp directory so MarkItDown sees the real file extension
|
109 |
with tempfile.TemporaryDirectory() as tmpdir:
|
110 |
-
|
|
|
|
|
|
|
|
|
|
|
111 |
tmp_path.write_bytes(src.read_bytes())
|
112 |
|
113 |
# If it's HTML and cleanup is requested
|
|
|
4 |
|
5 |
import logging
|
6 |
import tempfile
|
7 |
+
import filetype
|
8 |
from pathlib import Path
|
9 |
from typing import Optional, Dict, Tuple, Any
|
10 |
|
|
|
108 |
|
109 |
# Use a temp directory so MarkItDown sees the real file extension
|
110 |
with tempfile.TemporaryDirectory() as tmpdir:
|
111 |
+
kind = filetype.guess(str(src))
|
112 |
+
if kind is None:
|
113 |
+
new_filename = src.name
|
114 |
+
else:
|
115 |
+
new_filename = f"{src.stem}.{kind.extension}"
|
116 |
+
tmp_path = Path(tmpdir) / new_filename
|
117 |
tmp_path.write_bytes(src.read_bytes())
|
118 |
|
119 |
# If it's HTML and cleanup is requested
|
requirements.txt
CHANGED
@@ -6,11 +6,9 @@ pydantic
|
|
6 |
cachetools
|
7 |
upstash_redis==1.2.0
|
8 |
markitdown
|
9 |
-
openai
|
10 |
-
pyquery
|
11 |
-
tiktoken
|
12 |
scuid
|
13 |
-
|
14 |
-
|
15 |
-
matplotlib
|
16 |
-
aiohttp
|
|
|
6 |
cachetools
|
7 |
upstash_redis==1.2.0
|
8 |
markitdown
|
9 |
+
openai==1.59.7
|
10 |
+
pyquery==2.0.1
|
11 |
+
tiktoken==0.8.0
|
12 |
scuid
|
13 |
+
aiohttp==3.11.11
|
14 |
+
filetype==1.2.0
|
|
|
|