lamhieu committed on
Commit
3fac5a6
·
1 Parent(s): efe7d53

fix: revert original file extension

Browse files
Files changed (2) hide show
  1. docsifer/service.py +7 -1
  2. requirements.txt +5 -7
docsifer/service.py CHANGED
@@ -4,6 +4,7 @@ from __future__ import annotations
4
 
5
  import logging
6
  import tempfile
 
7
  from pathlib import Path
8
  from typing import Optional, Dict, Tuple, Any
9
 
@@ -107,7 +108,12 @@ class DocsiferService:
107
 
108
  # Use a temp directory so MarkItDown sees the real file extension
109
  with tempfile.TemporaryDirectory() as tmpdir:
110
- tmp_path = Path(tmpdir) / src.name
 
 
 
 
 
111
  tmp_path.write_bytes(src.read_bytes())
112
 
113
  # If it's HTML and cleanup is requested
 
4
 
5
  import logging
6
  import tempfile
7
+ import filetype
8
  from pathlib import Path
9
  from typing import Optional, Dict, Tuple, Any
10
 
 
108
 
109
  # Use a temp directory so MarkItDown sees the real file extension
110
  with tempfile.TemporaryDirectory() as tmpdir:
111
+ kind = filetype.guess(str(src))
112
+ if kind is None:
113
+ new_filename = src.name
114
+ else:
115
+ new_filename = f"{src.stem}.{kind.extension}"
116
+ tmp_path = Path(tmpdir) / new_filename
117
  tmp_path.write_bytes(src.read_bytes())
118
 
119
  # If it's HTML and cleanup is requested
requirements.txt CHANGED
@@ -6,11 +6,9 @@ pydantic
6
  cachetools
7
  upstash_redis==1.2.0
8
  markitdown
9
- openai
10
- pyquery
11
- tiktoken
12
  scuid
13
- python-magic
14
- plotly
15
- matplotlib
16
- aiohttp
 
6
  cachetools
7
  upstash_redis==1.2.0
8
  markitdown
9
+ openai==1.59.7
10
+ pyquery==2.0.1
11
+ tiktoken==0.8.0
12
  scuid
13
+ aiohttp==3.11.11
14
+ filetype==1.2.0