chore: comment out temp-file deletion, HTML cleanup, and cookie loading in DocsiferService
Browse files
- docsifer/service.py +12 -12
docsifer/service.py
CHANGED
@@ -149,7 +149,7 @@ class DocsiferService:
|
|
149 |
new_filename = f"{src.stem}{guessed_ext}"
|
150 |
tmp_path = src.parent / new_filename
|
151 |
tmp_path.write_bytes(src.read_bytes())
|
152 |
-
src.unlink()
|
153 |
|
154 |
logger.info(
|
155 |
"Using temp file: %s, MIME type: %s, Guessed ext: %s, Existing: %s",
|
@@ -160,8 +160,8 @@ class DocsiferService:
|
|
160 |
)
|
161 |
|
162 |
# Perform HTML cleanup if requested.
|
163 |
-
if cleanup and guessed_ext.lower() in (".html", ".htm"):
|
164 |
-
|
165 |
|
166 |
filename = new_filename
|
167 |
source = tmp_path
|
@@ -173,13 +173,13 @@ class DocsiferService:
|
|
173 |
md_converter = self._basic_markitdown
|
174 |
|
175 |
# Load cookies if provided in the HTTP config.
|
176 |
-
if http_config:
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
|
184 |
try:
|
185 |
result_obj = md_converter.convert(source)
|
@@ -188,8 +188,8 @@ class DocsiferService:
|
|
188 |
logger.error("MarkItDown conversion failed: %s", e)
|
189 |
raise RuntimeError(f"Conversion failed for '{source}': {e}")
|
190 |
|
191 |
-
if isinstance(source, Path) and source.exists():
|
192 |
-
|
193 |
|
194 |
# Count tokens in the resulting markdown text.
|
195 |
token_count = self._count_tokens(result_obj.text_content)
|
|
|
149 |
new_filename = f"{src.stem}{guessed_ext}"
|
150 |
tmp_path = src.parent / new_filename
|
151 |
tmp_path.write_bytes(src.read_bytes())
|
152 |
+
# src.unlink()
|
153 |
|
154 |
logger.info(
|
155 |
"Using temp file: %s, MIME type: %s, Guessed ext: %s, Existing: %s",
|
|
|
160 |
)
|
161 |
|
162 |
# Perform HTML cleanup if requested.
|
163 |
+
# if cleanup and guessed_ext.lower() in (".html", ".htm"):
|
164 |
+
# self._maybe_cleanup_html(tmp_path)
|
165 |
|
166 |
filename = new_filename
|
167 |
source = tmp_path
|
|
|
173 |
md_converter = self._basic_markitdown
|
174 |
|
175 |
# Load cookies if provided in the HTTP config.
|
176 |
+
# if http_config:
|
177 |
+
# if "cookies" in http_config:
|
178 |
+
# requests.cookies.cookiejar_from_dict(
|
179 |
+
# http_config["cookies"],
|
180 |
+
# requests.cookies.RequestsCookieJar,
|
181 |
+
# overwrite=True,
|
182 |
+
# )
|
183 |
|
184 |
try:
|
185 |
result_obj = md_converter.convert(source)
|
|
|
188 |
logger.error("MarkItDown conversion failed: %s", e)
|
189 |
raise RuntimeError(f"Conversion failed for '{source}': {e}")
|
190 |
|
191 |
+
# if isinstance(source, Path) and source.exists():
|
192 |
+
# source.unlink()
|
193 |
|
194 |
# Count tokens in the resulting markdown text.
|
195 |
token_count = self._count_tokens(result_obj.text_content)
|