lamhieu committed on
Commit
25ddcaa
·
1 Parent(s): fe82d83

chore: update something

Browse files
Files changed (1) hide show
  1. docsifer/service.py +12 -12
docsifer/service.py CHANGED
@@ -149,7 +149,7 @@ class DocsiferService:
149
  new_filename = f"{src.stem}{guessed_ext}"
150
  tmp_path = src.parent / new_filename
151
  tmp_path.write_bytes(src.read_bytes())
152
- src.unlink()
153
 
154
  logger.info(
155
  "Using temp file: %s, MIME type: %s, Guessed ext: %s, Existing: %s",
@@ -160,8 +160,8 @@ class DocsiferService:
160
  )
161
 
162
  # Perform HTML cleanup if requested.
163
- if cleanup and guessed_ext.lower() in (".html", ".htm"):
164
- self._maybe_cleanup_html(tmp_path)
165
 
166
  filename = new_filename
167
  source = tmp_path
@@ -173,13 +173,13 @@ class DocsiferService:
173
  md_converter = self._basic_markitdown
174
 
175
  # Load cookies if provided in the HTTP config.
176
- if http_config:
177
- if "cookies" in http_config:
178
- requests.cookies.cookiejar_from_dict(
179
- http_config["cookies"],
180
- requests.cookies.RequestsCookieJar,
181
- overwrite=True,
182
- )
183
 
184
  try:
185
  result_obj = md_converter.convert(source)
@@ -188,8 +188,8 @@ class DocsiferService:
188
  logger.error("MarkItDown conversion failed: %s", e)
189
  raise RuntimeError(f"Conversion failed for '{source}': {e}")
190
 
191
- if isinstance(source, Path) and source.exists():
192
- source.unlink()
193
 
194
  # Count tokens in the resulting markdown text.
195
  token_count = self._count_tokens(result_obj.text_content)
 
149
  new_filename = f"{src.stem}{guessed_ext}"
150
  tmp_path = src.parent / new_filename
151
  tmp_path.write_bytes(src.read_bytes())
152
+ # src.unlink()
153
 
154
  logger.info(
155
  "Using temp file: %s, MIME type: %s, Guessed ext: %s, Existing: %s",
 
160
  )
161
 
162
  # Perform HTML cleanup if requested.
163
+ # if cleanup and guessed_ext.lower() in (".html", ".htm"):
164
+ # self._maybe_cleanup_html(tmp_path)
165
 
166
  filename = new_filename
167
  source = tmp_path
 
173
  md_converter = self._basic_markitdown
174
 
175
  # Load cookies if provided in the HTTP config.
176
+ # if http_config:
177
+ # if "cookies" in http_config:
178
+ # requests.cookies.cookiejar_from_dict(
179
+ # http_config["cookies"],
180
+ # requests.cookies.RequestsCookieJar,
181
+ # overwrite=True,
182
+ # )
183
 
184
  try:
185
  result_obj = md_converter.convert(source)
 
188
  logger.error("MarkItDown conversion failed: %s", e)
189
  raise RuntimeError(f"Conversion failed for '{source}': {e}")
190
 
191
+ # if isinstance(source, Path) and source.exists():
192
+ # source.unlink()
193
 
194
  # Count tokens in the resulting markdown text.
195
  token_count = self._count_tokens(result_obj.text_content)