Refactor image extraction logic in server.py
Browse files
server.py
CHANGED
|
@@ -12,15 +12,14 @@ def parse_pdf_and_return_markdown(pdf_file: bytes , extract_images: bool):
|
|
| 12 |
full_text, images, out_meta = convert_single_pdf(pdf_file, model_list)
|
| 13 |
image_data = {}
|
| 14 |
if extract_images:
|
| 15 |
-
for
|
| 16 |
-
# image_filepath = f"image_{i+1}.png"
|
| 17 |
image.save(filename, "PNG")
|
| 18 |
|
| 19 |
with open(filename, "rb") as f:
|
| 20 |
image_bytes = f.read()
|
| 21 |
|
| 22 |
image_base64 = base64.b64encode(image_bytes).decode('utf-8')
|
| 23 |
-
image_data[
|
| 24 |
|
| 25 |
os.remove(filename)
|
| 26 |
|
|
|
|
| 12 |
full_text, images, out_meta = convert_single_pdf(pdf_file, model_list)
|
| 13 |
image_data = {}
|
| 14 |
if extract_images:
|
| 15 |
+
for filename, image in images.items():
|
|
|
|
| 16 |
image.save(filename, "PNG")
|
| 17 |
|
| 18 |
with open(filename, "rb") as f:
|
| 19 |
image_bytes = f.read()
|
| 20 |
|
| 21 |
image_base64 = base64.b64encode(image_bytes).decode('utf-8')
|
| 22 |
+
image_data[filename] = image_base64
|
| 23 |
|
| 24 |
os.remove(filename)
|
| 25 |
|