Spaces:
Runtime error
Runtime error
Commit
·
cbbc0b7
1
Parent(s):
7aa959e
Update app.py
Browse files
app.py
CHANGED
@@ -14,7 +14,6 @@ from pdf2image.exceptions import (
|
|
14 |
import fitz # PyMuPDF
|
15 |
from PIL import Image
|
16 |
import io
|
17 |
-
import base64
|
18 |
|
19 |
def clean_text(text):
|
20 |
nlp = spacy.load("en_core_web_sm", disable=["tagger", "parser", "ner", "textcat"])
|
@@ -39,10 +38,6 @@ def pdf_to_text(file):
|
|
39 |
image_list = page.get_images(full=True)
|
40 |
for img in image_list:
|
41 |
xref, name, ext, color_space, width, height, bpc, image_data, image_mask, smask_data = img
|
42 |
-
print(type(image_data)) # Check the type of image_data
|
43 |
-
if isinstance(image_data, str):
|
44 |
-
# If image_data is a string, try to decode it as base64
|
45 |
-
image_data = base64.b64decode(image_data)
|
46 |
image = Image.open(io.BytesIO(image_data))
|
47 |
latex_code = image_to_latex(image)
|
48 |
page_text += "\n" + latex_code # Add LaTeX code to page text
|
|
|
14 |
import fitz # PyMuPDF
|
15 |
from PIL import Image
|
16 |
import io
|
|
|
17 |
|
18 |
def clean_text(text):
|
19 |
nlp = spacy.load("en_core_web_sm", disable=["tagger", "parser", "ner", "textcat"])
|
|
|
38 |
image_list = page.get_images(full=True)
|
39 |
for img in image_list:
|
40 |
xref, name, ext, color_space, width, height, bpc, image_data, image_mask, smask_data = img
|
|
|
|
|
|
|
|
|
41 |
image = Image.open(io.BytesIO(image_data))
|
42 |
latex_code = image_to_latex(image)
|
43 |
page_text += "\n" + latex_code # Add LaTeX code to page text
|