BhagatSurya committed on
Commit
cbbc0b7
·
1 Parent(s): 7aa959e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -5
app.py CHANGED
@@ -14,7 +14,6 @@ from pdf2image.exceptions import (
14
  import fitz # PyMuPDF
15
  from PIL import Image
16
  import io
17
- import base64
18
 
19
  def clean_text(text):
20
  nlp = spacy.load("en_core_web_sm", disable=["tagger", "parser", "ner", "textcat"])
@@ -39,10 +38,6 @@ def pdf_to_text(file):
39
  image_list = page.get_images(full=True)
40
  for img in image_list:
41
  xref, name, ext, color_space, width, height, bpc, image_data, image_mask, smask_data = img
42
- print(type(image_data)) # Check the type of image_data
43
- if isinstance(image_data, str):
44
- # If image_data is a string, try to decode it as base64
45
- image_data = base64.b64decode(image_data)
46
  image = Image.open(io.BytesIO(image_data))
47
  latex_code = image_to_latex(image)
48
  page_text += "\n" + latex_code # Add LaTeX code to page text
 
14
  import fitz # PyMuPDF
15
  from PIL import Image
16
  import io
 
17
 
18
  def clean_text(text):
19
  nlp = spacy.load("en_core_web_sm", disable=["tagger", "parser", "ner", "textcat"])
 
38
  image_list = page.get_images(full=True)
39
  for img in image_list:
40
  xref, name, ext, color_space, width, height, bpc, image_data, image_mask, smask_data = img
 
 
 
 
41
  image = Image.open(io.BytesIO(image_data))
42
  latex_code = image_to_latex(image)
43
  page_text += "\n" + latex_code # Add LaTeX code to page text