Kevin Hu committed on
Commit
10534c3
·
1 Parent(s): 942993f

pypdf2 to pypdf (#1684)

Browse files

### What problem does this PR solve?

Replaces PyPDF2 with pypdf to address the issue "pypdf and PyPDF2 possible
Infinite Loop when a comment isn't followed by a character" (#59).

### Type of change

- [x] Refactoring

deepdoc/parser/pdf_parser.py CHANGED
@@ -23,7 +23,7 @@ import logging
23
  from PIL import Image, ImageDraw
24
  import numpy as np
25
  from timeit import default_timer as timer
26
- from PyPDF2 import PdfReader as pdf2_read
27
 
28
  from api.utils.file_utils import get_project_base_directory
29
  from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
 
23
  from PIL import Image, ImageDraw
24
  import numpy as np
25
  from timeit import default_timer as timer
26
+ from pypdf import PdfReader as pdf2_read
27
 
28
  from api.utils.file_utils import get_project_base_directory
29
  from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
requirements.txt CHANGED
@@ -79,3 +79,4 @@ word2number==1.1
79
  xgboost==2.1.0
80
  xpinyin==0.7.6
81
  zhipuai==2.0.1
 
 
79
  xgboost==2.1.0
80
  xpinyin==0.7.6
81
  zhipuai==2.0.1
82
+ pypdf==4.3.0
requirements_arm.txt CHANGED
@@ -153,3 +153,4 @@ groq==0.9.0
153
  wikipedia==1.4.0
154
  Bio==1.7.1
155
  arxiv==2.1.3
 
 
153
  wikipedia==1.4.0
154
  Bio==1.7.1
155
  arxiv==2.1.3
156
+ pypdf==4.3.0
requirements_dev.txt CHANGED
@@ -138,3 +138,4 @@ groq==0.9.0
138
  wikipedia==1.4.0
139
  Bio==1.7.1
140
  arxiv==2.1.3
 
 
138
  wikipedia==1.4.0
139
  Bio==1.7.1
140
  arxiv==2.1.3
141
+ pypdf==4.3.0