Kevin Hu
committed on
Commit
·
10534c3
1
Parent(s):
942993f
pypdf2 to pypdf (#1684)
Browse files
### What problem does this PR solve?
Switches from PyPDF2 to pypdf to address the issue "pypdf and PyPDF2:
possible infinite loop when a comment isn't followed by a character" (#59).
### Type of change
- [x] Refactoring
- deepdoc/parser/pdf_parser.py +1 -1
- requirements.txt +1 -0
- requirements_arm.txt +1 -0
- requirements_dev.txt +1 -0
deepdoc/parser/pdf_parser.py
CHANGED
@@ -23,7 +23,7 @@ import logging
|
|
23 |
from PIL import Image, ImageDraw
|
24 |
import numpy as np
|
25 |
from timeit import default_timer as timer
|
26 |
-
from PyPDF2 import PdfReader as pdf2_read
|
27 |
|
28 |
from api.utils.file_utils import get_project_base_directory
|
29 |
from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
|
|
|
23 |
from PIL import Image, ImageDraw
|
24 |
import numpy as np
|
25 |
from timeit import default_timer as timer
|
26 |
+
from pypdf import PdfReader as pdf2_read
|
27 |
|
28 |
from api.utils.file_utils import get_project_base_directory
|
29 |
from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
|
requirements.txt
CHANGED
@@ -79,3 +79,4 @@ word2number==1.1
|
|
79 |
xgboost==2.1.0
|
80 |
xpinyin==0.7.6
|
81 |
zhipuai==2.0.1
|
|
|
|
79 |
xgboost==2.1.0
|
80 |
xpinyin==0.7.6
|
81 |
zhipuai==2.0.1
|
82 |
+
pypdf==4.3.0
|
requirements_arm.txt
CHANGED
@@ -153,3 +153,4 @@ groq==0.9.0
|
|
153 |
wikipedia==1.4.0
|
154 |
Bio==1.7.1
|
155 |
arxiv==2.1.3
|
|
|
|
153 |
wikipedia==1.4.0
|
154 |
Bio==1.7.1
|
155 |
arxiv==2.1.3
|
156 |
+
pypdf==4.3.0
|
requirements_dev.txt
CHANGED
@@ -138,3 +138,4 @@ groq==0.9.0
|
|
138 |
wikipedia==1.4.0
|
139 |
Bio==1.7.1
|
140 |
arxiv==2.1.3
|
|
|
|
138 |
wikipedia==1.4.0
|
139 |
Bio==1.7.1
|
140 |
arxiv==2.1.3
|
141 |
+
pypdf==4.3.0
|