Kevin Hu committed on
Commit
74b229a
·
1 Parent(s): 35dcaff

Catch the exception while parsing pptx. (#4202)

Browse files

### What problem does this PR solve?
#4189

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Files changed (1) hide show
  1. deepdoc/parser/ppt_parser.py +7 -4
deepdoc/parser/ppt_parser.py CHANGED
@@ -10,7 +10,7 @@
10
  # See the License for the specific language governing permissions and
11
  # limitations under the License.
12
  #
13
-
14
  from io import BytesIO
15
  from pptx import Presentation
16
 
@@ -53,9 +53,12 @@ class RAGFlowPptParser(object):
53
  texts = []
54
  for shape in sorted(
55
  slide.shapes, key=lambda x: ((x.top if x.top is not None else 0) // 10, x.left)):
56
- txt = self.__extract(shape)
57
- if txt:
58
- texts.append(txt)
 
 
 
59
  txts.append("\n".join(texts))
60
 
61
  return txts
 
10
  # See the License for the specific language governing permissions and
11
  # limitations under the License.
12
  #
13
+ import logging
14
  from io import BytesIO
15
  from pptx import Presentation
16
 
 
53
  texts = []
54
  for shape in sorted(
55
  slide.shapes, key=lambda x: ((x.top if x.top is not None else 0) // 10, x.left)):
56
+ try:
57
+ txt = self.__extract(shape)
58
+ if txt:
59
+ texts.append(txt)
60
+ except Exception as e:
61
+ logging.exception(e)
62
  txts.append("\n".join(texts))
63
 
64
  return txts