KevinHuSh committed on
Commit
a505adc
·
1 Parent(s): 6e3eead

Avoid an assertion failure when an Excel sheet has no rows (#197)

Browse files

### What problem does this PR solve?

Skip worksheets and extracted tables that contain no rows, so that the
first row (`rows[0]`) is never accessed on an empty collection.

Issue link: [#196](https://github.com/infiniflow/ragflow/issues/196)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Breaking Change (fix or feature that could cause existing
functionality not to work as expected)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Test cases
- [ ] Python SDK impacted, Need to update PyPI
- [ ] Other (please describe):

deepdoc/parser/excel_parser.py CHANGED
@@ -14,6 +14,7 @@ class HuExcelParser:
14
  for sheetname in wb.sheetnames:
15
  ws = wb[sheetname]
16
  rows = list(ws.rows)
 
17
  tb += f"<table><caption>{sheetname}</caption><tr>"
18
  for t in list(rows[0]):
19
  tb += f"<th>{t.value}</th>"
@@ -38,6 +39,7 @@ class HuExcelParser:
38
  for sheetname in wb.sheetnames:
39
  ws = wb[sheetname]
40
  rows = list(ws.rows)
 
41
  ti = list(rows[0])
42
  for r in list(rows[1:]):
43
  l = []
 
14
  for sheetname in wb.sheetnames:
15
  ws = wb[sheetname]
16
  rows = list(ws.rows)
17
+ if not rows:continue
18
  tb += f"<table><caption>{sheetname}</caption><tr>"
19
  for t in list(rows[0]):
20
  tb += f"<th>{t.value}</th>"
 
39
  for sheetname in wb.sheetnames:
40
  ws = wb[sheetname]
41
  rows = list(ws.rows)
42
+ if not rows:continue
43
  ti = list(rows[0])
44
  for r in list(rows[1:]):
45
  l = []
rag/app/manual.py CHANGED
@@ -109,6 +109,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
109
  sections = [(txt, sec_ids[i], poss)
110
  for i, (txt, _, poss) in enumerate(sections)]
111
  for (img, rows), poss in tbls:
 
112
  sections.append((rows if isinstance(rows, str) else rows[0], -1,
113
  [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
114
 
 
109
  sections = [(txt, sec_ids[i], poss)
110
  for i, (txt, _, poss) in enumerate(sections)]
111
  for (img, rows), poss in tbls:
112
+ if not rows:continue
113
  sections.append((rows if isinstance(rows, str) else rows[0], -1,
114
  [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
115
 
rag/app/one.py CHANGED
@@ -44,6 +44,7 @@ class Pdf(PdfParser):
44
  sections = [(b["text"], self.get_position(b, zoomin))
45
  for i, b in enumerate(self.boxes)]
46
  for (img, rows), poss in tbls:
 
47
  sections.append((rows if isinstance(rows, str) else rows[0],
48
  [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
49
  return [(txt, "") for txt, _ in sorted(sections, key=lambda x: (
 
44
  sections = [(b["text"], self.get_position(b, zoomin))
45
  for i, b in enumerate(self.boxes)]
46
  for (img, rows), poss in tbls:
47
+ if not rows:continue
48
  sections.append((rows if isinstance(rows, str) else rows[0],
49
  [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
50
  return [(txt, "") for txt, _ in sorted(sections, key=lambda x: (
rag/app/table.py CHANGED
@@ -40,6 +40,7 @@ class Excel(ExcelParser):
40
  for sheetname in wb.sheetnames:
41
  ws = wb[sheetname]
42
  rows = list(ws.rows)
 
43
  headers = [cell.value for cell in rows[0]]
44
  missed = set([i for i, h in enumerate(headers) if h is None])
45
  headers = [
 
40
  for sheetname in wb.sheetnames:
41
  ws = wb[sheetname]
42
  rows = list(ws.rows)
43
+ if not rows:continue
44
  headers = [cell.value for cell in rows[0]]
45
  missed = set([i for i, h in enumerate(headers) if h is None])
46
  headers = [