KevinHuSh
committed on
Commit
·
a505adc
1
Parent(s):
6e3eead
Avoid assertion error when an Excel sheet has no rows (#197)
Browse files

### What problem does this PR solve?
_Briefly describe what this PR aims to solve. Include background context
that will help reviewers understand the purpose of the PR._
Issue link: [#196](https://github.com/infiniflow/ragflow/issues/196)
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Breaking Change (fix or feature that could cause existing
functionality not to work as expected)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Test cases
- [ ] Python SDK impacted, Need to update PyPI
- [ ] Other (please describe):
- deepdoc/parser/excel_parser.py +2 -0
- rag/app/manual.py +1 -0
- rag/app/one.py +1 -0
- rag/app/table.py +1 -0
deepdoc/parser/excel_parser.py
CHANGED
@@ -14,6 +14,7 @@ class HuExcelParser:
|
|
14 |
for sheetname in wb.sheetnames:
|
15 |
ws = wb[sheetname]
|
16 |
rows = list(ws.rows)
|
|
|
17 |
tb += f"<table><caption>{sheetname}</caption><tr>"
|
18 |
for t in list(rows[0]):
|
19 |
tb += f"<th>{t.value}</th>"
|
@@ -38,6 +39,7 @@ class HuExcelParser:
|
|
38 |
for sheetname in wb.sheetnames:
|
39 |
ws = wb[sheetname]
|
40 |
rows = list(ws.rows)
|
|
|
41 |
ti = list(rows[0])
|
42 |
for r in list(rows[1:]):
|
43 |
l = []
|
|
|
14 |
for sheetname in wb.sheetnames:
|
15 |
ws = wb[sheetname]
|
16 |
rows = list(ws.rows)
|
17 |
+
if not rows:continue
|
18 |
tb += f"<table><caption>{sheetname}</caption><tr>"
|
19 |
for t in list(rows[0]):
|
20 |
tb += f"<th>{t.value}</th>"
|
|
|
39 |
for sheetname in wb.sheetnames:
|
40 |
ws = wb[sheetname]
|
41 |
rows = list(ws.rows)
|
42 |
+
if not rows:continue
|
43 |
ti = list(rows[0])
|
44 |
for r in list(rows[1:]):
|
45 |
l = []
|
rag/app/manual.py
CHANGED
@@ -109,6 +109,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|
109 |
sections = [(txt, sec_ids[i], poss)
|
110 |
for i, (txt, _, poss) in enumerate(sections)]
|
111 |
for (img, rows), poss in tbls:
|
|
|
112 |
sections.append((rows if isinstance(rows, str) else rows[0], -1,
|
113 |
[(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
|
114 |
|
|
|
109 |
sections = [(txt, sec_ids[i], poss)
|
110 |
for i, (txt, _, poss) in enumerate(sections)]
|
111 |
for (img, rows), poss in tbls:
|
112 |
+
if not rows:continue
|
113 |
sections.append((rows if isinstance(rows, str) else rows[0], -1,
|
114 |
[(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
|
115 |
|
rag/app/one.py
CHANGED
@@ -44,6 +44,7 @@ class Pdf(PdfParser):
|
|
44 |
sections = [(b["text"], self.get_position(b, zoomin))
|
45 |
for i, b in enumerate(self.boxes)]
|
46 |
for (img, rows), poss in tbls:
|
|
|
47 |
sections.append((rows if isinstance(rows, str) else rows[0],
|
48 |
[(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
|
49 |
return [(txt, "") for txt, _ in sorted(sections, key=lambda x: (
|
|
|
44 |
sections = [(b["text"], self.get_position(b, zoomin))
|
45 |
for i, b in enumerate(self.boxes)]
|
46 |
for (img, rows), poss in tbls:
|
47 |
+
if not rows:continue
|
48 |
sections.append((rows if isinstance(rows, str) else rows[0],
|
49 |
[(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
|
50 |
return [(txt, "") for txt, _ in sorted(sections, key=lambda x: (
|
rag/app/table.py
CHANGED
@@ -40,6 +40,7 @@ class Excel(ExcelParser):
|
|
40 |
for sheetname in wb.sheetnames:
|
41 |
ws = wb[sheetname]
|
42 |
rows = list(ws.rows)
|
|
|
43 |
headers = [cell.value for cell in rows[0]]
|
44 |
missed = set([i for i, h in enumerate(headers) if h is None])
|
45 |
headers = [
|
|
|
40 |
for sheetname in wb.sheetnames:
|
41 |
ws = wb[sheetname]
|
42 |
rows = list(ws.rows)
|
43 |
+
if not rows:continue
|
44 |
headers = [cell.value for cell in rows[0]]
|
45 |
missed = set([i for i, h in enumerate(headers) if h is None])
|
46 |
headers = [
|