bstraehle committed on
Commit
a3ac33b
·
verified ·
1 Parent(s): e3cf8e7

Update util.py

Browse files
Files changed (1) hide show
  1. util.py +15 -15
util.py CHANGED
@@ -36,21 +36,21 @@ def read_docx(file_path):
36
 
37
  text = []
38
 
39
- for element in doc.element.body:
40
- if element.tag.endswith("p"):
41
- paragraph = doc.document.paragraphs[len(full_text)]
42
-
43
- if paragraph.text:
44
- text.append(paragraph.text)
45
- elif element.tag.endswith("tbl"):
46
- table = doc.tables[0]
47
-
48
- for row in table.rows:
49
- row_text = []
50
-
51
- for cell in row.cells:
52
- row_text.append(cell.text.strip())
53
- text.append(" | ".join(row_text))
54
 
55
  return "\n".join(text)
56
 
 
36
 
37
  text = []
38
 
39
+ for block in doc.element.body:
40
+ if block.tag.endswith("p"):
41
+ for paragraph in doc.paragraphs:
42
+ if paragraph._element == block and paragraph.text:
43
+ text.append(paragraph.text)
44
+ elif block.tag.endswith("tbl"):
45
+ for table in doc.tables:
46
+ if table._element == block:
47
+ for row in table.rows:
48
+ row_text = []
49
+
50
+ for cell in row.cells:
51
+ row_text.append(cell.text.strip())
52
+
53
+ text.append(" | ".join(row_text))
54
 
55
  return "\n".join(text)
56