bstraehle committed on
Commit
e3cf8e7
·
verified ·
1 Parent(s): 5667912

Update util.py

Browse files
Files changed (1) hide show
  1. util.py +10 -6
util.py CHANGED
@@ -36,16 +36,20 @@ def read_docx(file_path):
36
 
37
  text = []
38
 
39
- for para in doc.paragraphs:
40
- text.append(para.text)
41
-
42
- for table in doc.tables:
 
 
 
 
 
43
  for row in table.rows:
44
  row_text = []
45
 
46
  for cell in row.cells:
47
- row_text.append(cell.text)
48
-
49
  text.append(" | ".join(row_text))
50
 
51
  return "\n".join(text)
 
36
 
37
  text = []
38
 
39
+ for element in doc.element.body:
40
+ if element.tag.endswith("p"):
41
+ paragraph = doc.document.paragraphs[len(full_text)]
42
+
43
+ if paragraph.text:
44
+ text.append(paragraph.text)
45
+ elif element.tag.endswith("tbl"):
46
+ table = doc.tables[0]
47
+
48
  for row in table.rows:
49
  row_text = []
50
 
51
  for cell in row.cells:
52
+ row_text.append(cell.text.strip())
 
53
  text.append(" | ".join(row_text))
54
 
55
  return "\n".join(text)