bstraehle committed on
Commit
acc8cb5
·
verified ·
1 Parent(s): d89899f

Update util.py

Browse files
Files changed (1) hide show
  1. util.py +6 -3
util.py CHANGED
@@ -39,8 +39,11 @@ def read_docx(file_path):
39
  for block in doc.element.body:
40
  if block.tag.endswith("p"):
41
  for paragraph in doc.paragraphs:
42
- if paragraph._element == block and paragraph.text:
43
- text.append(paragraph.text + "\n")
 
 
 
44
  elif block.tag.endswith("tbl"):
45
  for table in doc.tables:
46
  if table._element == block:
@@ -50,7 +53,7 @@ def read_docx(file_path):
50
  for cell in row.cells:
51
  row_text.append(cell.text.strip())
52
 
53
- text.append(" | ".join(row_text) + "\n")
54
 
55
  return "\n".join(text)
56
 
 
39
  for block in doc.element.body:
40
  if block.tag.endswith("p"):
41
  for paragraph in doc.paragraphs:
42
+ if paragraph._element == block:
43
+ if paragraph.style.name.startswith("Heading"):
44
+ text.append("\n" + paragraph.text + "\n")
45
+ elif paragraph.text:
46
+ text.append(paragraph.text)
47
  elif block.tag.endswith("tbl"):
48
  for table in doc.tables:
49
  if table._element == block:
 
53
  for cell in row.cells:
54
  row_text.append(cell.text.strip())
55
 
56
+ text.append(" | ".join(row_text))
57
 
58
  return "\n".join(text)
59