bstraehle committed on
Commit
193c7ed
·
verified ·
1 Parent(s): 3d3e0e3

Update util.py

Browse files
Files changed (1) hide show
  1. util.py +18 -1
util.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import pandas as pd
3
  from docx import Document
 
4
 
5
  def get_questions(file_path, level):
6
  df = pd.read_json(file_path, lines=True)
@@ -38,4 +39,20 @@ def read_docx(file_path):
38
  for para in doc.paragraphs:
39
  text.append(para.text)
40
 
41
- return "\n".join(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import pandas as pd
3
  from docx import Document
4
+ from pptx import Presentation
5
 
6
  def get_questions(file_path, level):
7
  df = pd.read_json(file_path, lines=True)
 
39
  for para in doc.paragraphs:
40
  text.append(para.text)
41
 
42
+ return "\n".join(text)
43
+
44
def read_pptx(file_path):
    """Return the text of a PowerPoint file as a single string.

    Args:
        file_path: Passed straight to ``Presentation`` to open the deck.

    Returns:
        The ``text`` of every shape that exposes a ``text`` attribute.
        Shapes on the same slide are joined with a newline, and slides
        are separated from each other by a blank line.
    """
    deck = Presentation(file_path)

    def slide_to_text(slide):
        # Shapes without a ``text`` attribute are skipped.
        return "\n".join(
            shape.text for shape in slide.shapes if hasattr(shape, "text")
        )

    return "\n\n".join(slide_to_text(slide) for slide in deck.slides)