Spaces:

bstraehle
/

grady

Running

bstraehle committed on May 6

Commit

4caba26

verified ·

1 Parent(s): 67a608a

Update util.py

Files changed (1) hide show

util.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import os
 import pandas as pd
 def get_questions(file_path, level):
     df = pd.read_json(file_path, lines=True)
@@ -13,10 +15,10 @@ def get_questions(file_path, level):
     return result
 def read_file(file_path):
-    df = None
     ext = os.path.splitext(file_path)[1].lower()
     if ext == ".csv":
         df = pd.read_csv(file_path)
     elif ext in (".xls", ".xlsx"):
@@ -24,4 +26,30 @@ def read_file(file_path):
     elif ext in (".json", ".jsonl"):
         df = pd.read_json(file_path)
-    return "" if df is None else df.to_json()

 import os
 import pandas as pd
+from docx import Document
+from pptx import Presentation
 def get_questions(file_path, level):
     df = pd.read_json(file_path, lines=True)
     return result
 def read_file(file_path):
     ext = os.path.splitext(file_path)[1].lower()
+    df = None
     if ext == ".csv":
         df = pd.read_csv(file_path)
     elif ext in (".xls", ".xlsx"):
     elif ext in (".json", ".jsonl"):
         df = pd.read_json(file_path)
+    return "" if df is None else df.to_json()
+def read_docx(file_path):
+    doc = Document(file_path)
+    text = []
+    for para in doc.paragraphs:
+        text.append(para.text)
+    return "\n".join(text)
+def read_pptx(file_path):
+    prs = Presentation(file_path)
+    text = []
+    for slide in prs.slides:
+        slide_text = []
+        for shape in slide.shapes:
+            if hasattr(shape, "text"):
+                slide_text.append(shape.text)
+        text.append("\n".join(slide_text))
+    return "\n\n".join(text)