import os

import pandas as pd

try:
    from docx import Document
except ImportError:  # python-docx is only needed by read_docx()
    Document = None


def get_questions(file_path, level) -> list:
    """Load questions of one difficulty level from a JSON Lines file.

    Args:
        file_path: Path to a JSON Lines file whose records carry the keys
            "Level", "Question", "file_name" and "Final answer".
        level: Value the "Level" field must equal for a record to be kept.

    Returns:
        A list of ``[Level, Question, file_name, Final answer]`` lists, one
        per matching record, in file order.
    """
    df = pd.read_json(file_path, lines=True)
    df = df[df["Level"] == level]
    return [
        [row["Level"], row["Question"], row["file_name"], row["Final answer"]]
        for _, row in df.iterrows()
    ]


def is_ext(file_path, ext) -> bool:
    """Return True if *file_path* ends with extension *ext* (case-insensitive).

    *ext* must include the leading dot, e.g. ``".csv"``.
    """
    return os.path.splitext(file_path)[1].lower() == ext.lower()


def read_file(file_path) -> str:
    """Read a tabular file and return its contents serialized as JSON.

    Supported extensions are .csv, .xls, .xlsx, .json and .jsonl (matched
    case-insensitively).

    Args:
        file_path: Path to the file to read.

    Returns:
        ``DataFrame.to_json()`` output for supported files, or an empty
        string when the extension is not recognised.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".csv":
        df = pd.read_csv(file_path)
    elif ext in (".xls", ".xlsx"):
        df = pd.read_excel(file_path)
    elif ext in (".json", ".jsonl"):
        # JSON Lines holds one record per line; without lines=True pandas
        # rejects any multi-record .jsonl file with "Trailing data".
        df = pd.read_json(file_path, lines=(ext == ".jsonl"))
    else:
        return ""
    return df.to_json()


def read_docx(file_path) -> str:
    """Return the text of every paragraph in a .docx file, newline-joined.

    Raises:
        ImportError: If the python-docx package is not installed.
    """
    if Document is None:
        raise ImportError("python-docx is required to read .docx files")
    doc = Document(file_path)
    return "\n".join(para.text for para in doc.paragraphs)