| import os | |
| import pandas as pd | |
| from docx import Document | |
def get_questions(file_path, level):
    """Return the questions of one level from a JSON Lines file.

    The file at *file_path* is parsed with ``pd.read_json(..., lines=True)``
    and only rows whose ``"Level"`` column equals *level* are kept.

    Returns:
        A list with one entry per matching row, each entry being the list
        ``[Level, Question, file_name, Final answer]`` taken from the
        columns of the same names.
    """
    wanted_columns = ("Level", "Question", "file_name", "Final answer")
    frame = pd.read_json(file_path, lines=True)
    matching = frame[frame["Level"] == level]
    return [
        [record[column] for column in wanted_columns]
        for _, record in matching.iterrows()
    ]
def is_ext(file_path, ext):
    """Tell whether *file_path* ends with the extension *ext*.

    The comparison ignores case. *ext* is compared against the suffix
    produced by ``os.path.splitext``, which includes the leading dot
    (for example ``".csv"``).
    """
    _, suffix = os.path.splitext(file_path)
    return suffix.lower() == ext.lower()
def read_file(file_path):
    """Load a tabular data file and return its contents as a JSON string.

    The loader is chosen from the lower-cased file extension:

    * ``.csv`` -> ``pd.read_csv``
    * ``.xls`` / ``.xlsx`` -> ``pd.read_excel``
    * ``.json`` -> ``pd.read_json``
    * ``.jsonl`` -> ``pd.read_json(..., lines=True)``

    Returns:
        ``DataFrame.to_json()`` of the loaded data, or an empty string when
        the extension is not one of the supported ones (the file is not
        opened in that case).
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".csv":
        df = pd.read_csv(file_path)
    elif ext in (".xls", ".xlsx"):
        df = pd.read_excel(file_path)
    elif ext in (".json", ".jsonl"):
        # A JSON Lines file holds one JSON document per line; parsing it as a
        # single document fails as soon as there is more than one record, so
        # ``lines`` must be enabled for ``.jsonl``.
        df = pd.read_json(file_path, lines=(ext == ".jsonl"))
    else:
        # Unsupported extension: nothing to load.
        return ""
    return df.to_json()
def read_docx(file_path):
    """Return the text of a .docx file, one paragraph per line.

    The document is opened with ``docx.Document`` and the ``text`` of each
    entry in ``doc.paragraphs`` is joined with newline characters.
    """
    document = Document(file_path)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)