|
import os |
|
import pandas as pd |
|
from docx import Document |
|
|
|
def get_questions(file_path, level):
    """Load the questions of one level from a JSON Lines file.

    Args:
        file_path: Path to a JSON Lines file (one JSON object per line).
        level: Value the ``Level`` field must equal for a record to be kept.

    Returns:
        A list holding one ``[Level, Question, file_name, Final answer]``
        entry per matching record, in file order. The list is empty when
        the file contains no records or when no record matches ``level``.
    """
    df = pd.read_json(file_path, lines=True)

    # A file without records parses to a frame with no columns; return early
    # so the ``Level`` lookup below does not raise KeyError.
    if df.empty:
        return []

    df = df[df["Level"] == level]

    return [
        [row["Level"], row["Question"], row["file_name"], row["Final answer"]]
        for _, row in df.iterrows()
    ]
|
|
|
def is_ext(file_path, ext):
    """Return True when ``file_path`` has the extension ``ext``.

    The comparison is case-insensitive. ``ext`` may be given with or without
    its leading dot (``".csv"`` or ``"csv"``); an empty ``ext`` matches paths
    that have no extension.
    """
    wanted = ext.lower()
    # os.path.splitext reports the suffix including its dot, so add the dot
    # when the caller left it off; otherwise "csv" could never match.
    if wanted and not wanted.startswith("."):
        wanted = "." + wanted
    return os.path.splitext(file_path)[1].lower() == wanted
|
|
|
def read_file(file_path):
    """Load a tabular data file and return its contents as a JSON string.

    The loader is picked from the file extension (case-insensitive):
    ``.csv``, ``.xls``/``.xlsx``, ``.json`` and ``.jsonl``.

    Args:
        file_path: Path to the file to load.

    Returns:
        The result of ``DataFrame.to_json()`` for the loaded data, or an
        empty string when the extension is not one of the supported ones.
    """
    ext = os.path.splitext(file_path)[1].lower()

    if ext == ".csv":
        df = pd.read_csv(file_path)
    elif ext in (".xls", ".xlsx"):
        df = pd.read_excel(file_path)
    elif ext in (".json", ".jsonl"):
        # JSON Lines stores one object per line and only parses with
        # lines=True; a regular .json document must be read without it.
        df = pd.read_json(file_path, lines=(ext == ".jsonl"))
    else:
        # Unsupported extension: nothing is read from disk.
        return ""

    return df.to_json()
|
|
|
def read_docx(file_path):
    """Return the paragraph text of a .docx file as a single string.

    Opens ``file_path`` with ``Document`` and joins the ``text`` of every
    entry in the document's ``paragraphs`` with newline characters, in
    document order.
    """
    paragraphs = Document(file_path).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)