File size: 1,335 Bytes
dd80d25 da9fcee 4caba26 193c7ed decddfb 0f3972e b2010da 0f3972e b2010da f3dd2c8 4cd6fae 9d7ed12 0fccc5c 4cd6fae c945f59 7f80b30 03fc65f 4caba26 32b4458 1fede0d 32b4458 1fede0d 32b4458 1fede0d 03fc65f 4caba26 193c7ed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import os
import pandas as pd
from docx import Document
from pptx import Presentation
def get_questions(file_path, level):
    """Return the questions of a given level from a JSON Lines file.

    Args:
        file_path: Path to a JSON Lines file whose records contain the
            "Level", "Question", "file_name" and "Final answer" keys.
        level: Value the "Level" field must equal for a record to be kept.

    Returns:
        A list with one ``[Level, Question, file_name, Final answer]`` list
        per matching record, in file order. Empty when nothing matches.
    """
    columns = ["Level", "Question", "file_name", "Final answer"]
    df = pd.read_json(file_path, lines=True)
    # Select the rows and the four columns in one step, then convert in bulk
    # instead of building a Series per row with iterrows().
    selected = df.loc[df["Level"] == level, columns]
    return selected.to_numpy().tolist()
def is_ext(file_path, ext):
    """Return True if *file_path* has the extension *ext* (case-insensitive).

    Args:
        file_path: Path whose final extension is inspected.
        ext: Expected extension, with or without the leading dot
            (".csv" and "csv" are treated the same). An empty string
            matches paths that have no extension.

    Returns:
        bool: Whether the path's extension equals *ext*, ignoring case.
    """
    wanted = ext.lower()
    # os.path.splitext always reports the extension with its leading dot,
    # so add one when the caller left it out.
    if wanted and not wanted.startswith("."):
        wanted = "." + wanted
    return os.path.splitext(file_path)[1].lower() == wanted
def read_file(file_path):
    """Load a tabular file with pandas and return it serialized as JSON.

    The reader is chosen from the file extension (case-insensitive):
    ``.csv`` -> ``pd.read_csv``, ``.xls``/``.xlsx`` -> ``pd.read_excel``,
    ``.json``/``.jsonl`` -> ``pd.read_json``.

    Args:
        file_path: Path to the file to load.

    Returns:
        The ``DataFrame.to_json()`` string of the loaded data, or an empty
        string when the extension is not one of the supported ones.
    """
    ext = os.path.splitext(file_path)[1].lower()
    df = None
    if ext == ".csv":
        df = pd.read_csv(file_path)
    elif ext in (".xls", ".xlsx"):
        df = pd.read_excel(file_path)
    elif ext in (".json", ".jsonl"):
        # JSON Lines holds one record per line; without lines=True pandas
        # rejects multi-record files with a "Trailing data" error.
        df = pd.read_json(file_path, lines=(ext == ".jsonl"))
    return "" if df is None else df.to_json()
def read_docx(file_path):
    """Return the text of every paragraph in a .docx file, newline-joined.

    Args:
        file_path: Path passed to ``docx.Document``.

    Returns:
        A single string with each paragraph's ``text`` on its own line,
        in document order.
    """
    document = Document(file_path)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)
def read_pptx(file_path):
    """Return the text of a .pptx file, grouped slide by slide.

    Args:
        file_path: Path passed to ``pptx.Presentation``.

    Returns:
        A single string: within a slide, the ``text`` of each shape that
        exposes a ``text`` attribute is joined with a newline; slides are
        separated from each other by a blank line.
    """
    deck = Presentation(file_path)
    per_slide = [
        # Shapes without a ``text`` attribute are skipped.
        "\n".join(shp.text for shp in slide.shapes if hasattr(shp, "text"))
        for slide in deck.slides
    ]
    return "\n\n".join(per_slide)