File size: 2,049 Bytes
dd80d25 da9fcee 4caba26 193c7ed decddfb 0f3972e b2010da 0f3972e b2010da f3dd2c8 4cd6fae 9d7ed12 0fccc5c 4cd6fae c945f59 7f80b30 be7c8ff 03fc65f 4caba26 32b4458 1fede0d 32b4458 1fede0d 32b4458 1fede0d 03fc65f 4caba26 be7c8ff 4caba26 a3ac33b 8acf5f0 a3ac33b acc8cb5 d4ca6c7 d89899f 193c7ed be7c8ff 193c7ed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import os
import pandas as pd
from docx import Document
from pptx import Presentation
def get_questions(file_path, level):
    """Load the questions of one level from a JSON Lines file.

    Args:
        file_path: Path to a JSON Lines file (one JSON object per line) whose
            records provide the keys "Level", "Question", "file_name" and
            "Final answer".
        level: Value compared for equality against the "Level" column.

    Returns:
        A list holding one ``[level, question, file_name, final_answer]`` list
        per matching record, in file order. Empty when nothing matches.

    Raises:
        KeyError: If one of the four expected columns is missing.
    """
    df = pd.read_json(file_path, lines=True)
    matching = df[df["Level"] == level]
    columns = ("Level", "Question", "file_name", "Final answer")
    # Zip the needed columns rather than calling iterrows(): no per-row Series
    # is built and every value keeps the dtype of its own column.
    return [list(values) for values in zip(*(matching[name] for name in columns))]
def is_ext(file_path, ext):
    """Tell whether *file_path* ends with the extension *ext*.

    The comparison is case-insensitive and looks only at the last suffix of
    the path, as returned by ``os.path.splitext``.

    Args:
        file_path: Path or file name to inspect.
        ext: Extension to test for, with or without the leading dot
            (".csv" and "csv" are equivalent). An empty string matches paths
            that have no extension.

    Returns:
        True if the path's extension equals *ext*, otherwise False.
    """
    wanted = ext.lower()
    # splitext always yields a dot-prefixed suffix, so a bare "csv" could
    # never match; normalize it. "" stays "" to keep the no-extension case.
    if wanted and not wanted.startswith("."):
        wanted = "." + wanted
    return os.path.splitext(file_path)[1].lower() == wanted
def read_file_json(file_path):
    """Read a tabular file with pandas and return its content as JSON text.

    The reader is chosen from the (case-insensitive) file extension:
    ".csv" uses ``pd.read_csv``, ".xls"/".xlsx" use ``pd.read_excel`` and
    ".json"/".jsonl" use ``pd.read_json``.

    Args:
        file_path: Path of the file to read.

    Returns:
        The output of ``DataFrame.to_json()`` for the loaded data, or an
        empty string when the extension is not one of the supported ones.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".csv":
        df = pd.read_csv(file_path)
    elif ext in (".xls", ".xlsx"):
        df = pd.read_excel(file_path)
    elif ext in (".json", ".jsonl"):
        # JSON Lines stores one document per line; without lines=True pandas
        # raises on the second record of a .jsonl file.
        df = pd.read_json(file_path, lines=(ext == ".jsonl"))
    else:
        return ""
    return df.to_json()
def read_docx_text(file_path):
    """Extract the text of a .docx file, keeping paragraphs and tables in order.

    The document body is walked element by element so that tables appear at
    the position they have in the document. Paragraphs whose style name
    starts with "Heading" are wrapped as ``\\n**text**\\n``; other paragraphs
    are emitted only when they contain text. Each table row becomes one line
    with its stripped cell texts joined by " | ".

    Args:
        file_path: Path (or file-like object) handed to ``docx.Document``.

    Returns:
        All collected pieces joined with newlines.
    """
    doc = Document(file_path)
    # Index the wrapper objects by their XML element once. Looking them up
    # per body element avoids rescanning doc.paragraphs / doc.tables (which
    # are rebuilt on every access) for each block.
    paragraph_by_element = {p._element: p for p in doc.paragraphs}
    table_by_element = {t._element: t for t in doc.tables}

    text = []
    for block in doc.element.body:
        if block.tag.endswith("p"):
            paragraph = paragraph_by_element.get(block)
            if paragraph is None:
                # Tag merely ends in "p" but is not a top-level paragraph.
                continue
            if paragraph.style.name.startswith("Heading"):
                text.append("\n**" + paragraph.text + "**\n")
            elif paragraph.text:
                text.append(paragraph.text)
        elif block.tag.endswith("tbl"):
            table = table_by_element.get(block)
            if table is None:
                continue
            for row in table.rows:
                text.append(" | ".join(cell.text.strip() for cell in row.cells))
    return "\n".join(text)
def read_pptx_text(file_path):
    """Extract the text of every slide in a .pptx file.

    For each slide, the ``text`` of every shape that exposes a ``text``
    attribute is collected in shape order and joined with newlines.

    Args:
        file_path: Path (or file-like object) handed to ``pptx.Presentation``.

    Returns:
        The per-slide texts joined with a blank line between slides. A slide
        without any text-bearing shape contributes an empty string.
    """
    prs = Presentation(file_path)
    slide_texts = [
        "\n".join(shape.text for shape in slide.shapes if hasattr(shape, "text"))
        for slide in prs.slides
    ]
    return "\n\n".join(slide_texts)