content-pipeline / app /config.py
AK1239's picture
Removed the picture here prompt
d973029
import os
# Base paths
# The project root comes from the BASE_DIR environment variable when it is
# set; otherwise it is the parent of the directory that contains this file.
BASE_DIR = os.environ.get(
    "BASE_DIR",
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
)
# Each working directory lives under <BASE_DIR>/data/.
PDF_DIR, TXT_DIR, INDEX_DIR = (
    os.path.join(BASE_DIR, "data", leaf) for leaf in ("pdfs", "texts", "index")
)
# Create the directories at import time; already-existing ones are left alone.
for _required_dir in (PDF_DIR, TXT_DIR, INDEX_DIR):
    os.makedirs(_required_dir, exist_ok=True)
del _required_dir  # keep the loop variable out of the module namespace
# Model settings
MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"
# Read from the environment; this is None when HUGGINGFACE_TOKEN is not set.
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

# Data root (the PDF and TXT directories both sit underneath it)
DATA_DIR = os.path.join(BASE_DIR, "data")

# Files kept inside INDEX_DIR: the pickled documents and the FAISS index
DOCUMENTS_PATH, FAISS_INDEX_PATH = (
    os.path.join(INDEX_DIR, filename)
    for filename in ("documents.pkl", "faiss_index.pkl")
)
# Science topics: the "g3" entries are PDFs under PDF_DIR, followed by the
# "g4" entries, which are TXT files under TXT_DIR. Order is significant only
# in that it is preserved exactly as listed here.
SCIENCE_FILES = [
    os.path.join(PDF_DIR, pdf_name)
    for pdf_name in (
        "mazingira g3.pdf",
        "nishati g3.pdf",
        "maada g3.pdf",
        "mawasiliano g3.pdf",
        "usafi g3.pdf",
        "vipimo g3.pdf",
        "mlo g3.pdf",
        "mfumo g3.pdf",
        "maambukizi g3.pdf",
        "huduma g3.pdf",
        "vifaa g3.pdf",
    )
] + [
    os.path.join(TXT_DIR, txt_name)
    for txt_name in (
        "kinga ya mwili g4.txt",
        "ukimwi g4.txt",
        "maji g4.txt",
        "majaribio ya kisayansi g4.txt",
        "magonjwa g4.txt",
        "huduma g4.txt",
        "mazingira g4.txt",
        "nishati g4.txt",
        "matumizi ya nishati g4.txt",
        "mfumo g4.txt",
        "mawasiliano g4.txt",
    )
]
# Math topics: every entry is a TXT file located in TXT_DIR.
MATH_FILES = [
    os.path.join(TXT_DIR, txt_name)
    for txt_name in (
        "namba g3.txt",
        "mpangilio g3.txt",
        "mpangilio g4.txt",
        "matendo katika namba g3.txt",
        "kutambua sehemu g3.txt",
        "kutambua maumbo g3.txt",
        "vipimo g3.txt",
        "vipimo g4.txt",
        "wakati g4.txt",
        "takwimu kwa picha g3.txt",
        "takwimu g4.txt",
        "kugawanya namba g4.txt",
        "kujumlisha namba g4.txt",
        "kutoa namba g4.txt",
        "kuzidisha namba g4.txt",
        "namba nzima g4.txt",
        "namba za kirumi g4.txt",
        "fedha g3.txt",
        "fedha g4.txt",
        "sehemu g4.txt",
        "maumbo g4.txt",
    )
]
# Every configured file: the science entries first, then the math entries.
ALL_FILES = [*SCIENCE_FILES, *MATH_FILES]