import os
from langchain_community.document_loaders import (
    PyMuPDFLoader,
    TextLoader,
    Docx2txtLoader,
    DirectoryLoader,
)

class DocumentProcessor:
    """Load the supported document files found in a single directory.

    Supported patterns are ``*.pdf``, ``*.txt``, ``*.docx`` and ``*.doc``;
    each pattern is handed to a ``DirectoryLoader`` configured with the
    matching file loader class.
    """

    def __init__(self, path: str):
        """Store the directory to scan.

        Args:
            path: Directory whose entries are inspected and loaded by
                ``files_to_texts``.
        """
        self.path = path

    def files_to_texts(self) -> list:
        """Load every supported file in ``self.path``.

        A ``DirectoryLoader`` is only created for a pattern when at least
        one entry name returned by ``os.listdir(self.path)`` ends with that
        pattern's suffix, so patterns with no candidate files are skipped.

        Returns:
            A flat list with the results of each loader's ``load()`` call,
            concatenated in configuration order (pdf, txt, docx, doc).
            Empty when no entry name matches any supported suffix.

        Raises:
            OSError: Propagated from ``os.listdir`` when ``self.path``
                cannot be listed (e.g. it does not exist).
        """
        # glob pattern -> (file loader class, keyword arguments for it).
        # Every entry has the same shape so no type checks are needed below.
        loaders_config = {
            "*.pdf": (PyMuPDFLoader, None),
            "*.txt": (TextLoader, {"encoding": "utf-8"}),
            "*.docx": (Docx2txtLoader, None),
            # NOTE(review): legacy binary .doc files are not the zip-based
            # format that .docx is; Docx2txtLoader presumably fails on them.
            # Kept for backward compatibility -- confirm and pick a
            # dedicated loader if real .doc files must be supported.
            "*.doc": (Docx2txtLoader, None),
        }

        # List the directory once: the result does not depend on the
        # pattern, and a single snapshot keeps all suffix checks consistent.
        filenames = os.listdir(self.path)

        documents = []
        for glob, (loader_cls, loader_kwargs) in loaders_config.items():
            suffix = glob[1:]  # "*.pdf" -> ".pdf"
            if not any(fname.endswith(suffix) for fname in filenames):
                continue

            directory_loader = DirectoryLoader(
                path=self.path,
                glob=glob,
                loader_cls=loader_cls,
                loader_kwargs=loader_kwargs,
            )
            documents.extend(directory_loader.load())

        return documents