from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter


def extract_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page in the given PDFs.

    Args:
        pdf_docs: Iterable of uploaded PDF files; each item is passed
            directly to ``PdfReader``.

    Returns:
        A single string containing the text of all pages of all documents,
        in iteration order, joined with no separator. An empty iterable
        yields ``""``.
    """
    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Defensive: treat a falsy result (e.g. None) as empty text so a
            # single page without extractable text cannot raise TypeError.
            page_texts.append(page.extract_text() or "")
    # Join once at the end instead of growing a string with repeated "+=".
    return "".join(page_texts)


def split_text_into_chunks(text, chunk_size=10000, chunk_overlap=500):
    """Split extracted text into smaller chunks for further processing.

    Args:
        text: The text to split.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as its
            ``chunk_size`` argument.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
            its ``chunk_overlap`` argument.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for
        ``text`` (presumably a list of string chunks; confirm against the
        installed langchain version).
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)