import os
import glob

import gradio as gr
from langchain_groq import ChatGroq

# Embeddings & vector store
# from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS

# Loaders & splitters
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# RAG chain & memory
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain

# --------------------------------------------------------------------------------------
# 1. Load the reference PDFs
# --------------------------------------------------------------------------------------
# API key for the Groq chat model, read from the "groq" environment variable
# (None when the variable is not set).
GROQ_API_KEY = os.getenv("groq")
# NOTE(review): no Google key is read here; the Gemini embeddings used further
# down presumably rely on GOOGLE_API_KEY being set in the environment - confirm.

# PDF files to index; the paths are relative to the current working directory.
file_paths = [
    "bonusGeneralTerms_bs.pdf",
    "FAQ.pdf",
]

# Flat list of everything PyPDFLoader returns, across all files.
documents = []
for path in file_paths:
    try:
        # Keep the try body minimal: only the load itself is expected to fail.
        pages = PyPDFLoader(path).load()
    except Exception as e:
        # Best-effort loading: a missing or corrupt file is reported and
        # skipped so the remaining files can still be indexed.
        print(f"Error loading file {path}: {e}")
    else:
        documents.extend(pages)

# NOTE: len(documents) counts loaded *pages*, not files.
# Count the files that actually produced pages instead of the number of files
# requested, so the summary stays truthful when a file failed to load.
# NOTE(review): relies on the PDF loader storing the file path under the
# "source" metadata key - confirm against the installed loader version.
loaded_sources = {doc.metadata.get("source") for doc in documents}
print(f"Loaded {len(documents)} document pages from {len(loaded_sources)} file(s)")

# --------------------------------------------------------------------------------------
# 2. Chunk documents & build FAISS index
# --------------------------------------------------------------------------------------
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
chunks = splitter.split_documents(documents)

# Fail early with a clear message: with nothing to embed, the index build
# below would otherwise fail with a far less helpful error.
if not chunks:
    raise RuntimeError(
        "No text chunks were produced from the input PDFs; "
        "cannot build the FAISS index."
    )

# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectorstore = FAISS.from_documents(chunks, embeddings)
# The index is written to ./faiss_index on every start; nothing in this script
# reads it back, it is rebuilt from the PDFs each run.
vectorstore.save_local("faiss_index")
# Retrieve the 4 best-matching chunks per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

# --------------------------------------------------------------------------------------
# 3. Instantiate Groq LLM & memory
# --------------------------------------------------------------------------------------
llm = ChatGroq(
    model="meta-llama/llama-4-scout-17b-16e-instruct",
    temperature=0.2,
    api_key=GROQ_API_KEY,
)

# Summary memory driven by the same LLM, exposed to the chain under the
# "chat_history" key as message objects.
summary_memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True,
)

# --------------------------------------------------------------------------------------
# 4. Conversational Retrieval-Augmented Generation chain
# --------------------------------------------------------------------------------------
rag_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=summary_memory,
    verbose=False,
)
# --------------------------------------------------------------------------------------
# 5. Gradio UI
# --------------------------------------------------------------------------------------
def answer(question: str, chat_history: list) -> str:
    """Answer a user question with the RAG chain.

    Args:
        question: The user's latest message.
        chat_history: Previous messages of the conversation, as passed in by
            Gradio's ChatInterface.

    Returns:
        The value stored under the "answer" key of the chain result.
    """
    # NOTE(review): the chain is built with its own memory under the same
    # "chat_history" key, so the history passed in here is presumably
    # superseded by the memory's content - confirm. That memory is a single
    # module-level object, so it looks shared by every UI session.
    result = rag_chain.invoke(
        {
            "question": question,
            "chat_history": chat_history,
        }
    )
    return result["answer"]


# Hide Gradio's block labels and footer. Kept as adjacent literals so the
# resulting CSS text is unchanged.
css = (
    ' label[data-testid="block-label"] { display: none !important; }'
    " footer { display: none !important; } "
)

demo = gr.ChatInterface(
    fn=answer,
    title="📄🔍 Chatbot",
    description="Ask anything about the reference PDFs and documents",
    theme="default",
    css=css,
)

# Start the web server only when executed as a script, so importing this
# module does not block on a running server.
if __name__ == "__main__":
    demo.launch()