import os
import glob
import gradio as gr

from langchain_groq import ChatGroq

# Embeddings & vector store
#from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS

# Loaders & splitters
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# RAG chain & memory
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain

# --------------------------------------------------------------------------------------
# 1. Load the reference PDF documents listed in `file_paths`
# --------------------------------------------------------------------------------------
# DOCS_DIR = "docs"
# assert os.path.isdir(DOCS_DIR), f"Folder `{DOCS_DIR}` not found."
# Groq API key, read from the environment variable named "groq"
# (None when that variable is not set).
GROQ_API_KEY = os.environ.get("groq")
# gemni_KEY = os.getenv("gemni")
# export GOOGLE_API_KEY=gemni_KEY
# loaders = [
#     DirectoryLoader("bonusGeneralTerms_bs.pdf", loader_cls=PyPDFLoader),
#     DirectoryLoader("FAQ.pdf", loader_cls=PyPDFLoader),
# ]
# documents = []
# for ldr in loaders:
#     docs = ldr.load()
#     documents.extend(docs)

# print(f"Loaded {len(documents)} file(s)")
# PDF files that make up the chatbot's knowledge base. The paths are relative,
# so they are resolved against the current working directory.
file_paths = [
    "bonusGeneralTerms_bs.pdf",
    "FAQ.pdf",
]

documents = []
loaded_files = 0
for path in file_paths:
    try:
        # PyPDFLoader.load() returns a list of Document objects (one per page).
        pages = PyPDFLoader(path).load()
    except Exception as e:
        # Best effort: report a missing or corrupt file and carry on, so the
        # remaining files can still be indexed.
        print(f"Error loading file {path}: {e}")
    else:
        documents.extend(pages)
        loaded_files += 1

# `documents` holds pages, not files; report only the files that actually
# loaded so the summary stays accurate when some of them failed.
print(f"Loaded {len(documents)} document pages from {loaded_files} file(s)")

if not documents:
    # Nothing to index: stop here with a clear message instead of letting the
    # later index-building step fail on an empty corpus.
    raise RuntimeError(
        f"No document pages could be loaded from {file_paths}; "
        "cannot build the search index."
    )
# --------------------------------------------------------------------------------------
# 2. Chunk documents & build FAISS index
# --------------------------------------------------------------------------------------
# Cut the loaded pages into overlapping pieces so each one is small enough to
# embed and retrieve on its own.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=100,
)
chunks = splitter.split_documents(documents)

# Embedding model used for both indexing and querying.
# (Previously tried alternative: HuggingFaceEmbeddings with
# "sentence-transformers/all-MiniLM-L6-v2".)
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Build the FAISS index from the chunks and write a copy to ./faiss_index.
# Note: nothing in this file reads the saved copy back.
vectorstore = FAISS.from_documents(chunks, embeddings)
vectorstore.save_local("faiss_index")

# Retriever that hands the 4 best-matching chunks to the chain per query.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=4))

# --------------------------------------------------------------------------------------
# 3. Instantiate Groq LLM & memory
# --------------------------------------------------------------------------------------


# Chat model served by Groq; it is used both to answer questions and to
# summarise the conversation for the memory below.
llm = ChatGroq(
    api_key=GROQ_API_KEY,
    model="meta-llama/llama-4-scout-17b-16e-instruct",
    temperature=0.2,
)

# Conversation memory exposed to the chain under the "chat_history" key.
summary_memory = ConversationSummaryMemory(
    memory_key="chat_history",
    return_messages=True,
    llm=llm,
)

# --------------------------------------------------------------------------------------
# 4. Conversational Retrieval-Augmented Generation chain
# --------------------------------------------------------------------------------------
# Wire the retriever, the LLM and the conversation memory into one chain that
# takes a "question" and produces an "answer".
rag_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    memory=summary_memory,
    retriever=retriever,
    verbose=False,
)

# --------------------------------------------------------------------------------------
# 5. Gradio UI
# --------------------------------------------------------------------------------------
def answer(question, chat_history):
    """Answer one chat turn using the retrieval-augmented chain.

    Args:
        question: The user's latest message.
        chat_history: Previous turns of the conversation, as passed in by
            Gradio's ChatInterface. It is accepted because the callback is
            called with it, but it is not forwarded to the chain:
            ``rag_chain`` has a memory object attached, and the chain takes
            its ``chat_history`` input from that memory.

    Returns:
        The text stored under the chain result's ``"answer"`` key.
    """
    # NOTE(review): the memory attached to rag_chain is a single module-level
    # object, so every browser session appears to share one conversation
    # summary - confirm whether per-session memory is required.

    # Chain.__call__ is deprecated in LangChain; invoke() is the supported
    # entry point and returns the same result dictionary.
    result = rag_chain.invoke({"question": question})
    return result["answer"]
# Extra CSS for the page: hides the block labels and the footer.
css = """
label[data-testid="block-label"] {
    display: none !important;
}
footer {
    display: none !important;
}
"""

# Chat UI that routes every user message through `answer`.
demo = gr.ChatInterface(
    fn=answer,
    css=css,
    theme="default",
    title="📄🔍  Chatbot",
    description="Ask anything about the reference PDFs and documents",
)

# Start the web server as soon as the script is run.
demo.launch()