import ollama
import streamlit as st

from langchain.chains import RetrievalQA
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import OllamaLLM

from services.pdf_processing import load_and_split_pdf
from services.vector_store import create_vector_store

# Simple chat prompt for the non-RAG chain; the single input variable is {context}
PROMPT_TEMPLATE = """Question: {context}

Answer: Let's think step by step."""


@st.cache_resource
def initialize_qa_chain(filepath, model_name, temperature, top_p, max_tokens):
    # Load the PDF, split it into chunks, and index the chunks in a vector store
    splits = load_and_split_pdf(filepath)
    vectordb = create_vector_store(splits)

    # Configure the Ollama LLM with sampling parameters
    llm = OllamaLLM(
        model=model_name,
        temperature=temperature,  # Controls randomness (0 = deterministic, 1 = max randomness)
        num_predict=max_tokens,   # Limits the number of tokens in the output (Ollama's name for max tokens)
        top_p=top_p,              # Nucleus sampling for controlling diversity
    )

    # # Define strict retrieval-based prompting
    # prompt_template = PromptTemplate(
    #     template=(
    #         "You are an AI assistant that only answers questions based on the provided document. "
    #         "Do not use external knowledge. If you cannot find an answer in the document, respond with: 'I don't know.'\n\n"
    #         "Document Context:\n{context}\n\n"
    #         "User Question: {query}\n\n"
    #         "Assistant Answer:"
    #     ),
    #     input_variables=["context", "query"],
    # )

    system_prompt = (
        "Use the given context to answer the question. "
        "If you don't know the answer, say you don't know. "
        "Use three sentences maximum and keep the answer concise. "
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

    # Stuff the retrieved documents into the prompt, then wire the retriever to the LLM
    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    chain = create_retrieval_chain(vectordb.as_retriever(), question_answer_chain)

    # return RetrievalQA.from_chain_type(
    #     llm=llm,
    #     chain_type="stuff",
    #     retriever=vectordb.as_retriever(),
    #     chain_type_kwargs={"prompt": prompt_template},
    # )
    return chain


@st.cache_resource
def initialize_chain(model_name, temperature, top_p, max_tokens):
    # Configure the Ollama LLM with sampling parameters
    llm = OllamaLLM(
        model=model_name,
        temperature=temperature,  # Controls randomness (0 = deterministic, 1 = max randomness)
        num_predict=max_tokens,   # Limits the number of tokens in the output (Ollama's name for max tokens)
        top_p=top_p,              # Nucleus sampling for controlling diversity
    )

    # Plain prompt-to-LLM pipeline with no retrieval step
    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    chain = prompt | llm
    return chain
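
# A minimal usage sketch of both factory functions, kept as comments so importing this
# module has no side effects. It assumes a Streamlit page that already has an uploaded
# PDF path and a user question; `uploaded_path`, the model name, and the sampling values
# below are hypothetical, not defined in this module.
#
#     qa_chain = initialize_qa_chain(uploaded_path, "llama3", temperature=0.2, top_p=0.9, max_tokens=256)
#     result = qa_chain.invoke({"input": "What does the document say about refunds?"})
#     st.write(result["answer"])  # create_retrieval_chain returns a dict with "input", "context", "answer"
#
#     chat_chain = initialize_chain("llama3", temperature=0.7, top_p=0.9, max_tokens=256)
#     st.write(chat_chain.invoke({"context": "Why is the sky blue?"}))  # prompt | OllamaLLM returns a string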