import ollama
import streamlit as st

from langchain.chains import RetrievalQA
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import OllamaLLM

from services.pdf_processing import load_and_split_pdf
from services.vector_store import create_vector_store

# Simple chat prompt for the non-RAG chain; the single input variable is {context}
PROMPT_TEMPLATE = """Question: {context}

Answer: Let's think step by step."""


@st.cache_resource
def initialize_qa_chain(filepath, model_name, temperature, top_p, max_tokens):
    # Load the PDF, split it into chunks, and index the chunks in a vector store
    splits = load_and_split_pdf(filepath)
    vectordb = create_vector_store(splits)

    # Configure the Ollama LLM with sampling parameters
    llm = OllamaLLM(
        model=model_name,
        temperature=temperature,  # Controls randomness (0 = deterministic, 1 = max randomness)
        num_predict=max_tokens,   # Limits the number of tokens in the output (Ollama's name for max tokens)
        top_p=top_p,              # Nucleus sampling for controlling diversity
    )

    # # Define strict retrieval-based prompting
    # prompt_template = PromptTemplate(
    #     template=(
    #         "You are an AI assistant that only answers questions based on the provided document. "
    #         "Do not use external knowledge. If you cannot find an answer in the document, respond with: 'I don't know.'\n\n"
    #         "Document Context:\n{context}\n\n"
    #         "User Question: {query}\n\n"
    #         "Assistant Answer:"
    #     ),
    #     input_variables=["context", "query"],
    # )

    system_prompt = (
        "Use the given context to answer the question. "
        "If you don't know the answer, say you don't know. "
        "Use three sentences maximum and keep the answer concise. "
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

    # Stuff the retrieved documents into the prompt, then wire the retriever to the LLM
    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    chain = create_retrieval_chain(vectordb.as_retriever(), question_answer_chain)

    # return RetrievalQA.from_chain_type(
    #     llm=llm,
    #     chain_type="stuff",
    #     retriever=vectordb.as_retriever(),
    #     chain_type_kwargs={"prompt": prompt_template},
    # )
    return chain


@st.cache_resource
def initialize_chain(model_name, temperature, top_p, max_tokens):
    # Configure the Ollama LLM with sampling parameters
    llm = OllamaLLM(
        model=model_name,
        temperature=temperature,  # Controls randomness (0 = deterministic, 1 = max randomness)
        num_predict=max_tokens,   # Limits the number of tokens in the output (Ollama's name for max tokens)
        top_p=top_p,              # Nucleus sampling for controlling diversity
    )

    # Plain prompt-to-LLM pipeline with no retrieval step
    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    chain = prompt | llm
    return chain
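
# A minimal usage sketch of both factory functions, kept as comments so importing this
# module has no side effects. It assumes a Streamlit page that already has an uploaded
# PDF path and a user question; `uploaded_path`, the model name, and the sampling values
# below are hypothetical, not defined in this module.
#
#     qa_chain = initialize_qa_chain(uploaded_path, "llama3", temperature=0.2, top_p=0.9, max_tokens=256)
#     result = qa_chain.invoke({"input": "What does the document say about refunds?"})
#     st.write(result["answer"])  # create_retrieval_chain returns a dict with "input", "context", "answer"
#
#     chat_chain = initialize_chain("llama3", temperature=0.7, top_p=0.9, max_tokens=256)
#     st.write(chat_chain.invoke({"context": "Why is the sky blue?"}))  # prompt | OllamaLLM returns a string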