DrishtiSharma committed
Commit 5d5add2 · verified · 1 Parent(s): a297053

Create lab/lacks_persistence.py

Files changed (1)
  1. lab/lacks_persistence.py +151 -0
lab/lacks_persistence.py ADDED
@@ -0,0 +1,151 @@
+ import os
+ import requests
+ import streamlit as st
+ from langchain.chains import LLMChain
+ from langchain.prompts import PromptTemplate
+ from langchain_groq import ChatGroq
+ from langchain_community.document_loaders import PDFPlumberLoader
+ from langchain_experimental.text_splitter import SemanticChunker
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_chroma import Chroma
+ from prompts import rag_prompt, relevancy_prompt, relevant_context_picker_prompt, response_synth
+
+
+ # Set API Keys
+ os.environ["GROQ_API_KEY"] = st.secrets.get("GROQ_API_KEY", "")
+
+ # Load LLM models
+ llm_judge = ChatGroq(model="deepseek-r1-distill-llama-70b")
+ rag_llm = ChatGroq(model="mixtral-8x7b-32768")
+
+ llm_judge.verbose = True
+ rag_llm.verbose = True
+
+ st.title("❓")
+
+ # Step 1: Choose PDF Source
+ # Initialize pdf_path
+ pdf_path = None
+ pdf_source = st.radio("Upload or provide a link to a PDF:", ["Upload a PDF file", "Enter a PDF URL"], index=0, horizontal=True)
+
+
+ if pdf_source == "Upload a PDF file":
+     uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")
+     if uploaded_file:
+         with open("temp.pdf", "wb") as f:
+             f.write(uploaded_file.getbuffer())
+         pdf_path = "temp.pdf"
+
+ elif pdf_source == "Enter a PDF URL":
+     pdf_url = st.text_input("Enter PDF URL:")
+     if pdf_url:
+         with st.spinner("Downloading PDF..."):
+             try:
+                 response = requests.get(pdf_url)
+                 if response.status_code == 200:
+                     with open("temp.pdf", "wb") as f:
+                         f.write(response.content)
+                     pdf_path = "temp.pdf"
+                     st.success("✅ PDF Downloaded Successfully!")
+                 else:
+                     st.error("❌ Failed to download PDF. Check the URL.")
+                     pdf_path = None
+             except Exception as e:
+                 st.error(f"Error downloading PDF: {e}")
+                 pdf_path = None
+     else:
+         pdf_path = None
+
+ # Step 2: Process PDF
+ if pdf_path:
+     with st.spinner("Loading PDF..."):
+         loader = PDFPlumberLoader(pdf_path)
+         docs = loader.load()
+
+     st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
+
+     # Step 3: Chunking
+     with st.spinner("Chunking the document..."):
+         model_name = "nomic-ai/modernbert-embed-base"
+         embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'})
+         text_splitter = SemanticChunker(embedding_model)
+         documents = text_splitter.split_documents(docs)
+
+     st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
+
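+     # NOTE: the vector store below has no persist_directory, so it lives
+     # in memory only and is rebuilt on every Streamlit rerun (hence the
+     # file name "lacks_persistence").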
+     # Step 4: Setup Vectorstore
+     with st.spinner("Creating vector store..."):
+         vector_store = Chroma(
+             collection_name="deepseek_collection",
+             collection_metadata={"hnsw:space": "cosine"},
+             embedding_function=embedding_model
+         )
+         vector_store.add_documents(documents)
+         num_documents = len(vector_store.get()["documents"])
+
+     st.success(f"✅ **Vector Store Created!** Total documents stored: {num_documents}")
+
+     # Step 5: Query Input
+     query = st.text_input("🔍 Enter a Query:")
+     if query:
+         with st.spinner("Retrieving relevant contexts..."):
+             retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
+             contexts = retriever.invoke(query)
+             context_texts = [doc.page_content for doc in contexts]
+
+         st.success(f"✅ **Retrieved {len(context_texts)} Contexts!**")
+         for i, text in enumerate(context_texts, 1):
+             st.write(f"**Context {i}:** {text[:500]}...")
+
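+         # NOTE: LLMChain is deprecated in recent LangChain releases; the
+         # LCEL composition style (prompt | llm) is the suggested replacement.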
+         # Step 6: Context Relevancy Checker
+         with st.spinner("Evaluating context relevancy..."):
+             context_relevancy_checker_prompt = PromptTemplate(
+                 input_variables=["retriever_query", "context"], template=relevancy_prompt
+             )
+             context_relevancy_chain = LLMChain(llm=llm_judge, prompt=context_relevancy_checker_prompt, output_key="relevancy_response")
+             relevancy_response = context_relevancy_chain.invoke({"context": context_texts, "retriever_query": query})
+
+         st.subheader("🟥 Context Relevancy Evaluation")
+         st.json(relevancy_response['relevancy_response'])
+
+         # Step 7: Selecting Relevant Contexts
+         with st.spinner("Selecting the most relevant contexts..."):
+             relevant_prompt = PromptTemplate(
+                 input_variables=["relevancy_response"], template=relevant_context_picker_prompt
+             )
+             pick_relevant_context_chain = LLMChain(llm=llm_judge, prompt=relevant_prompt, output_key="context_number")
+             relevant_response = pick_relevant_context_chain.invoke({"relevancy_response": relevancy_response['relevancy_response']})
+
+         st.subheader("🟦 Pick Relevant Context Chain")
+         st.json(relevant_response['context_number'])
+
+         # Step 8: Retrieving Context for Response Generation
+         with st.spinner("Retrieving final context..."):
+             context_prompt = PromptTemplate(
+                 input_variables=["context_number", "context"], template=response_synth
+             )
+             relevant_contexts_chain = LLMChain(llm=llm_judge, prompt=context_prompt, output_key="relevant_contexts")
+             final_contexts = relevant_contexts_chain.invoke({"context_number": relevant_response['context_number'], "context": context_texts})
+
+         st.subheader("🟥 Relevant Contexts Extracted")
+         st.json(final_contexts['relevant_contexts'])
+
+         # Step 9: Generate Final Response
+         with st.spinner("Generating the final answer..."):
+             final_prompt = PromptTemplate(
+                 input_variables=["query", "context"], template=rag_prompt
+             )
+             response_chain = LLMChain(llm=rag_llm, prompt=final_prompt, output_key="final_response")
+             final_response = response_chain.invoke({"query": query, "context": final_contexts['relevant_contexts']})
+
+         st.subheader("🟥 RAG Final Response")
+         st.success(final_response['final_response'])
+
+         # Step 10: Display Workflow Breakdown
+         st.subheader("🔍 **Workflow Breakdown:**")
+         st.json({
+             "Context Relevancy Evaluation": relevancy_response["relevancy_response"],
+             "Relevant Contexts": relevant_response["context_number"],
+             "Extracted Contexts": final_contexts["relevant_contexts"],
+             "Final Answer": final_response["final_response"]
+         })
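
As the file name suggests, the vector store above is never persisted: Chroma is constructed without a persist_directory, so the index is rebuilt from scratch on every rerun. Below is a minimal sketch of one way to add persistence with langchain_chroma; the "chroma_db" path is an illustrative assumption, not part of this commit.

from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

PERSIST_DIR = "chroma_db"  # hypothetical on-disk location; any writable path works

embedding_model = HuggingFaceEmbeddings(model_name="nomic-ai/modernbert-embed-base")

# Passing persist_directory makes Chroma write the collection to disk,
# so it survives Streamlit reruns and app restarts.
vector_store = Chroma(
    collection_name="deepseek_collection",
    collection_metadata={"hnsw:space": "cosine"},
    embedding_function=embedding_model,
    persist_directory=PERSIST_DIR,
)

# On later runs the same constructor call reattaches to the existing
# collection, so documents only need to be added once:
if len(vector_store.get()["documents"]) == 0:
    print("Empty store; add documents with vector_store.add_documents(...).")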