DrishtiSharma committed
Commit 44e6288 · verified · 1 Parent(s): 5d5add2

Update app.py

Files changed (1): app.py +47 -49
app.py CHANGED
@@ -23,18 +23,22 @@ rag_llm.verbose = True
 
 st.title("❓")
 
+# Initialize session state variables
+if "vector_store" not in st.session_state:
+    st.session_state.vector_store = None
+if "documents" not in st.session_state:
+    st.session_state.documents = None
+
 # Step 1: Choose PDF Source
-#### Initialize pdf_path
-pdf_path = None
 pdf_source = st.radio("Upload or provide a link to a PDF:", ["Upload a PDF file", "Enter a PDF URL"], index=0, horizontal=True)
 
-
 if pdf_source == "Upload a PDF file":
     uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")
     if uploaded_file:
-        with open("temp.pdf", "wb") as f:
-            f.write(uploaded_file.getbuffer())
         pdf_path = "temp.pdf"
+        with open(pdf_path, "wb") as f:
+            f.write(uploaded_file.getbuffer())
+        st.success("✅ PDF Uploaded Successfully!")
 
 elif pdf_source == "Enter a PDF URL":
     pdf_url = st.text_input("Enter PDF URL:")
@@ -43,9 +47,9 @@ elif pdf_source == "Enter a PDF URL":
         try:
             response = requests.get(pdf_url)
             if response.status_code == 200:
-                with open("temp.pdf", "wb") as f:
-                    f.write(response.content)
                 pdf_path = "temp.pdf"
+                with open(pdf_path, "wb") as f:
+                    f.write(response.content)
                 st.success("✅ PDF Downloaded Successfully!")
             else:
                 st.error("❌ Failed to download PDF. Check the URL.")
@@ -56,40 +60,40 @@ elif pdf_source == "Enter a PDF URL":
 else:
     pdf_path = None
 
-# Step 2: Process PDF
-if pdf_path:
-    with st.spinner("Loading PDF..."):
+# Step 2: Process PDF and Create Vector Store (Only if Not Processed)
+if pdf_path and st.session_state.vector_store is None:
+    with st.spinner("Loading and processing PDF..."):
         loader = PDFPlumberLoader(pdf_path)
         docs = loader.load()
-
-    st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
-
-    # Step 3: Chunking
-    with st.spinner("Chunking the document..."):
-        model_name = "nomic-ai/modernbert-embed-base"
-        embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'})
-        text_splitter = SemanticChunker(embedding_model)
-        documents = text_splitter.split_documents(docs)
-
-    st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
-
-    # Step 4: Setup Vectorstore
-    with st.spinner("Creating vector store..."):
-        vector_store = Chroma(
-            collection_name="deepseek_collection",
-            collection_metadata={"hnsw:space": "cosine"},
-            embedding_function=embedding_model
-        )
-        vector_store.add_documents(documents)
-        num_documents = len(vector_store.get()["documents"])
-
-    st.success(f"✅ **Vector Store Created!** Total documents stored: {num_documents}")
-
-    # Step 5: Query Input
+        st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
+
+    # Step 3: Chunking
+    with st.spinner("Chunking the document..."):
+        model_name = "nomic-ai/modernbert-embed-base"
+        embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'})
+        text_splitter = SemanticChunker(embedding_model)
+        documents = text_splitter.split_documents(docs)
+        st.session_state.documents = documents  # Store in session state
+        st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
+
+    # Step 4: Setup Vectorstore
+    with st.spinner("Creating vector store..."):
+        vector_store = Chroma(
+            collection_name="deepseek_collection",
+            collection_metadata={"hnsw:space": "cosine"},
+            embedding_function=embedding_model
+        )
+        vector_store.add_documents(documents)
+        num_documents = len(vector_store.get()["documents"])
+        st.session_state.vector_store = vector_store  # Store vector store in session state
+        st.success(f"✅ **Vector Store Created!** Total documents stored: {num_documents}")
+
+# Step 5: Query Input (Only allow if vector store exists)
+if st.session_state.vector_store:
     query = st.text_input("🔍 Enter a Query:")
     if query:
         with st.spinner("Retrieving relevant contexts..."):
-            retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
+            retriever = st.session_state.vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
             contexts = retriever.invoke(query)
             context_texts = [doc.page_content for doc in contexts]
 
@@ -99,9 +103,7 @@ if pdf_path:
 
         # Step 6: Context Relevancy Checker
         with st.spinner("Evaluating context relevancy..."):
-            context_relevancy_checker_prompt = PromptTemplate(
-                input_variables=["retriever_query", "context"], template=relevancy_prompt
-            )
+            context_relevancy_checker_prompt = PromptTemplate(input_variables=["retriever_query", "context"], template=relevancy_prompt)
             context_relevancy_chain = LLMChain(llm=llm_judge, prompt=context_relevancy_checker_prompt, output_key="relevancy_response")
             relevancy_response = context_relevancy_chain.invoke({"context": context_texts, "retriever_query": query})
 
@@ -110,9 +112,7 @@ if pdf_path:
 
         # Step 7: Selecting Relevant Contexts
         with st.spinner("Selecting the most relevant contexts..."):
-            relevant_prompt = PromptTemplate(
-                input_variables=["relevancy_response"], template=relevant_context_picker_prompt
-            )
+            relevant_prompt = PromptTemplate(input_variables=["relevancy_response"], template=relevant_context_picker_prompt)
             pick_relevant_context_chain = LLMChain(llm=llm_judge, prompt=relevant_prompt, output_key="context_number")
             relevant_response = pick_relevant_context_chain.invoke({"relevancy_response": relevancy_response['relevancy_response']})
 
@@ -121,9 +121,7 @@ if pdf_path:
 
         # Step 8: Retrieving Context for Response Generation
        with st.spinner("Retrieving final context..."):
-            context_prompt = PromptTemplate(
-                input_variables=["context_number", "context"], template=response_synth
-            )
+            context_prompt = PromptTemplate(input_variables=["context_number", "context"], template=response_synth)
             relevant_contexts_chain = LLMChain(llm=llm_judge, prompt=context_prompt, output_key="relevant_contexts")
             final_contexts = relevant_contexts_chain.invoke({"context_number": relevant_response['context_number'], "context": context_texts})
 
@@ -132,9 +130,7 @@ if pdf_path:
 
         # Step 9: Generate Final Response
         with st.spinner("Generating the final answer..."):
-            final_prompt = PromptTemplate(
-                input_variables=["query", "context"], template=rag_prompt
-            )
+            final_prompt = PromptTemplate(input_variables=["query", "context"], template=rag_prompt)
             response_chain = LLMChain(llm=rag_llm, prompt=final_prompt, output_key="final_response")
             final_response = response_chain.invoke({"query": query, "context": final_contexts['relevant_contexts']})
 
@@ -148,4 +144,6 @@ if pdf_path:
                 "Relevant Contexts": relevant_response["context_number"],
                 "Extracted Contexts": final_contexts["relevant_contexts"],
                 "Final Answer": final_response["final_response"]
-            })
+            })
+else:
+    st.warning("📄 Please upload or provide a PDF URL first.")
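The heart of this diff is the st.session_state caching: Streamlit re-executes the whole script on every widget interaction, so before this change the PDF was reloaded, re-chunked, and re-embedded on every query. A minimal sketch of the idiom, with a hypothetical build_vector_store() standing in for Steps 2-4 (the helper name is illustrative, not part of app.py):

import streamlit as st

def build_vector_store():
    # Hypothetical stand-in for the expensive work in Steps 2-4
    # (load PDF, chunk, embed, index).
    return ["chunk-1", "chunk-2"]

# The script reruns top to bottom on every interaction, but this branch
# executes only once per session; later reruns reuse the cached object.
if "vector_store" not in st.session_state:
    st.session_state.vector_store = build_vector_store()

st.write(f"Cached store holds {len(st.session_state.vector_store)} entries.")

This is also why Step 5 now reads the retriever from st.session_state.vector_store rather than the local vector_store variable, which would be undefined on any rerun that skips Steps 2-4.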
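A note on the retrieval settings the diff leaves untouched: the store is created with collection_metadata={"hnsw:space": "cosine"}, so the k=5 similarity retriever ranks chunks by cosine distance before the LLM judge in Step 6 evaluates them. For debugging that relevancy step, the raw scores can be inspected directly; a sketch assuming the same vector_store object and a placeholder query string:

# Sketch: inspect the similarity scores behind the k=5 retriever.
# In Chroma's convention, lower distance means more similar.
results = vector_store.similarity_search_with_score("What is the main topic?", k=5)
for doc, score in results:
    print(f"{score:.3f}  {doc.page_content[:80]}")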