DrishtiSharma committed on
Commit
bca3677
·
verified ·
1 Parent(s): e37ff79

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -30
app.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  import chromadb
3
  import requests
4
  import streamlit as st
5
- from langchain.chains import SequentialChain, LLMChain
6
  from langchain.prompts import PromptTemplate
7
  from langchain_groq import ChatGroq
8
  from langchain.document_loaders import PDFPlumberLoader
@@ -11,7 +11,6 @@ from langchain_huggingface import HuggingFaceEmbeddings
11
  from langchain_chroma import Chroma
12
  from prompts import rag_prompt, relevancy_prompt, relevant_context_picker_prompt, response_synth
13
 
14
-
15
  # Set API Keys
16
  os.environ["GROQ_API_KEY"] = st.secrets.get("GROQ_API_KEY", "")
17
 
@@ -25,13 +24,15 @@ rag_llm.verbose = True
25
  # Clear ChromaDB cache to fix tenant issue
26
  chromadb.api.client.SharedSystemClient.clear_system_cache()
27
 
28
- st.title("❓")
29
 
30
  # Initialize session state variables
31
  if "vector_store" not in st.session_state:
32
  st.session_state.vector_store = None
33
  if "documents" not in st.session_state:
34
  st.session_state.documents = None
 
 
35
  if "pdf_loaded" not in st.session_state:
36
  st.session_state.pdf_loaded = False
37
  if "chunked" not in st.session_state:
@@ -42,44 +43,43 @@ if "vector_created" not in st.session_state:
42
  # Step 1: Choose PDF Source
43
  pdf_source = st.radio("Upload or provide a link to a PDF:", ["Upload a PDF file", "Enter a PDF URL"], index=0, horizontal=True)
44
 
45
- pdf_path = None
46
  if pdf_source == "Upload a PDF file":
47
  uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")
48
  if uploaded_file:
49
- pdf_path = "temp.pdf"
50
- with open(pdf_path, "wb") as f:
51
  f.write(uploaded_file.getbuffer())
52
-
53
- st.session_state.pdf_loaded = False
54
  st.session_state.chunked = False
55
  st.session_state.vector_created = False
 
56
 
57
  elif pdf_source == "Enter a PDF URL":
58
  pdf_url = st.text_input("Enter PDF URL:", value="https://arxiv.org/pdf/2406.06998")
59
- if pdf_url:
60
  with st.spinner("Downloading PDF..."):
61
  try:
62
  response = requests.get(pdf_url)
63
  if response.status_code == 200:
64
- pdf_path = "temp.pdf"
65
- with open(pdf_path, "wb") as f:
66
  f.write(response.content)
67
- st.success("✅ PDF Downloaded Successfully!")
68
  st.session_state.pdf_loaded = False
69
  st.session_state.chunked = False
70
  st.session_state.vector_created = False
 
71
  else:
72
  st.error("❌ Failed to download PDF. Check the URL.")
73
- except Exception as e:
74
  st.error(f"Error downloading PDF: {e}")
75
 
76
  # Step 2: Process PDF
77
- if pdf_path and not st.session_state.pdf_loaded:
78
- with st.spinner("Loading PDF..."):
79
- loader = PDFPlumberLoader(pdf_path)
80
  docs = loader.load()
81
  st.session_state.documents = docs
82
- st.session_state.pdf_loaded = True
83
  st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
84
 
85
  # Step 3: Chunking (Only if Not Already Done)
@@ -90,7 +90,7 @@ if st.session_state.pdf_loaded and not st.session_state.chunked:
90
  text_splitter = SemanticChunker(embedding_model)
91
  documents = text_splitter.split_documents(st.session_state.documents)
92
  st.session_state.documents = documents
93
- st.session_state.chunked = True
94
  st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
95
 
96
  # Step 4: Setup Vectorstore
@@ -103,8 +103,8 @@ if st.session_state.chunked and not st.session_state.vector_created:
103
  )
104
  vector_store.add_documents(st.session_state.documents)
105
  num_documents = len(vector_store.get()["documents"])
106
- st.session_state.vector_store = vector_store
107
- st.session_state.vector_created = True
108
  st.success(f"✅ **Vector Store Created!** Total documents stored: {num_documents}")
109
 
110
  # Step 5: Query Input
@@ -156,14 +156,5 @@ if st.session_state.vector_created:
156
  st.subheader("🟥 RAG Final Response")
157
  st.success(final_response['final_response'])
158
 
159
- # Final + Intermediate Outputs
160
- st.subheader("🔍 **Full Workflow Breakdown:**")
161
- st.json({
162
- "Context Relevancy Evaluation": relevancy_response["relevancy_response"],
163
- "Relevant Contexts": relevant_response["context_number"],
164
- "Extracted Contexts": final_contexts["relevant_contexts"],
165
- "Final Answer": final_response["final_response"]
166
- })
167
-
168
  else:
169
- st.warning("📄 Please upload or provide a PDF URL first.")
 
2
  import chromadb
3
  import requests
4
  import streamlit as st
5
+ from langchain.chains import LLMChain
6
  from langchain.prompts import PromptTemplate
7
  from langchain_groq import ChatGroq
8
  from langchain.document_loaders import PDFPlumberLoader
 
11
  from langchain_chroma import Chroma
12
  from prompts import rag_prompt, relevancy_prompt, relevant_context_picker_prompt, response_synth
13
 
 
14
  # Set API Keys
15
  os.environ["GROQ_API_KEY"] = st.secrets.get("GROQ_API_KEY", "")
16
 
 
24
  # Clear ChromaDB cache to fix tenant issue
25
  chromadb.api.client.SharedSystemClient.clear_system_cache()
26
 
27
+ st.title("🔍 PDF-based RAG System")
28
 
29
  # Initialize session state variables
30
  if "vector_store" not in st.session_state:
31
  st.session_state.vector_store = None
32
  if "documents" not in st.session_state:
33
  st.session_state.documents = None
34
+ if "pdf_path" not in st.session_state:
35
+ st.session_state.pdf_path = None
36
  if "pdf_loaded" not in st.session_state:
37
  st.session_state.pdf_loaded = False
38
  if "chunked" not in st.session_state:
 
43
  # Step 1: Choose PDF Source
44
  pdf_source = st.radio("Upload or provide a link to a PDF:", ["Upload a PDF file", "Enter a PDF URL"], index=0, horizontal=True)
45
 
 
46
  if pdf_source == "Upload a PDF file":
47
  uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")
48
  if uploaded_file:
49
+ st.session_state.pdf_path = "temp.pdf"
50
+ with open(st.session_state.pdf_path, "wb") as f:
51
  f.write(uploaded_file.getbuffer())
52
+ st.session_state.pdf_loaded = False
 
53
  st.session_state.chunked = False
54
  st.session_state.vector_created = False
55
+ st.success("✅ PDF Uploaded Successfully!")
56
 
57
  elif pdf_source == "Enter a PDF URL":
58
  pdf_url = st.text_input("Enter PDF URL:", value="https://arxiv.org/pdf/2406.06998")
59
+ if pdf_url and st.session_state.pdf_path is None:
60
  with st.spinner("Downloading PDF..."):
61
  try:
62
  response = requests.get(pdf_url)
63
  if response.status_code == 200:
64
+ st.session_state.pdf_path = "temp.pdf"
65
+ with open(st.session_state.pdf_path, "wb") as f:
66
  f.write(response.content)
 
67
  st.session_state.pdf_loaded = False
68
  st.session_state.chunked = False
69
  st.session_state.vector_created = False
70
+ st.success("✅ PDF Downloaded Successfully!")
71
  else:
72
  st.error("❌ Failed to download PDF. Check the URL.")
73
+ except Exception as e:
74
  st.error(f"Error downloading PDF: {e}")
75
 
76
  # Step 2: Process PDF
77
+ if st.session_state.pdf_path and not st.session_state.pdf_loaded:
78
+ with st.spinner("Loading and processing PDF..."):
79
+ loader = PDFPlumberLoader(st.session_state.pdf_path)
80
  docs = loader.load()
81
  st.session_state.documents = docs
82
+ st.session_state.pdf_loaded = True
83
  st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
84
 
85
  # Step 3: Chunking (Only if Not Already Done)
 
90
  text_splitter = SemanticChunker(embedding_model)
91
  documents = text_splitter.split_documents(st.session_state.documents)
92
  st.session_state.documents = documents
93
+ st.session_state.chunked = True
94
  st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
95
 
96
  # Step 4: Setup Vectorstore
 
103
  )
104
  vector_store.add_documents(st.session_state.documents)
105
  num_documents = len(vector_store.get()["documents"])
106
+ st.session_state.vector_store = vector_store
107
+ st.session_state.vector_created = True
108
  st.success(f"✅ **Vector Store Created!** Total documents stored: {num_documents}")
109
 
110
  # Step 5: Query Input
 
156
  st.subheader("🟥 RAG Final Response")
157
  st.success(final_response['final_response'])
158
 
 
 
 
 
 
 
 
 
 
159
  else:
160
+ st.warning("📄 Please upload or provide a PDF URL first.")