DrishtiSharma committed on
Commit
afad2ef
·
verified ·
1 Parent(s): 726122d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -36,13 +36,13 @@ if "chunked" not in st.session_state:
36
  if "vector_created" not in st.session_state:
37
  st.session_state.vector_created = False
38
  if "vector_store_path" not in st.session_state:
39
- st.session_state.vector_store_path = "./chroma_langchain_db"
40
  if "vector_store" not in st.session_state:
41
  st.session_state.vector_store = None
42
  if "documents" not in st.session_state:
43
  st.session_state.documents = None
44
 
45
- # **Step 1: Choose PDF Source**
46
  pdf_source = st.radio("Upload or provide a link to a PDF:", ["Upload a PDF file", "Enter a PDF URL"], index=0, horizontal=True)
47
 
48
  if pdf_source == "Upload a PDF file":
@@ -84,13 +84,13 @@ if st.session_state.pdf_path and not st.session_state.pdf_loaded:
84
  st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
85
 
86
  # Step 3: Chunking
87
- if st.session_state.pdf_loaded and not st.session_state.chunked:
88
  with st.spinner("Chunking the document..."):
89
  model_name = "nomic-ai/modernbert-embed-base"
90
  embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': False})
91
  text_splitter = SemanticChunker(embedding_model)
92
  documents = text_splitter.split_documents(st.session_state.documents)
93
- st.session_state.documents = documents
94
  st.session_state.chunked = True
95
  st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
96
 
@@ -101,7 +101,7 @@ if st.session_state.chunked and not st.session_state.vector_created:
101
  collection_name="deepseek_collection",
102
  collection_metadata={"hnsw:space": "cosine"},
103
  embedding_function=embedding_model,
104
- persist_directory=st.session_state.vector_store_path
105
  )
106
  vector_store.add_documents(st.session_state.documents)
107
  num_documents = len(vector_store.get()["documents"])
@@ -110,10 +110,10 @@ if st.session_state.chunked and not st.session_state.vector_created:
110
  st.success(f"✅ **Vector Store Created!** Total documents stored: {num_documents}")
111
 
112
  # Step 5: Query Input
113
- if st.session_state.vector_created:
114
  query = st.text_input("🔍 Enter a Query:")
115
 
116
- if query and st.session_state.vector_store:
117
  with st.spinner("Retrieving relevant contexts..."):
118
  retriever = st.session_state.vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
119
  contexts = retriever.invoke(query)
 
36
  if "vector_created" not in st.session_state:
37
  st.session_state.vector_created = False
38
  if "vector_store_path" not in st.session_state:
39
+ st.session_state.vector_store_path = "./chroma_langchain_db"
40
  if "vector_store" not in st.session_state:
41
  st.session_state.vector_store = None
42
  if "documents" not in st.session_state:
43
  st.session_state.documents = None
44
 
45
+ # Step 1: Choose PDF Source
46
  pdf_source = st.radio("Upload or provide a link to a PDF:", ["Upload a PDF file", "Enter a PDF URL"], index=0, horizontal=True)
47
 
48
  if pdf_source == "Upload a PDF file":
 
84
  st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
85
 
86
  # Step 3: Chunking
87
+ if st.session_state.pdf_loaded and not st.session_state.chunked and st.session_state.documents:
88
  with st.spinner("Chunking the document..."):
89
  model_name = "nomic-ai/modernbert-embed-base"
90
  embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': False})
91
  text_splitter = SemanticChunker(embedding_model)
92
  documents = text_splitter.split_documents(st.session_state.documents)
93
+ st.session_state.documents = documents # Store chunked docs
94
  st.session_state.chunked = True
95
  st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
96
 
 
101
  collection_name="deepseek_collection",
102
  collection_metadata={"hnsw:space": "cosine"},
103
  embedding_function=embedding_model,
104
+ persist_directory=st.session_state.vector_store_path
105
  )
106
  vector_store.add_documents(st.session_state.documents)
107
  num_documents = len(vector_store.get()["documents"])
 
110
  st.success(f"✅ **Vector Store Created!** Total documents stored: {num_documents}")
111
 
112
  # Step 5: Query Input
113
+ if st.session_state.vector_created and st.session_state.vector_store:
114
  query = st.text_input("🔍 Enter a Query:")
115
 
116
+ if query:
117
  with st.spinner("Retrieving relevant contexts..."):
118
  retriever = st.session_state.vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
119
  contexts = retriever.invoke(query)