Spaces:

DrishtiSharma
/

docqa-with-deepseek-r1

Running

App Files Community

DrishtiSharma committed 12 days ago

Commit

afad2ef

verified ·

1 Parent(s): 726122d

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -7

app.py CHANGED Viewed

@@ -36,13 +36,13 @@ if "chunked" not in st.session_state:
 if "vector_created" not in st.session_state:
     st.session_state.vector_created = False
 if "vector_store_path" not in st.session_state:
-    st.session_state.vector_store_path = "./chroma_langchain_db"
 if "vector_store" not in st.session_state:
     st.session_state.vector_store = None
 if "documents" not in st.session_state:
     st.session_state.documents = None
-# **Step 1: Choose PDF Source**
 pdf_source = st.radio("Upload or provide a link to a PDF:", ["Upload a PDF file", "Enter a PDF URL"], index=0, horizontal=True)
 if pdf_source == "Upload a PDF file":
@@ -84,13 +84,13 @@ if st.session_state.pdf_path and not st.session_state.pdf_loaded:
         st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
 # Step 3: Chunking
-if st.session_state.pdf_loaded and not st.session_state.chunked:
     with st.spinner("Chunking the document..."):
         model_name = "nomic-ai/modernbert-embed-base"
         embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': False})
         text_splitter = SemanticChunker(embedding_model)
         documents = text_splitter.split_documents(st.session_state.documents)
-        st.session_state.documents = documents
         st.session_state.chunked = True
         st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
@@ -101,7 +101,7 @@ if st.session_state.chunked and not st.session_state.vector_created:
             collection_name="deepseek_collection",
             collection_metadata={"hnsw:space": "cosine"},
             embedding_function=embedding_model,
-            persist_directory=st.session_state.vector_store_path
         )
         vector_store.add_documents(st.session_state.documents)
         num_documents = len(vector_store.get()["documents"])
@@ -110,10 +110,10 @@ if st.session_state.chunked and not st.session_state.vector_created:
         st.success(f"✅ **Vector Store Created!** Total documents stored: {num_documents}")
 # Step 5: Query Input
-if st.session_state.vector_created:
     query = st.text_input("🔍 Enter a Query:")
-    if query and st.session_state.vector_store:
         with st.spinner("Retrieving relevant contexts..."):
             retriever = st.session_state.vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
             contexts = retriever.invoke(query)

 if "vector_created" not in st.session_state:
     st.session_state.vector_created = False
 if "vector_store_path" not in st.session_state:
+    st.session_state.vector_store_path = "./chroma_langchain_db"
 if "vector_store" not in st.session_state:
     st.session_state.vector_store = None
 if "documents" not in st.session_state:
     st.session_state.documents = None
+# Step 1: Choose PDF Source
 pdf_source = st.radio("Upload or provide a link to a PDF:", ["Upload a PDF file", "Enter a PDF URL"], index=0, horizontal=True)
 if pdf_source == "Upload a PDF file":
         st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
 # Step 3: Chunking
+if st.session_state.pdf_loaded and not st.session_state.chunked and st.session_state.documents:
     with st.spinner("Chunking the document..."):
         model_name = "nomic-ai/modernbert-embed-base"
         embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': False})
         text_splitter = SemanticChunker(embedding_model)
         documents = text_splitter.split_documents(st.session_state.documents)
+        st.session_state.documents = documents  # Store chunked docs
         st.session_state.chunked = True
         st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
             collection_name="deepseek_collection",
             collection_metadata={"hnsw:space": "cosine"},
             embedding_function=embedding_model,
+            persist_directory=st.session_state.vector_store_path
         )
         vector_store.add_documents(st.session_state.documents)
         num_documents = len(vector_store.get()["documents"])
         st.success(f"✅ **Vector Store Created!** Total documents stored: {num_documents}")
 # Step 5: Query Input
+if st.session_state.vector_created and st.session_state.vector_store:
     query = st.text_input("🔍 Enter a Query:")
+    if query:
         with st.spinner("Retrieving relevant contexts..."):
             retriever = st.session_state.vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
             contexts = retriever.invoke(query)