DrishtiSharma committed on
Commit
23248f2
·
verified ·
1 Parent(s): c6ec0a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -17
app.py CHANGED
@@ -54,23 +54,54 @@ if pdf_source == "Upload a PDF file":
54
  st.session_state.vector_created = False
55
 
56
  elif pdf_source == "Enter a PDF URL":
57
- pdf_url = st.text_input("Enter PDF URL:", value = "https://arxiv.org/pdf/2406.06998")
58
- if pdf_url and not st.session_state.pdf_path:
59
- with st.spinner("Downloading PDF..."):
60
- try:
61
- response = requests.get(pdf_url)
62
- if response.status_code == 200:
63
- st.session_state.pdf_path = "temp.pdf"
64
- with open(st.session_state.pdf_path, "wb") as f:
65
- f.write(response.content)
66
- st.session_state.pdf_loaded = False
67
- st.session_state.chunked = False
68
- st.session_state.vector_created = False
69
- st.success("βœ… PDF Downloaded Successfully!")
70
- else:
71
- st.error("❌ Failed to download PDF. Check the URL.")
72
- except Exception as e:
73
- st.error(f"❌ Error downloading PDF: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  # Step 2: Load & Process PDF (Only Once)
76
  if st.session_state.pdf_path and not st.session_state.pdf_loaded:
 
54
  st.session_state.vector_created = False
55
 
56
  elif pdf_source == "Enter a PDF URL":
57
+ pdf_url = st.text_input("Enter PDF URL:", key="pdf_url", on_change=lambda: st.session_state.update({"process_pdf": True}))
58
+
59
+ if st.session_state.get("process_pdf") and pdf_url: # ✅ Triggered only when Enter is pressed
60
+ with st.spinner("Downloading PDF..."):
61
+ try:
62
+ # Download PDF
63
+ response = requests.get(pdf_url)
64
+ if response.status_code == 200:
65
+ st.session_state.pdf_path = "temp.pdf"
66
+ with open(st.session_state.pdf_path, "wb") as f:
67
+ f.write(response.content)
68
+ st.success("βœ… PDF Downloaded Successfully!")
69
+ else:
70
+ st.error("❌ Failed to download PDF. Check the URL.")
71
+ st.stop()
72
+
73
+ # Step 2: Load PDF
74
+ st.spinner("Loading PDF...")
75
+ loader = PDFPlumberLoader(st.session_state.pdf_path)
76
+ docs = loader.load()
77
+ st.session_state.documents = docs
78
+ st.session_state.pdf_loaded = True
79
+ st.success(f"βœ… **PDF Loaded!** Total Pages: {len(docs)}")
80
+
81
+ # Step 3: Chunking the document
82
+ st.spinner("Chunking the document...")
83
+ model_name = "nomic-ai/modernbert-embed-base"
84
+ embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'})
85
+ text_splitter = SemanticChunker(embedding_model)
86
+
87
+ if st.session_state.documents:
88
+ documents = text_splitter.split_documents(st.session_state.documents)
89
+ st.session_state.documents = documents
90
+ st.session_state.chunked = True
91
+
92
+ # Save chunks for persistence
93
+ CHUNKS_FILE = "/tmp/chunks.pkl"
94
+ with open(CHUNKS_FILE, "wb") as f:
95
+ pickle.dump(documents, f)
96
+
97
+ st.success(f"βœ… **Document Chunked!** Total Chunks: {len(documents)}")
98
+
99
+ # Reset trigger to prevent looping
100
+ st.session_state.process_pdf = False
101
+
102
+ except Exception as e:
103
+ st.error(f"❌ Error: {e}")
104
+
105
 
106
  # Step 2: Load & Process PDF (Only Once)
107
  if st.session_state.pdf_path and not st.session_state.pdf_loaded: