deepali1021 committed on
Commit
7d6aa6f
·
1 Parent(s): 2f0d46d
Files changed (1) hide show
  1. pages/Load_Documents.py +27 -47
pages/Load_Documents.py CHANGED
@@ -3,7 +3,6 @@ import openai
3
  from utils._admin_util import create_embeddings, create_vector_store, read_pdf_data, split_data
4
  import streamlit as st
5
  from dotenv import load_dotenv
6
- import requests
7
 
8
  def validate_api_key(api_key):
9
  """Test if the API key is valid"""
@@ -37,70 +36,51 @@ def main():
37
  # if not validate_api_key(api_key):
38
  # st.stop()
39
 
40
- print("API KEY :",api_key)
41
 
42
  st.set_page_config(page_title="Dump PDFs to QDrant - Vector Store")
43
  st.title("Please upload your files...📁 ")
 
 
 
44
 
45
- # Upload multiple PDF files
46
- uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)
47
-
48
- if uploaded_files:
49
- try:
50
  with st.spinner('Processing PDF files...'):
51
  all_chunks = []
52
 
53
  # Process each PDF file
54
  for pdf in uploaded_files:
55
- try:
56
- st.write(f"Processing: {pdf.name}")
57
-
58
- # Extract text from PDF
59
- text = read_pdf_data(pdf)
60
- st.write(f"👉 Reading {pdf.name} done")
61
-
62
- # Create chunks for this PDF
63
- chunks = split_data(text)
64
- all_chunks.extend(chunks)
65
- st.write(f"👉 Splitting {pdf.name} into chunks done")
66
- except Exception as e:
67
- st.error(f"❌ Error processing {pdf.name}: {str(e)}")
68
- continue
69
 
 
 
 
 
 
70
  if not all_chunks:
71
  st.error("❌ No valid chunks were created from the PDFs")
72
  st.stop()
73
 
74
- # Create embeddings with progress tracking
75
- try:
76
- st.write("Creating embeddings...")
77
- embeddings = create_embeddings()
78
- st.write("👉 Creating embeddings instance done")
79
- except openai.RateLimitError:
80
- st.error("❌ Rate limit exceeded. Please try again later.")
81
- st.stop()
82
- except Exception as e:
83
- st.error(f"❌ Error creating embeddings: {str(e)}")
84
- st.stop()
85
-
86
  # Create vector store with all chunks
87
- try:
88
- vector_store = create_vector_store(embeddings, all_chunks)
89
- st.session_state.vector_store = vector_store
90
- except Exception as e:
91
- st.error(f"❌ Error creating vector store: {str(e)}")
92
- st.stop()
93
-
94
  st.success(f"✅ Successfully processed {len(uploaded_files)} files and pushed embeddings to Qdrant")
95
  st.write(f"Total chunks created: {len(all_chunks)}")
96
 
97
- except requests.exceptions.RequestException as e:
98
- st.error(f"❌ Network error: {str(e)}")
99
- st.error("Please check your internet connection and try again.")
100
- except Exception as e:
101
  st.error(f"❌ An unexpected error occurred: {str(e)}")
102
- st.error("Please check your API key and permissions.")
103
- st.error("If the problem persists, please contact support.")
104
-
105
  if __name__ == '__main__':
106
  main()
 
3
  from utils._admin_util import create_embeddings, create_vector_store, read_pdf_data, split_data
4
  import streamlit as st
5
  from dotenv import load_dotenv
 
6
 
7
  def validate_api_key(api_key):
8
  """Test if the API key is valid"""
 
36
  # if not validate_api_key(api_key):
37
  # st.stop()
38
 
39
+ #print("API KEY :",api_key)
40
 
41
  st.set_page_config(page_title="Dump PDFs to QDrant - Vector Store")
42
  st.title("Please upload your files...📁 ")
43
+ try:
44
+ # Upload multiple PDF files
45
+ uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)
46
 
47
+ if uploaded_files:
48
+
 
 
 
49
  with st.spinner('Processing PDF files...'):
50
  all_chunks = []
51
 
52
  # Process each PDF file
53
  for pdf in uploaded_files:
54
+
55
+ st.write(f"Processing: {pdf.name}")
56
+
57
+ # Extract text from PDF
58
+ text = read_pdf_data(pdf)
59
+ st.write(f"👉 Reading {pdf.name} done")
 
 
 
 
 
 
 
 
60
 
61
+ # Create chunks for this PDF
62
+ chunks = split_data(text)
63
+ all_chunks.extend(chunks)
64
+ st.write(f"👉 Splitting {pdf.name} into chunks done")
65
+
66
  if not all_chunks:
67
  st.error("❌ No valid chunks were created from the PDFs")
68
  st.stop()
69
 
70
+ st.write("Creating embeddings...")
71
+ embeddings = create_embeddings()
72
+ st.write("👉 Creating embeddings instance done")
73
+
 
 
 
 
 
 
 
 
74
  # Create vector store with all chunks
75
+ vector_store = create_vector_store(embeddings, all_chunks)
76
+ st.session_state.vector_store = vector_store
77
+
 
 
 
 
78
  st.success(f"✅ Successfully processed {len(uploaded_files)} files and pushed embeddings to Qdrant")
79
  st.write(f"Total chunks created: {len(all_chunks)}")
80
 
81
+ except Exception as e:
 
 
 
82
  st.error(f"❌ An unexpected error occurred: {str(e)}")
83
+
84
+
 
85
  if __name__ == '__main__':
86
  main()