# Midterm-streamlit / pages / Load_Documents.py
# deepali1021's picture
# Added files
# b092604
import os
import openai
from utils._admin_util import create_embeddings, create_vector_store, read_pdf_data, split_data
import streamlit as st
from dotenv import load_dotenv
def _collect_chunks(uploaded_files):
    """Read and split every uploaded PDF, returning all chunks in one list.

    A progress line is written to the page after each step so the user can
    follow along while the spinner is showing.

    Args:
        uploaded_files: The uploaded file objects returned by
            ``st.file_uploader``; each must expose a ``name`` attribute.

    Returns:
        A list holding the chunks produced by ``split_data`` for every file,
        in upload order.
    """
    all_chunks = []
    for pdf in uploaded_files:
        st.write(f"Processing: {pdf.name}")

        # Extract text from the PDF.
        text = read_pdf_data(pdf)
        st.write(f"👉 Reading {pdf.name} done")

        # Split this PDF's text and pool the chunks with the others.
        all_chunks.extend(split_data(text))
        st.write(f"👉 Splitting {pdf.name} into chunks done")
    return all_chunks


def main() -> None:
    """Render the upload page and push the uploaded PDFs to the vector store.

    Flow:
      1. Configure the page and load environment variables from ``.env``.
      2. Stop with setup instructions if ``OPENAI_API_KEY`` is not set.
      3. Let the user upload one or more PDF files.
      4. Read and split each PDF, create the embeddings instance, build the
         vector store from all chunks, and keep it in
         ``st.session_state.vector_store``.

    Any exception raised during processing is reported on the page instead
    of propagating.
    """
    # Page configuration goes first so that it is applied on every path,
    # including the early exit below when the API key is missing.
    st.set_page_config(page_title="Dump PDFs to QDrant - Vector Store")

    load_dotenv()

    # Detailed API key verification: without a key there is nothing useful
    # this page can do, so explain how to fix it and halt the script run.
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        st.error("❌ OpenAI API key not found! Please ensure it's set in the environment variables.")
        st.info("To set up your API key:")
        st.code("1. Go to Hugging Face Space settings\n2. Add OPENAI_API_KEY in Repository Secrets")
        st.stop()

    st.title("Please upload your files...📁")

    # Upload multiple PDF files.
    uploaded_files = st.file_uploader(
        "Upload PDF files", type=["pdf"], accept_multiple_files=True
    )
    if not uploaded_files:
        # Nothing uploaded yet; wait for the next rerun.
        return

    try:
        with st.spinner('Processing PDF files...'):
            all_chunks = _collect_chunks(uploaded_files)

            if not all_chunks:
                st.error("❌ No valid chunks were created from the PDFs")
                st.stop()

            st.write("Creating embeddings...")
            embeddings = create_embeddings()
            st.write("👉 Creating embeddings instance done")

            # Create the vector store with all chunks and keep it in the
            # session state so it survives reruns of this session.
            vector_store = create_vector_store(embeddings, all_chunks)
            st.session_state.vector_store = vector_store

            st.success(
                f"✅ Successfully processed {len(uploaded_files)} files "
                "and pushed embeddings to Qdrant"
            )
            st.write(f"Total chunks created: {len(all_chunks)}")
    except Exception as e:
        # Top-level UI boundary: show the failure to the user rather than
        # letting the page crash with a traceback.
        st.error(f"❌ An unexpected error occurred: {e}")
# Build the page only when this file is executed as the entry script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()