File size: 2,437 Bytes
bd37926
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
import openai
from utils._admin_util import create_embeddings, create_vector_store, read_pdf_data, split_data
import streamlit as st
from dotenv import load_dotenv


def main():
    """Run the Streamlit admin page that loads uploaded PDFs into the vector store.

    Flow:
      1. Load environment variables and configure the page.
      2. Halt with setup instructions when ``OPENAI_API_KEY`` is not set.
      3. For every uploaded PDF, extract its text with ``read_pdf_data`` and
         split it into chunks with ``split_data``.
      4. Create an embeddings instance, build the vector store from all
         collected chunks, and keep it in ``st.session_state.vector_store``.

    Any ``Exception`` raised while uploading or processing is reported on the
    page through ``st.error`` instead of propagating to the caller.
    """
    load_dotenv()

    # Page configuration goes before every other Streamlit call so that the
    # missing-key error page below is rendered with the configured title too.
    st.set_page_config(page_title="Dump PDFs to QDrant - Vector Store")

    # Refuse to continue without an API key and tell the operator how to set it.
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        st.error("❌ OpenAI API key not found! Please ensure it's set in the environment variables.")
        st.info("To set up your API key:")
        st.code("1. Go to Hugging Face Space settings\n2. Add OPENAI_API_KEY in Repository Secrets")
        st.stop()

    st.title("Please upload your files...📁 ")
    try:
        # Upload multiple PDF files
        uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)

        if uploaded_files:
            with st.spinner('Processing PDF files...'):
                all_chunks = []

                # Process each PDF file, pooling the chunks of all files together.
                for pdf in uploaded_files:
                    st.write(f"Processing: {pdf.name}")

                    # Extract text from PDF
                    text = read_pdf_data(pdf)
                    st.write(f"👉 Reading {pdf.name} done")

                    # Create chunks for this PDF
                    chunks = split_data(text)
                    all_chunks.extend(chunks)
                    st.write(f"👉 Splitting {pdf.name} into chunks done")

                # Nothing to embed: report it and end this script run.
                if not all_chunks:
                    st.error("❌ No valid chunks were created from the PDFs")
                    st.stop()

                st.write("Creating embeddings...")
                embeddings = create_embeddings()
                st.write("👉 Creating embeddings instance done")

                # Create vector store with all chunks and keep it for later reruns.
                vector_store = create_vector_store(embeddings, all_chunks)
                st.session_state.vector_store = vector_store

                st.success(f"✅ Successfully processed {len(uploaded_files)} files and pushed embeddings to Qdrant")
                st.write(f"Total chunks created: {len(all_chunks)}")

    except Exception as e:
        # Top-level UI boundary: show the failure on the page rather than crash.
        st.error(f"❌ An unexpected error occurred: {e}")
            
            
# Script entry point: build the page only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()