# Scraped page header (not part of the program): Hugging Face Spaces - status: Sleeping
import os | |
import openai | |
from utils._admin_util import create_embeddings, create_vector_store, read_pdf_data, split_data | |
import streamlit as st | |
from dotenv import load_dotenv | |
def main():
    """Render the admin page that ingests PDF files into the vector store.

    Flow:
      1. Load environment variables from a ``.env`` file via ``load_dotenv``.
      2. Configure the Streamlit page, then verify that ``OPENAI_API_KEY`` is
         set; if it is missing, show setup instructions and stop the script.
      3. Let the user upload one or more PDFs, extract and chunk their text,
         create an embeddings instance, and pass the embeddings together with
         all chunks to ``create_vector_store``.
      4. Keep the resulting vector store in ``st.session_state.vector_store``
         and report how many files and chunks were handled.

    Any ``Exception`` raised while handling the upload is reported to the user
    through ``st.error`` instead of propagating.
    """
    load_dotenv()

    # Page configuration goes first so that every screen this function can
    # render - including the missing-key error screen - gets the page title.
    st.set_page_config(page_title="Dump PDFs to QDrant - Vector Store")

    _require_openai_api_key()

    # NOTE(review): the leading "β" / "π" glyphs in the UI strings look like
    # mis-decoded emoji; they are left untouched here - confirm against the
    # original file before changing them.
    st.title("Please upload your files...π ")

    try:
        # Upload multiple PDF files
        uploaded_files = st.file_uploader(
            "Upload PDF files", type=["pdf"], accept_multiple_files=True
        )
        if not uploaded_files:
            # Nothing uploaded yet: leave only the uploader on screen.
            return

        with st.spinner('Processing PDF files...'):
            all_chunks = _chunk_uploaded_pdfs(uploaded_files)

            if not all_chunks:
                st.error("β No valid chunks were created from the PDFs")
                st.stop()

            st.write("Creating embeddings...")
            embeddings = create_embeddings()
            st.write("π Creating embeddings instance done")

            # Create vector store with all chunks and keep it for the session
            st.session_state.vector_store = create_vector_store(embeddings, all_chunks)

            st.success(f"β Successfully processed {len(uploaded_files)} files and pushed embeddings to Qdrant")
            st.write(f"Total chunks created: {len(all_chunks)}")
    except Exception as e:
        # Top-level UI boundary: surface the failure in the page rather than
        # letting the script crash with a raw traceback.
        st.error(f"β An unexpected error occurred: {e}")


def _require_openai_api_key():
    """Stop the script with setup instructions when OPENAI_API_KEY is unset or empty."""
    api_key = os.getenv("OPENAI_API_KEY")
    if api_key:
        return

    st.error("β OpenAI API key not found! Please ensure it's set in the environment variables.")
    st.info("To set up your API key:")
    st.code("1. Go to Hugging Face Space settings\n2. Add OPENAI_API_KEY in Repository Secrets")
    st.stop()


def _chunk_uploaded_pdfs(uploaded_files):
    """Read and split every uploaded PDF, returning all chunks in upload order."""
    all_chunks = []
    for pdf in uploaded_files:
        st.write(f"Processing: {pdf.name}")

        # Extract text from PDF
        text = read_pdf_data(pdf)
        st.write(f"π Reading {pdf.name} done")

        # Create chunks for this PDF
        all_chunks.extend(split_data(text))
        st.write(f"π Splitting {pdf.name} into chunks done")
    return all_chunks
# Script entry point: render the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()