""" Upload documents tab functionality for the Gradio app """ import gradio as gr def upload_documents(files, global_vars): """Handle document upload and processing""" doc_ingestion = global_vars.get('doc_ingestion') if not doc_ingestion: return "❌ Please initialize systems first using the 'Initialize System' tab!" if not files: return "❌ Please upload at least one PDF file." try: # Filter for PDF files only pdf_files = [] for file_path in files: if file_path.endswith('.pdf'): pdf_files.append(file_path) if not pdf_files: return "❌ Please upload PDF files only." print(f"📄 Processing {len(pdf_files)} PDF file(s)...") # Process documents documents = doc_ingestion.process_documents(pdf_files) if documents: print("🔗 Creating vector store...") # Create vector store vectorstore = doc_ingestion.create_vector_store(documents) if vectorstore: # Store vectorstore in global vars global_vars['vectorstore'] = vectorstore # Create summary summary = f"✅ Successfully processed {len(documents)} document(s):\n\n" for i, doc in enumerate(documents, 1): metadata = doc.metadata university = metadata.get('university', 'Unknown') country = metadata.get('country', 'Unknown') doc_type = metadata.get('document_type', 'Unknown') language = metadata.get('language', 'Unknown') summary += f"{i}. **{metadata['source']}**\n" summary += f" - University: {university}\n" summary += f" - Country: {country}\n" summary += f" - Type: {doc_type}\n" summary += f" - Language: {language}\n\n" summary += "🎉 **Ready for queries!** Go to the 'Search & Query' tab to start asking questions." return summary else: return "❌ Failed to create vector store from documents." else: return "❌ No documents were successfully processed. Please check if your PDFs are readable." except Exception as e: return f"❌ Error processing documents: {str(e)}\n\nPlease check the console for more details." 
def create_upload_tab(global_vars):
    """Build the "Upload Documents" tab and connect its button to the handler.

    The tab contains an introductory Markdown block, a multi-file PDF picker,
    a "Process Documents" button and a read-only status textbox. Clicking the
    button calls ``upload_documents`` with the selected files and
    ``global_vars`` and shows the returned string in the status textbox.

    Args:
        global_vars: Shared state object forwarded unchanged to
            ``upload_documents`` on every click.
    """

    def _process_selected(files):
        # Bind the shared state here so the click event only supplies files.
        return upload_documents(files, global_vars)

    with gr.Tab("📄 Upload Documents", id="upload"):
        gr.Markdown("""
        ### Step 2: Upload PDF Documents

        Upload university documents (brochures, admission guides, etc.) in PDF format.
        The system will automatically extract metadata including university name, country, and document type.
        """)

        pdf_picker = gr.File(
            label="📁 Upload PDF Documents",
            file_types=[".pdf"],
            file_count="multiple",
            height=120,
        )

        process_button = gr.Button(
            "📄 Process Documents",
            variant="primary",
            size="lg",
        )

        status_box = gr.Textbox(
            label="Processing Status",
            interactive=False,
            lines=12,
            placeholder="Upload PDF files and click 'Process Documents'...",
        )

        process_button.click(
            fn=_process_selected,
            inputs=pdf_picker,
            outputs=status_box,
        )