Spaces:
Sleeping
Sleeping
""" | |
Upload documents tab functionality for the Gradio app.
""" | |
import gradio as gr | |
def _summarize_documents(documents):
    """Build the success report listing every processed document.

    Each metadata field, including ``source``, falls back to ``'Unknown'``
    when it is missing, so a single incomplete document cannot abort the
    report with a ``KeyError``.
    """
    parts = [f"β Successfully processed {len(documents)} document(s):\n\n"]
    for i, doc in enumerate(documents, 1):
        metadata = doc.metadata
        parts.append(f"{i}. **{metadata.get('source', 'Unknown')}**\n")
        parts.append(f" - University: {metadata.get('university', 'Unknown')}\n")
        parts.append(f" - Country: {metadata.get('country', 'Unknown')}\n")
        parts.append(f" - Type: {metadata.get('document_type', 'Unknown')}\n")
        parts.append(f" - Language: {metadata.get('language', 'Unknown')}\n\n")
    parts.append(
        "π **Ready for queries!** Go to the 'Search & Query' tab to start asking questions."
    )
    return "".join(parts)


def upload_documents(files, global_vars):
    """Handle document upload and processing.

    Args:
        files: Iterable of uploaded file paths (strings). May be empty or
            ``None``, in which case a prompt to upload files is returned.
        global_vars: Shared state dict. ``'doc_ingestion'`` is read from it;
            on success the created vector store is written to
            ``'vectorstore'``.

    Returns:
        A human-readable status string: either a success summary of the
        processed documents or a message describing what went wrong. This
        function never raises; failures are reported through the returned
        text and the traceback is printed to the console.
    """
    # Imported locally so this function does not depend on the module's
    # import block.
    import traceback

    doc_ingestion = global_vars.get('doc_ingestion')
    if not doc_ingestion:
        return "β Please initialize systems first using the 'Initialize System' tab!"
    if not files:
        return "β Please upload at least one PDF file."

    try:
        # Keep PDF files only. The comparison is case-insensitive so that
        # names such as "REPORT.PDF" are accepted too.
        pdf_files = [path for path in files if path.lower().endswith('.pdf')]
        if not pdf_files:
            return "β Please upload PDF files only."

        print(f"π Processing {len(pdf_files)} PDF file(s)...")
        documents = doc_ingestion.process_documents(pdf_files)
        if not documents:
            return "β No documents were successfully processed. Please check if your PDFs are readable."

        print("π Creating vector store...")
        vectorstore = doc_ingestion.create_vector_store(documents)
        if not vectorstore:
            return "β Failed to create vector store from documents."

        # Publish the vector store through the shared state dict.
        global_vars['vectorstore'] = vectorstore
        return _summarize_documents(documents)
    except Exception as e:
        # UI boundary: report the failure as text instead of raising, but
        # actually emit the details the message below points the user to.
        traceback.print_exc()
        return f"β Error processing documents: {str(e)}\n\nPlease check the console for more details."
def create_upload_tab(global_vars):
    """Build the "Upload Documents" tab and wire up its button.

    Creates, in order, an instructions Markdown block, a multi-file PDF
    picker, a "Process Documents" button and a read-only status textbox.
    Clicking the button calls ``upload_documents`` with the picked files and
    ``global_vars`` and shows the returned message in the status textbox.

    Args:
        global_vars: Shared state dict forwarded to ``upload_documents``.
    """
    with gr.Tab("π Upload Documents", id="upload"):
        # Instructions shown at the top of the tab.
        gr.Markdown("""
### Step 2: Upload PDF Documents
Upload university documents (brochures, admission guides, etc.) in PDF format.
The system will automatically extract metadata including university name, country, and document type.
""")

        # Components are created in display order: picker, button, status.
        pdf_picker = gr.File(label="π Upload PDF Documents", file_types=[".pdf"], file_count="multiple", height=120)
        process_button = gr.Button("π Process Documents", variant="primary", size="lg")
        status_box = gr.Textbox(
            label="Processing Status",
            interactive=False,
            lines=12,
            placeholder="Upload PDF files and click 'Process Documents'...",
        )

        # The lambda closes over global_vars so the handler sees shared state.
        process_button.click(
            lambda files: upload_documents(files, global_vars),
            inputs=pdf_picker,
            outputs=status_box,
        )