Spaces:
Sleeping
Sleeping
File size: 3,747 Bytes
102c695 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
"""
Upload documents tab functionality for the Gradio app
"""
import traceback

import gradio as gr
def _format_upload_summary(documents):
    """Build the success message listing every processed document."""
    parts = [f"β Successfully processed {len(documents)} document(s):\n\n"]
    for index, doc in enumerate(documents, 1):
        metadata = doc.metadata
        # Every field falls back to 'Unknown' so a document with sparse
        # metadata cannot abort an otherwise successful upload.
        parts.append(f"{index}. **{metadata.get('source', 'Unknown')}**\n")
        parts.append(f" - University: {metadata.get('university', 'Unknown')}\n")
        parts.append(f" - Country: {metadata.get('country', 'Unknown')}\n")
        parts.append(f" - Type: {metadata.get('document_type', 'Unknown')}\n")
        parts.append(f" - Language: {metadata.get('language', 'Unknown')}\n\n")
    parts.append("π **Ready for queries!** Go to the 'Search & Query' tab to start asking questions.")
    return "".join(parts)


def upload_documents(files, global_vars):
    """Process uploaded PDFs into a vector store and report the outcome.

    Args:
        files: Paths of the uploaded files; may be ``None`` or empty when
            nothing was uploaded. Entries that do not end in ``.pdf``
            (compared case-insensitively) are ignored.
        global_vars: Shared, mutable application state. ``'doc_ingestion'``
            is read from it and ``'vectorstore'`` is written on success.

    Returns:
        A status string for the UI: a per-document summary on success,
        otherwise a short explanation of what went wrong. Exceptions raised
        during processing are caught, printed to the console and reported
        in the returned string rather than propagated.
    """
    # NOTE(review): the leading status glyphs ("β", "π") look like
    # mis-encoded emoji in this copy of the file -- confirm the intended
    # characters against the original before changing them.
    doc_ingestion = global_vars.get('doc_ingestion')
    if not doc_ingestion:
        return "β Please initialize systems first using the 'Initialize System' tab!"
    if not files:
        return "β Please upload at least one PDF file."

    try:
        # Compare the extension case-insensitively so "REPORT.PDF" is kept.
        pdf_files = [path for path in files if path.lower().endswith('.pdf')]
        if not pdf_files:
            return "β Please upload PDF files only."

        print(f"π Processing {len(pdf_files)} PDF file(s)...")
        documents = doc_ingestion.process_documents(pdf_files)
        if not documents:
            return "β No documents were successfully processed. Please check if your PDFs are readable."

        print("π Creating vector store...")
        vectorstore = doc_ingestion.create_vector_store(documents)
        if not vectorstore:
            return "β Failed to create vector store from documents."

        # Keep the store in the shared state for later use.
        global_vars['vectorstore'] = vectorstore
        return _format_upload_summary(documents)
    except Exception as e:
        # The returned message points the user at the console, so make sure
        # the full traceback actually ends up there.
        traceback.print_exc()
        return f"β Error processing documents: {str(e)}\n\nPlease check the console for more details."
def create_upload_tab(global_vars):
    """Build the "Upload Documents" tab and connect its button.

    The tab contains, in order: an introductory Markdown block, a
    multi-file upload widget limited to ``.pdf``, a "Process Documents"
    button, and a read-only status textbox. Clicking the button passes the
    uploaded files to ``upload_documents`` and shows its return value in
    the status textbox.

    Args:
        global_vars: Shared application state, forwarded unchanged to
            ``upload_documents`` on every click.
    """
    # Assembled piecewise so the text stays independent of code indentation.
    intro_markdown = (
        "\n"
        "### Step 2: Upload PDF Documents\n"
        "Upload university documents (brochures, admission guides, etc.) in PDF format.\n"
        "The system will automatically extract metadata including university name, country, and document type.\n"
    )

    def _on_process_click(files):
        # Closes over global_vars so the handler only needs the widget value.
        return upload_documents(files, global_vars)

    with gr.Tab("π Upload Documents", id="upload"):
        gr.Markdown(intro_markdown)

        pdf_picker = gr.File(
            label="π Upload PDF Documents",
            file_types=[".pdf"],
            file_count="multiple",
            height=120,
        )
        process_button = gr.Button("π Process Documents", variant="primary", size="lg")
        status_box = gr.Textbox(
            label="Processing Status",
            interactive=False,
            lines=12,
            placeholder="Upload PDF files and click 'Process Documents'...",
        )

        process_button.click(
            _on_process_click,
            inputs=pdf_picker,
            outputs=status_box,
        )
|