File size: 3,747 Bytes
102c695
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
"""
Upload documents tab functionality for the Gradio app
"""
import os
import traceback

import gradio as gr

def upload_documents(files, global_vars):
    """Process uploaded PDF files and build the vector store.

    Args:
        files: Uploaded items from the file widget. Each item may be a path
            string, an ``os.PathLike``, or an object exposing its path as
            ``.name``. Items whose path does not end in ``.pdf``
            (case-insensitive) are ignored.
        global_vars: Shared state dict. ``'doc_ingestion'`` is read from it
            and ``'vectorstore'`` is written to it on success.

    Returns:
        A status message for the UI: a per-document summary on success,
        otherwise a message describing what went wrong. Exceptions raised
        while processing are reported in the message instead of propagating.
    """
    doc_ingestion = global_vars.get('doc_ingestion')
    if not doc_ingestion:
        return "❌ Please initialize systems first using the 'Initialize System' tab!"

    if not files:
        return "❌ Please upload at least one PDF file."

    try:
        # Keep PDFs only; the extension check is case-insensitive so that
        # names such as "REPORT.PDF" are accepted as well.
        candidate_paths = (_uploaded_path(item) for item in files)
        pdf_files = [
            path for path in candidate_paths
            if path and path.lower().endswith('.pdf')
        ]
        if not pdf_files:
            return "❌ Please upload PDF files only."

        print(f"📄 Processing {len(pdf_files)} PDF file(s)...")
        documents = doc_ingestion.process_documents(pdf_files)
        if not documents:
            return "❌ No documents were successfully processed. Please check if your PDFs are readable."

        print("🔗 Creating vector store...")
        vectorstore = doc_ingestion.create_vector_store(documents)
        if not vectorstore:
            return "❌ Failed to create vector store from documents."

        # Publish the store so other parts of the app can pick it up.
        global_vars['vectorstore'] = vectorstore

        parts = [f"✅ Successfully processed {len(documents)} document(s):\n\n"]
        for i, doc in enumerate(documents, 1):
            metadata = doc.metadata
            # Every field falls back to 'Unknown' (including 'source') so one
            # incomplete document cannot turn a successful run into an error.
            parts.append(
                f"{i}. **{metadata.get('source', 'Unknown')}**\n"
                f"   - University: {metadata.get('university', 'Unknown')}\n"
                f"   - Country: {metadata.get('country', 'Unknown')}\n"
                f"   - Type: {metadata.get('document_type', 'Unknown')}\n"
                f"   - Language: {metadata.get('language', 'Unknown')}\n\n"
            )
        parts.append(
            "🎉 **Ready for queries!** Go to the 'Search & Query' tab to start asking questions."
        )
        return "".join(parts)

    except Exception as e:
        # The message below points the user at the console, so make sure the
        # traceback is actually written there.
        traceback.print_exc()
        return f"❌ Error processing documents: {str(e)}\n\nPlease check the console for more details."


def _uploaded_path(item):
    """Return the filesystem path of one uploaded item, or None if it has none."""
    if isinstance(item, (str, os.PathLike)):
        return os.fspath(item)
    # File-like upload wrappers carry their path in ``.name``.
    name = getattr(item, "name", None)
    return name if isinstance(name, str) else None

def create_upload_tab(global_vars):
    """Create the Upload Documents tab.

    Builds the tab's widgets (instructions, a multi-file PDF upload, a
    process button and a read-only status box) and wires the button to
    ``upload_documents``.

    Args:
        global_vars: Shared state dict, passed through to
            ``upload_documents`` on every button click.
    """
    with gr.Tab("📄 Upload Documents", id="upload"):
        gr.Markdown("""
        ### Step 2: Upload PDF Documents
        Upload university documents (brochures, admission guides, etc.) in PDF format.
        The system will automatically extract metadata including university name, country, and document type.
        """)

        file_upload = gr.File(
            label="📁 Upload PDF Documents",
            file_types=[".pdf"],
            file_count="multiple",
            height=120
        )

        upload_btn = gr.Button(
            "📄 Process Documents",
            variant="primary",
            size="lg"
        )

        upload_status = gr.Textbox(
            label="Processing Status",
            interactive=False,
            lines=12,
            placeholder="Upload PDF files and click 'Process Documents'..."
        )

        # The lambda closes over global_vars so the click handler only has to
        # receive the uploaded files from the widget.
        upload_btn.click(
            lambda files: upload_documents(files, global_vars),
            inputs=file_upload,
            outputs=upload_status
        )