# File size: 6,608 Bytes
# 0753d2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import streamlit as st
import os
from utils.pdf_utils import PDFProcessor
from utils.embeddings_utils import EmbeddingsManager
from utils.qa_utils import QASystem
from dotenv import load_dotenv
import openai

def initialize_session_state():
    """Seed ``st.session_state`` with defaults for any key not yet present.

    Keys that already exist are left untouched, so values stored by an
    earlier script run are preserved. Each default is built by a factory so
    that the mutable ones (the set and the list) are fresh objects.
    """
    default_factories = (
        ('pdf_processor', lambda: None),
        ('embeddings_manager', lambda: None),
        ('qa_system', lambda: None),
        ('processed_pdfs', set),
        ('all_text_chunks', list),
    )
    for state_key, make_default in default_factories:
        if state_key not in st.session_state:
            st.session_state[state_key] = make_default()

def _render_header():
    """Inject the page CSS and render the title and sub-title banner."""
    st.markdown(
        """
        <style>
        .main-header {
            font-size: 2.5rem;
            color: #1F77B4;
            text-align: center;
            margin-bottom: 1rem;
        }
        .sub-header {
            font-size: 1.25rem;
            color: #555;
            text-align: center;
            margin-bottom: 2rem;
        }
        </style>
        <div class="main-header">📘 AI-Powered PDF Assistant</div>
        <div class="sub-header">Upload, Analyze, and Interact with Your Documents</div>
        """,
        unsafe_allow_html=True
    )


def _ensure_services(api_key):
    """Create the PDF processor, embeddings manager and QA system if unset.

    The objects are stored in ``st.session_state`` so they are built only
    once per session. The check is an explicit ``is None`` so that an
    existing object is never rebuilt merely because it evaluates as falsy.
    """
    if st.session_state['pdf_processor'] is None:
        st.session_state['pdf_processor'] = PDFProcessor()
    if st.session_state['embeddings_manager'] is None:
        st.session_state['embeddings_manager'] = EmbeddingsManager(api_key)
    if st.session_state['qa_system'] is None:
        st.session_state['qa_system'] = QASystem(api_key)


def _rate_limit_error_class():
    """Return the OpenAI rate-limit exception class, or ``None`` if absent.

    Newer SDK releases expose ``openai.RateLimitError`` at the package top
    level, while older ones keep it under ``openai.error``. Looking it up
    with ``getattr`` avoids an ``AttributeError`` on whichever layout is
    not installed.
    """
    error_cls = getattr(openai, "RateLimitError", None)
    if error_cls is None:
        legacy_module = getattr(openai, "error", None)
        error_cls = getattr(legacy_module, "RateLimitError", None)
    return error_cls


def _render_upload_page():
    """Render the upload page and process any not-yet-processed PDFs."""
    st.header("📤 Upload PDFs")
    st.markdown(
        """<p style='font-size: 1.1rem;'>Drag and drop your PDF files below to extract and process content for analysis.</p>""",
        unsafe_allow_html=True
    )

    uploaded_files = st.file_uploader(
        "Upload PDF files", type=['pdf'], accept_multiple_files=True
    )
    if not uploaded_files:
        return

    # Files are de-duplicated by name across reruns of the script.
    new_files = [f for f in uploaded_files if f.name not in st.session_state['processed_pdfs']]
    if not new_files:
        return

    processor = st.session_state['pdf_processor']
    processed_any = False
    with st.spinner("Processing PDFs..."):
        for pdf_file in new_files:
            try:
                # presumably a mapping of page -> page text; only the
                # values are used here.
                pages = processor.extract_text(pdf_file)
                file_chunks = []
                for page_text in pages.values():
                    file_chunks.extend(processor.chunk_text(page_text))
            except Exception as e:
                st.error(f"Error processing {pdf_file.name}: {str(e)}")
                continue
            # Commit a file's chunks only once the whole file succeeded, so
            # a failure part-way through leaves no orphan chunks that would
            # be duplicated when the file is retried.
            st.session_state['all_text_chunks'].extend(file_chunks)
            st.session_state['processed_pdfs'].add(pdf_file.name)
            processed_any = True

    if not processed_any:
        # Every new file failed: nothing changed, so do not re-embed or
        # report success.
        return

    with st.spinner("Generating embeddings..."):
        try:
            st.session_state['embeddings_manager'].generate_embeddings(
                st.session_state['all_text_chunks']
            )
        except Exception as e:
            st.error(f"Error generating embeddings: {str(e)}")
        else:
            st.success("✅ Documents processed successfully!")


def _render_ask_page():
    """Render the question page and answer a query from the stored chunks."""
    st.header("❓ Ask Questions")
    st.markdown(
        """<p style='font-size: 1.1rem;'>Query your uploaded documents and get precise answers backed by AI-powered analysis.</p>""",
        unsafe_allow_html=True
    )

    if not st.session_state['all_text_chunks']:
        st.warning("Please upload and process documents in the 'Upload PDFs' section first.")
        return

    question = st.text_input("Enter your question:")
    if not question:
        return

    try:
        with st.spinner("Finding relevant information..."):
            relevant_chunks = st.session_state['embeddings_manager'].find_relevant_chunks(
                question, k=3
            )
            answer = st.session_state['qa_system'].generate_answer(
                question, relevant_chunks
            )

        st.markdown("### 🤖 Answer")
        st.write(answer)

        with st.expander("🔍 View Source Context"):
            for i, chunk in enumerate(relevant_chunks, 1):
                st.markdown(f"**Context {i}:**")
                st.write(chunk)
                st.markdown("---")
    except Exception as e:
        # The rate-limit class is resolved here rather than named in the
        # ``except`` clause, because its location differs between SDK
        # generations.
        rate_limit_cls = _rate_limit_error_class()
        if rate_limit_cls is not None and isinstance(e, rate_limit_cls):
            st.error("Rate limit exceeded. Please try again later.")
        else:
            st.error(f"Error: {str(e)}")


def _render_about_page():
    """Render the static 'About' page."""
    st.header("ℹ️ About This App")
    st.markdown(
        """
        <p style='font-size: 1.1rem;'>
        <b>AI-Powered PDF Assistant</b> is a smart solution for extracting and querying information from PDF files. With powerful AI integrations, 
        this tool allows seamless document analysis and interaction.
        </p>

        <h3>🔑 Key Features</h3>
        <ul>
            <li>Upload and process multiple PDF files</li>
            <li>Generate embeddings for precise content retrieval</li>
            <li>Query documents and receive context-aware answers</li>
        </ul>

        <h3>🛠️ Technologies Used</h3>
        <ul>
            <li>Streamlit for interactive UI</li>
            <li>OpenAI GPT API for Q&A</li>
            <li>Custom PDF processing and embedding tools</li>
        </ul>

        <p style='text-align: center;'>
        Built with ❤️ by [Your Name]
        </p>
        """,
        unsafe_allow_html=True
    )


def main():
    """Run the Streamlit app: configure the page, then render one page.

    Loads environment variables via ``load_dotenv()``, seeds the session
    state, draws the header and sidebar navigation, and dispatches to the
    selected page. If ``OPENAI_API_KEY`` is not set, an error is shown in
    the sidebar and nothing further is rendered.
    """
    load_dotenv()
    st.set_page_config(page_title="AI-Powered PDF Assistant", layout="wide")

    initialize_session_state()

    # Header Section
    _render_header()

    # Navigation Menu
    selected_page = st.sidebar.radio(
        "Navigate", ["Upload PDFs", "Ask Questions", "About"]
    )

    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        st.sidebar.error("OpenAI API key not found in .env file!")
        return

    openai.api_key = api_key
    _ensure_services(api_key)

    if selected_page == "Upload PDFs":
        _render_upload_page()
    elif selected_page == "Ask Questions":
        _render_ask_page()
    elif selected_page == "About":
        _render_about_page()

# Script entry point: call main() only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()