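# Streamlit front end: upload PDFs, chunk and embed their text, then answer questions about them.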
import streamlit as st
import os
from utils.pdf_utils import PDFProcessor
from utils.embeddings_utils import EmbeddingsManager
from utils.qa_utils import QASystem
from dotenv import load_dotenv
import openai
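

# Make sure every session_state key used below exists, so values survive Streamlit reruns.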
def initialize_session_state():
    if 'pdf_processor' not in st.session_state:
        st.session_state['pdf_processor'] = None
    if 'embeddings_manager' not in st.session_state:
        st.session_state['embeddings_manager'] = None
    if 'qa_system' not in st.session_state:
        st.session_state['qa_system'] = None
    if 'processed_pdfs' not in st.session_state:
        st.session_state['processed_pdfs'] = set()
    if 'all_text_chunks' not in st.session_state:
        st.session_state['all_text_chunks'] = []
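

# Build the app: page config, header, sidebar navigation, and the three views.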
def main():
    load_dotenv()
    st.set_page_config(page_title="AI-Powered PDF Assistant", layout="wide")
    initialize_session_state()

    # Header Section
    st.markdown(
        """
        <style>
        .main-header {
            font-size: 2.5rem;
            color: #1F77B4;
            text-align: center;
            margin-bottom: 1rem;
        }
        .sub-header {
            font-size: 1.25rem;
            color: #555;
            text-align: center;
            margin-bottom: 2rem;
        }
        </style>
        <div class="main-header">📄 AI-Powered PDF Assistant</div>
        <div class="sub-header">Upload, Analyze, and Interact with Your Documents</div>
        """,
        unsafe_allow_html=True
    )

    # Navigation Menu
    selected_page = st.sidebar.radio(
        "Navigate", ["Upload PDFs", "Ask Questions", "About"]
    )
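
    # The OpenAI key is expected in the environment (loaded from .env by load_dotenv), e.g.:
    #   OPENAI_API_KEY=sk-...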
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        st.sidebar.error("OpenAI API key not found in .env file!")
        return
    # Module-level key configuration; this and the openai.error handler below assume the pre-1.0 openai SDK.
    openai.api_key = api_key
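
    # Construct the PDF, embeddings, and QA helpers once per session and reuse them across reruns.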
    if not st.session_state['pdf_processor']:
        st.session_state['pdf_processor'] = PDFProcessor()
    if not st.session_state['embeddings_manager']:
        st.session_state['embeddings_manager'] = EmbeddingsManager(api_key)
    if not st.session_state['qa_system']:
        st.session_state['qa_system'] = QASystem(api_key)
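
    # Upload PDFs page: extract text from each file, chunk it, and generate embeddings.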
    if selected_page == "Upload PDFs":
        st.header("📤 Upload PDFs")
        st.markdown(
            """<p style='font-size: 1.1rem;'>Drag and drop your PDF files below to extract and process content for analysis.</p>""",
            unsafe_allow_html=True
        )
        uploaded_files = st.file_uploader(
            "Upload PDF files", type=['pdf'], accept_multiple_files=True
        )
        if uploaded_files:
            # Skip files that were already processed earlier in this session.
            new_files = [f for f in uploaded_files if f.name not in st.session_state['processed_pdfs']]
            if new_files:
                with st.spinner("Processing PDFs..."):
                    for pdf_file in new_files:
                        try:
                            pages = st.session_state['pdf_processor'].extract_text(pdf_file)
                            for page_text in pages.values():
                                chunks = st.session_state['pdf_processor'].chunk_text(page_text)
                                st.session_state['all_text_chunks'].extend(chunks)
                            st.session_state['processed_pdfs'].add(pdf_file.name)
                        except Exception as e:
                            st.error(f"Error processing {pdf_file.name}: {str(e)}")
                            continue
                # Embed the accumulated text chunks from all processed documents.
                with st.spinner("Generating embeddings..."):
                    try:
                        st.session_state['embeddings_manager'].generate_embeddings(
                            st.session_state['all_text_chunks']
                        )
                        st.success("✅ Documents processed successfully!")
                    except Exception as e:
                        st.error(f"Error generating embeddings: {str(e)}")
    elif selected_page == "Ask Questions":
        st.header("❓ Ask Questions")
        st.markdown(
            """<p style='font-size: 1.1rem;'>Query your uploaded documents and get precise answers backed by AI-powered analysis.</p>""",
            unsafe_allow_html=True
        )
        if st.session_state['all_text_chunks']:
            question = st.text_input("Enter your question:")
            if question:
                try:
                    with st.spinner("Finding relevant information..."):
                        relevant_chunks = st.session_state['embeddings_manager'].find_relevant_chunks(
                            question, k=3
                        )
                        answer = st.session_state['qa_system'].generate_answer(
                            question, relevant_chunks
                        )
                    st.markdown("### 🤖 Answer")
                    st.write(answer)
                    with st.expander("📄 View Source Context"):
                        for i, chunk in enumerate(relevant_chunks, 1):
                            st.markdown(f"**Context {i}:**")
                            st.write(chunk)
                            st.markdown("---")
                except openai.error.RateLimitError:
                    st.error("Rate limit exceeded. Please try again later.")
                except Exception as e:
                    st.error(f"Error: {str(e)}")
        else:
            st.warning("Please upload and process documents in the 'Upload PDFs' section first.")
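
    # About page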
    elif selected_page == "About":
        st.header("ℹ️ About This App")
        st.markdown(
            """
            <p style='font-size: 1.1rem;'>
            <b>AI-Powered PDF Assistant</b> is a smart solution for extracting and querying information from PDF files. With powerful AI integrations,
            this tool allows seamless document analysis and interaction.
            </p>
            <h3>🚀 Key Features</h3>
            <ul>
                <li>Upload and process multiple PDF files</li>
                <li>Generate embeddings for precise content retrieval</li>
                <li>Query documents and receive context-aware answers</li>
            </ul>
            <h3>🛠️ Technologies Used</h3>
            <ul>
                <li>Streamlit for interactive UI</li>
                <li>OpenAI GPT API for Q&A</li>
                <li>Custom PDF processing and embedding tools</li>
            </ul>
            <p style='text-align: center;'>
                Built with ❤️ by [Your Name]
            </p>
            """,
            unsafe_allow_html=True
        )


if __name__ == "__main__":
    main()