Spaces:
Sleeping
Sleeping
import os | |
import google.generativeai as genai | |
import streamlit as st | |
from PyPDF2 import PdfReader | |
from collections import Counter | |
import re | |
# Get the API key from environment variable
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    # An unset *or empty* variable cannot authenticate. Halt this script run
    # here: the question-answering code later calls `chat.send_message`, and
    # `chat` is only created below once a key is available.
    st.error("API key not found. Please set the GEMINI_API_KEY environment variable.")
    st.stop()

# Gemini Model Initialization
MODEL_ID = "gemini-2.0-flash-exp"
genai.configure(api_key=api_key)
model = genai.GenerativeModel(MODEL_ID)
# Chat session through which prompts are sent to the model.
chat = model.start_chat()
# Page title followed by a collapsible description of the app.
st.title("π AI-Powered Document Analyzer")
about_panel = st.expander("π **What is this app about?**")
about_panel.write("""
The **AI-Powered Document Analyzer** app is an AI-powered tool designed to help users extract valuable insights from any PDF document.
By leveraging **Gemini 2.0's Flash Experimental Model**, this intelligent system allows users to interactively engage with their documents,
making research and information retrieval more efficient.
""")

# Upload Section: the uploader only accepts files with a .pdf extension.
st.header("Upload Document")
uploaded_file = st.file_uploader(
    label="Upload a PDF file to be analyzed",
    type=["pdf"],
)
def extract_text_from_pdf(file):
    """Return the text of all pages of a PDF, joined by newlines.

    Args:
        file: The PDF source, handed unchanged to ``PdfReader``.

    Returns:
        The text of each page separated by a newline. Pages that yield no
        text are skipped, so the result is an empty string when no page
        has any text.
    """
    # Run the extraction once per page and reuse the result for both the
    # emptiness check and the output, instead of extracting every page twice.
    page_texts = (page.extract_text() for page in PdfReader(file).pages)
    return "\n".join(text for text in page_texts if text)
def extract_keywords(text, num_keywords=10):
    """Return the most frequent words of *text*, most common first.

    The text is lower-cased and split into runs of four or more word
    characters; a small set of stop words is discarded before counting.

    Args:
        text: The text to analyse.
        num_keywords: Maximum number of keywords to return.

    Returns:
        A list of at most ``num_keywords`` words ordered by descending count.
    """
    stop_words = set("the and for with from this that have will are was were been has".split())
    tokens = re.findall(r'\b\w{4,}\b', text.lower())
    frequencies = Counter(token for token in tokens if token not in stop_words)
    return [token for token, _count in frequencies.most_common(num_keywords)]
def generate_suggested_questions(keywords):
    """Build sample questions from the extracted keywords.

    Each keyword contributes two questions, in this order: one about its
    significance and one asking for a summary of its section. Keywords are
    processed in the order given.
    """
    return [
        question
        for keyword in keywords
        for question in (
            f"What is the significance of {keyword} in the document?",
            f"Can you summarize the document's section on {keyword}?",
        )
    ]
# Turn the uploaded file (if any) into text. `document_text` stays empty when
# nothing was uploaded, the file could not be read, or it contains no text.
document_text = ""
if uploaded_file:
    try:
        document_text = extract_text_from_pdf(uploaded_file)
    except Exception as exc:
        # UI boundary: report an unreadable PDF on the page instead of
        # letting the exception abort the whole script run.
        st.error(f"Could not read the uploaded PDF: {exc}")
    else:
        if not document_text.strip():
            st.warning("No readable text was found in this PDF.")

if document_text.strip():
    st.session_state["document_text"] = document_text
    st.success("Document uploaded successfully!")
    # Display Keyword Insights
    st.header("π Key Topic Insights")
    keywords = extract_keywords(document_text)
    st.write(", ".join(keywords))
    # Generate Suggested Questions; only a non-empty list is stored so that
    # code reading this key never receives zero suggestions.
    suggested_questions = generate_suggested_questions(keywords)
    if suggested_questions:
        st.session_state["suggested_questions"] = suggested_questions
    else:
        st.session_state.pop("suggested_questions", None)
else:
    # No usable document: drop anything left over from a previous upload.
    st.session_state.pop("document_text", None)
    st.session_state.pop("suggested_questions", None)
# Question-Answering Section
if "document_text" in st.session_state:
    st.header("Ask AI About Your Document")

    # Only this many leading characters of the document are put in the prompt.
    MAX_PROMPT_CHARS = 5000
    # At most this many suggested questions are offered as buttons.
    MAX_SUGGESTIONS = 5

    # Backing state for the question box (bound to it through the widget key).
    if "selected_question" not in st.session_state:
        st.session_state["selected_question"] = ""

    def ask_ai(question):
        """Ask the model *question* about the uploaded document.

        The prompt contains the question followed by the first
        ``MAX_PROMPT_CHARS`` characters of the stored document text.

        Returns:
            The text of the model's reply, or a string starting with
            "Error: " describing the exception if anything went wrong.
        """
        try:
            excerpt = st.session_state["document_text"][:MAX_PROMPT_CHARS]
            prompt = f"Analyze the following document and answer: {question}\n\nDocument Content:\n{excerpt}"
            response = chat.send_message(prompt)  # Sending the message to 'chat'
            return response.text
        except Exception as e:
            return f"Error: {e}"

    def _use_suggested_question(question):
        """Button callback: copy *question* into the question box."""
        st.session_state["selected_question"] = question

    # Text input for entering a question. It is keyed on "selected_question",
    # so a value written by the callback above is what the box displays.
    selected_question = st.text_input(
        "Enter your question about the document contents:",
        key="selected_question",
    )

    # Suggested Questions Section (between input and button).
    # Skipped entirely when there are no suggestions, because st.columns()
    # rejects a column count of zero.
    limited_suggested_questions = st.session_state.get("suggested_questions", [])[:MAX_SUGGESTIONS]
    if limited_suggested_questions:
        # NOTE(review): the leading glyphs in the two labels below look like
        # mis-decoded emoji; confirm against the original file's encoding.
        st.write("π‘ **Suggested Questions:**")
        cols = st.columns(len(limited_suggested_questions))
        for i, (col, question) in enumerate(zip(cols, limited_suggested_questions)):
            with col:
                # on_click runs before the script reruns, so the chosen
                # question is already in the text box when it is drawn.
                st.button(
                    f"πΉ {question}",
                    key=f"btn_{i}",
                    on_click=_use_suggested_question,
                    args=(question,),
                )

    # Generate Answer Button
    if st.button("Generate Answer") and selected_question:
        with st.spinner("AI is reading the document..."):
            response = ask_ai(selected_question)
        st.markdown(f"**Response:** \n {response}")
else:
    st.warning("Please upload a document to proceed.")