File size: 4,898 Bytes
12ffdf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import os
import google.generativeai as genai
import streamlit as st
from PyPDF2 import PdfReader
from collections import Counter
import re

# Read the Gemini API key from the environment; os.getenv returns None when
# the variable is not set.
api_key = os.getenv("GEMINI_API_KEY")

if api_key is None:
    # Without a key the only thing rendered is this error: the rest of the app
    # lives in the else branch below.
    st.error("API key not found. Please set the GEMINI_API_KEY environment variable.")
else:
    # Gemini Model Initialization
    MODEL_ID = "gemini-2.0-flash-exp"
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel(MODEL_ID)

    # Chat session that ask_ai() sends its prompts through.
    # NOTE(review): this statement is at script level, so it presumably runs on
    # every Streamlit rerun and yields a fresh chat each time - confirm.
    chat = model.start_chat()

    # Page title.
    st.title("πŸ“š AI-Powered Document Analyzer")

    # Collapsible description of the app.
    with st.expander("πŸ“– **What is this app about?**"):
        st.write("""
        The **AI-Powered Document Analyzer** app is an AI-powered tool designed to help users extract valuable insights from any PDF document. 
        By leveraging **Gemini 2.0's Flash Experimental Model**, this intelligent system allows users to interactively engage with their documents, 
        making research and information retrieval more efficient.
        """)

    # Upload Section: the uploader only accepts files with a .pdf extension.
    st.header("Upload Document")
    uploaded_file = st.file_uploader("Upload a PDF file to be analyzed", type=["pdf"])

    def extract_text_from_pdf(file):
        """Return the text of every page of a PDF, joined by newlines.

        Args:
            file: Whatever ``PdfReader`` accepts; it is passed through unchanged.

        Returns:
            The extracted page texts joined with ``"\\n"``. Pages whose
            extraction yields nothing (``None`` or an empty string) are skipped,
            so the result is ``""`` when no page has extractable text.
        """
        pdf_reader = PdfReader(file)
        # Extract each page exactly once: the same result is used both for the
        # emptiness check and for the output, instead of extracting twice.
        page_texts = (page.extract_text() for page in pdf_reader.pages)
        return "\n".join(text for text in page_texts if text)

    def extract_keywords(text, num_keywords=10):
        """Return the most frequent tokens of ``text``, most common first.

        The text is lower-cased and split into runs of four or more word
        characters; tokens found in a small stop-word list are ignored.

        Args:
            text: The text to analyse.
            num_keywords: Maximum number of keywords to return.

        Returns:
            A list of up to ``num_keywords`` tokens ordered by descending
            frequency.
        """
        # Stop words
        stop_words = set("the and for with from this that have will are was were been has".split())

        tally = Counter()
        # Tokens are runs of 4+ word characters
        for match in re.finditer(r'\b\w{4,}\b', text.lower()):
            token = match.group()
            if token in stop_words:
                continue
            tally[token] += 1

        return [token for token, _count in tally.most_common(num_keywords)]

    def generate_suggested_questions(keywords):
        """Build two sample questions for each keyword.

        For every keyword, in order, a "significance" question is followed by a
        "summarize" question, so the result has twice as many entries as
        ``keywords``.
        """
        return [
            prompt
            for topic in keywords
            for prompt in (
                f"What is the significance of {topic} in the document?",
                f"Can you summarize the document's section on {topic}?",
            )
        ]

    if not uploaded_file:
        # Nothing uploaded: drop any document state left from an earlier file.
        for stale_key in ("document_text", "suggested_questions"):
            st.session_state.pop(stale_key, None)
    else:
        # Extract the text and keep it in session state for the Q&A section.
        document_text = extract_text_from_pdf(uploaded_file)
        st.session_state["document_text"] = document_text
        st.success("Document uploaded successfully!")

        # Show the most frequent terms as a comma-separated line.
        st.header("πŸ”‘ Key Topic Insights")
        keywords = extract_keywords(document_text)
        st.write(", ".join(keywords))

        # Store the sample questions derived from those terms.
        st.session_state["suggested_questions"] = generate_suggested_questions(keywords)

    # Question-Answering Section: only rendered while extracted document text
    # is present in session state.
    if "document_text" in st.session_state:
        st.header("Ask AI About Your Document")

        # Make sure the key holding the current question exists (empty by
        # default) before it is read further down.
        if "selected_question" not in st.session_state:
            st.session_state["selected_question"] = ""

        def ask_ai(question):
            """Answer ``question`` about the uploaded document via the chat session.

            The prompt contains the question followed by the first 5000
            characters of the text stored under
            ``st.session_state['document_text']``.

            Returns:
                The text of the model's reply, or a string of the form
                ``"Error: ..."`` if building or sending the prompt raises.
            """
            try:
                # Only a leading excerpt of the document is sent to the model.
                excerpt = st.session_state['document_text'][:5000]
                prompt = f"Analyze the following document and answer: {question}\n\nDocument Content:\n{excerpt}"
                reply = chat.send_message(prompt)
                return reply.text
            except Exception as exc:
                # Failures are returned as text so the caller can display them.
                return f"Error: {exc}"

        def _use_suggested_question(question):
            """Button callback: put a clicked suggestion into the question box."""
            st.session_state["selected_question"] = question

        # Text input for entering a question. It is bound to the
        # "selected_question" session-state key, so the box always shows what is
        # stored there: text the user typed, or a suggestion set by the callback
        # above. Callbacks run before the script reruns, which is why a clicked
        # suggestion appears in the box immediately instead of one interaction
        # late.
        selected_question = st.text_input(
            "Enter your question about the document contents:",
            key="selected_question",
        )

        # Suggested Questions Section (between input and button)
        # Limit to 5 questions; .get() also covers the key being absent.
        limited_suggested_questions = st.session_state.get("suggested_questions", [])[:5]
        num_columns = len(limited_suggested_questions)

        # st.columns() rejects a column count of 0, so the whole section is
        # skipped when the document produced no suggestions.
        if num_columns:
            st.write("πŸ’‘ **Suggested Questions:**")

            # Display the suggestions side by side, one button per column.
            cols = st.columns(num_columns)
            for i, question in enumerate(limited_suggested_questions):
                with cols[i]:
                    st.button(
                        f"πŸ”Ή {question}",
                        key=f"btn_{i}",
                        on_click=_use_suggested_question,
                        args=(question,),
                    )

        # Generate Answer Button: nothing is sent while the question is empty.
        if st.button("Generate Answer") and selected_question:
            with st.spinner("AI is reading the document..."):
                response = ask_ai(selected_question)
                st.markdown(f"**Response:** \n {response}")
    else:
        # No document text in session state: ask the user to upload a file.
        st.warning("Please upload a document to proceed.")