chiichann committed on
Commit
12ffdf7
·
1 Parent(s): 93c282f

first sync with remote code

Browse files
Files changed (2) hide show
  1. app.py +114 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import google.generativeai as genai
3
+ import streamlit as st
4
+ from PyPDF2 import PdfReader
5
+ from collections import Counter
6
+ import re
7
+
8
+ # Read the Gemini API key from the GEMINI_API_KEY environment variable
9
+ api_key = os.getenv("GEMINI_API_KEY")
10
+
11
+ if api_key is None:
12
+ st.error("API key not found. Please set the GEMINI_API_KEY environment variable.")
13
+ else:
14
+ # Configure the Gemini client with the key and create the model handle
15
+ MODEL_ID = "gemini-2.0-flash-exp"
16
+ genai.configure(api_key=api_key)
17
+ model = genai.GenerativeModel(MODEL_ID)
18
+
19
+ # Start the chat session that ask_ai() sends its prompts through
20
+ chat = model.start_chat()
21
+
22
+ st.title("πŸ“š AI-Powered Document Analyzer")
23
+
24
+ with st.expander("πŸ“– **What is this app about?**"):
25
+ st.write("""
26
+ The **AI-Powered Document Analyzer** app is an AI-powered tool designed to help users extract valuable insights from any PDF document.
27
+ By leveraging **Gemini 2.0's Flash Experimental Model**, this intelligent system allows users to interactively engage with their documents,
28
+ making research and information retrieval more efficient.
29
+ """)
30
+
31
+ # Upload section: file uploader restricted to the "pdf" type
32
+ st.header("Upload Document")
33
+ uploaded_file = st.file_uploader("Upload a PDF file to be analyzed", type=["pdf"])
34
+
35
def extract_text_from_pdf(file):
    """Return the text of every page in a PDF, joined by newlines.

    Args:
        file: Whatever ``PdfReader`` accepts as its source (here, the
            object returned by the Streamlit file uploader).

    Returns:
        The extracted text of all pages that yielded any text, separated
        by ``"\\n"``. Pages whose extraction result is empty or ``None``
        are skipped, so the result is ``""`` when no page has text.
    """
    pdf_reader = PdfReader(file)
    # Extract each page exactly once: text extraction is the expensive step,
    # and the previous version ran it twice per page (filter + collect).
    page_texts = (page.extract_text() for page in pdf_reader.pages)
    return "\n".join(text for text in page_texts if text)
38
+
39
def extract_keywords(text, num_keywords=10):
    """Return the most frequent non-stop-word tokens found in *text*.

    The text is lower-cased and split into tokens of four or more word
    characters. Tokens listed in the stop-word set are dropped, and the
    remaining tokens are ranked by how often they occur.

    Args:
        text: The document text to analyse.
        num_keywords: Maximum number of keywords to return.

    Returns:
        A list of up to ``num_keywords`` tokens, most frequent first.
    """
    stop_words = set("the and for with from this that have will are was were been has".split())
    frequency = Counter(
        token
        for token in re.findall(r'\b\w{4,}\b', text.lower())
        if token not in stop_words
    )
    return [token for token, _count in frequency.most_common(num_keywords)]
45
+
46
def generate_suggested_questions(keywords):
    """Build two sample questions for each keyword, in keyword order.

    Args:
        keywords: Iterable of keyword strings.

    Returns:
        A flat list holding, for every keyword, a "significance" question
        followed by a "summarize" question.
    """
    return [
        question
        for topic in keywords
        for question in (
            f"What is the significance of {topic} in the document?",
            f"Can you summarize the document's section on {topic}?",
        )
    ]
53
+
54
+ if uploaded_file:
55
+ document_text = extract_text_from_pdf(uploaded_file)
56
+ st.session_state["document_text"] = document_text
57
+ st.success("Document uploaded successfully!")
58
+
59
+ # Show the most frequent terms extracted from the document text
60
+ st.header("πŸ”‘ Key Topic Insights")
61
+ keywords = extract_keywords(document_text)
62
+ st.write(", ".join(keywords))
63
+
64
+ # Store keyword-based sample questions for the question-answering section
65
+ st.session_state["suggested_questions"] = generate_suggested_questions(keywords)
66
+ else:
67
+ st.session_state.pop("document_text", None) # No file uploaded: drop any stale document text
68
+ st.session_state.pop("suggested_questions", None)
69
+
70
+ # Question-answering section, shown only when document text is in session state
71
+ if "document_text" in st.session_state:
72
+ st.header("Ask AI About Your Document")
73
+
74
+ # Initialise the question slot that the suggested-question buttons write to
75
+ if "selected_question" not in st.session_state:
76
+ st.session_state["selected_question"] = ""
77
+
78
def ask_ai(question):
    """Ask the chat session *question* about the stored document.

    The prompt embeds the first 5000 characters of
    ``st.session_state['document_text']`` after the question.

    Args:
        question: The user's question text.

    Returns:
        The ``text`` of the chat response, or an ``"Error: ..."`` string
        describing the exception if any step raises.
    """
    try:
        excerpt = st.session_state['document_text'][:5000]
        prompt = (
            f"Analyze the following document and answer: {question}"
            f"\n\nDocument Content:\n{excerpt}"
        )
        reply = chat.send_message(prompt)  # goes through the shared 'chat' session
        return reply.text
    except Exception as exc:
        return f"Error: {exc}"
86
+
87
+ # Text input for the question, pre-filled from session state
88
+ selected_question = st.text_input(
89
+ "Enter your question about the document contents:",
90
+ value=st.session_state["selected_question"]
91
+ )
92
+
93
+ # Suggested questions, rendered between the text input and the answer button
94
+ if "suggested_questions" in st.session_state:
95
+ st.write("πŸ’‘ **Suggested Questions:**")
96
+
97
+ # Show at most the first 5 suggested questions
98
+ limited_suggested_questions = st.session_state["suggested_questions"][:5]
99
+ num_columns = len(limited_suggested_questions)
100
+
101
+ # One column per question; NOTE(review): num_columns is 0 when the list is empty - confirm st.columns accepts that
102
+ cols = st.columns(num_columns)
103
+ for i, question in enumerate(limited_suggested_questions):
104
+ with cols[i]:
105
+ if st.button(f"πŸ”Ή {question}", key=f"btn_{i}"):
106
+ st.session_state["selected_question"] = question
107
+
108
+ # Query the model only when the button is clicked and a question is present
109
+ if st.button("Generate Answer") and selected_question:
110
+ with st.spinner("AI is reading the document..."):
111
+ response = ask_ai(selected_question)
112
+ st.markdown(f"**Response:** \n {response}")
113
+ else:
114
+ st.warning("Please upload a document to proceed.")
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ streamlit
2
+ google-generativeai
3
+ PyPDF2