Spaces:

chiichann
/

pdf_document_analyzer

Sleeping

App Files Files Community

pdf_document_analyzer / app.py

chiichann

first sync with remote code

12ffdf7 2 months ago

raw

history blame contribute delete

4.9 kB

	import os
	import google.generativeai as genai
	import streamlit as st
	from PyPDF2 import PdfReader
	from collections import Counter
	import re

	# Get the API key from environment variable
	# (os.getenv returns None when GEMINI_API_KEY is not set).
	api_key = os.getenv("GEMINI_API_KEY")

	# When the key is missing, an error message is shown on the page via st.error.
	# NOTE(review): the nesting indentation of this script is missing in this copy,
	# so how many of the statements after `else:` belong to that branch cannot be
	# read off the text -- confirm against the original file before re-indenting.
	if api_key is None:
	st.error("API key not found. Please set the GEMINI_API_KEY environment variable.")
	else:
	# Gemini Model Initialization: register the key with the SDK and build a
	# model handle for MODEL_ID.
	MODEL_ID = "gemini-2.0-flash-exp"
	genai.configure(api_key=api_key)
	model = genai.GenerativeModel(MODEL_ID)

	# Start a chat session on the model; `chat` is the object ask_ai() later
	# sends its prompts through.
	chat = model.start_chat()

	# Page title.
	st.title("📚 AI-Powered Document Analyzer")

	# Expander holding a short description of the app.
	with st.expander("📖 What is this app about?"):
	st.write("""
	The AI-Powered Document Analyzer app is an AI-powered tool designed to help users extract valuable insights from any PDF document.
	By leveraging Gemini 2.0's Flash Experimental Model, this intelligent system allows users to interactively engage with their documents,
	making research and information retrieval more efficient.
	""")

	# Upload Section: the uploader is restricted to the "pdf" type; its result is
	# consumed by the `if uploaded_file:` statement further down the script.
	st.header("Upload Document")
	uploaded_file = st.file_uploader("Upload a PDF file to be analyzed", type=["pdf"])

	def extract_text_from_pdf(file):
	    """Return the text of every page of a PDF, joined with newlines.

	    Args:
	        file: The PDF source handed straight to ``PdfReader`` (in this script,
	            the object returned by the Streamlit file uploader).

	    Returns:
	        A single string with the text of each page separated by a newline.
	        Pages whose ``extract_text()`` result is empty or ``None`` are skipped.
	    """
	    # Extract each page exactly once: the previous form called extract_text()
	    # twice per page (once to filter, once to keep), doubling the parsing work.
	    page_texts = (page.extract_text() for page in PdfReader(file).pages)
	    return "\n".join(text for text in page_texts if text)

	def extract_keywords(text, num_keywords=10):
	    """Return the most frequent words of ``text``, most common first.

	    The text is lower-cased, split into words of four or more word
	    characters, stripped of a small fixed stop-word list, and counted.

	    Args:
	        text: Document text to analyse. ``None`` or an empty string yields
	            an empty list.
	        num_keywords: Maximum number of keywords to return.

	    Returns:
	        A list of up to ``num_keywords`` lower-case words ordered by
	        descending frequency; ties keep their order of first appearance.
	    """
	    # Nothing to analyse (e.g. a PDF with no extractable text).
	    if not text:
	        return []

	    stop_words = frozenset(
	        "the and for with from this that have will are was were been has".split()
	    )
	    # Extract words with 4+ letters
	    candidates = re.findall(r'\b\w{4,}\b', text.lower())
	    counts = Counter(word for word in candidates if word not in stop_words)
	    return [word for word, _ in counts.most_common(num_keywords)]

	def generate_suggested_questions(keywords):
	    """Generate sample questions based on extracted keywords.

	    Args:
	        keywords: Iterable of keyword strings.

	    Returns:
	        A list with two questions per keyword, in keyword order: first the
	        "significance" question, then the "summarize" question.
	    """
	    questions = []
	    for keyword in keywords:
	        questions.extend((
	            f"What is the significance of {keyword} in the document?",
	            f"Can you summarize the document's section on {keyword}?",
	        ))
	    return questions

	if uploaded_file:
	    # A file is present: extract its text and keep it in session state, which
	    # is where the question-answering section looks for "document_text".
	    document_text = extract_text_from_pdf(uploaded_file)
	    st.session_state["document_text"] = document_text
	    st.success("Document uploaded successfully!")

	    # Display Keyword Insights
	    st.header("🔑 Key Topic Insights")
	    keywords = extract_keywords(document_text)
	    st.write(", ".join(keywords))

	    # Generate Suggested Questions
	    st.session_state["suggested_questions"] = generate_suggested_questions(keywords)
	else:
	    # No file is uploaded: drop whatever an earlier upload left in session
	    # state (pop with a default, so missing keys are not an error).
	    st.session_state.pop("document_text", None)
	    st.session_state.pop("suggested_questions", None)

	# Question-Answering Section
	if "document_text" in st.session_state:
	    st.header("Ask AI About Your Document")

	    # Seed the slot that the suggested-question buttons write into, so the
	    # text input below always has a value to start from.
	    if "selected_question" not in st.session_state:
	        st.session_state["selected_question"] = ""

	    def ask_ai(question):
	        """Answer ``question`` about the uploaded document through the chat.

	        Only the first 5000 characters of the stored document text are put
	        into the prompt.

	        Args:
	            question: The user's question as plain text.

	        Returns:
	            The text of the model's reply, or an ``"Error: ..."`` string when
	            building or sending the prompt raises, so the page shows the
	            failure in place of an answer instead of a traceback.
	        """
	        try:
	            prompt = f"Analyze the following document and answer: {question}\n\nDocument Content:\n{st.session_state['document_text'][:5000]}"
	            response = chat.send_message(prompt)  # Sending the message to 'chat'
	            return response.text
	        except Exception as e:
	            return f"Error: {e}"

	    # Text input for entering a question
	    selected_question = st.text_input(
	        "Enter your question about the document contents:",
	        value=st.session_state["selected_question"]
	    )

	    # Suggested Questions Section (between input and button)
	    # Limit to 5 questions
	    if "suggested_questions" in st.session_state:
	        limited_suggested_questions = st.session_state["suggested_questions"][:5]
	    else:
	        limited_suggested_questions = []

	    # Render the row only when there is something to show: st.columns()
	    # rejects a column count of 0, which happens when no keywords could be
	    # extracted from the document.
	    if limited_suggested_questions:
	        st.write("💡 Suggested Questions:")

	        # Display in a row, one column per question
	        cols = st.columns(len(limited_suggested_questions))
	        for i, (col, question) in enumerate(zip(cols, limited_suggested_questions)):
	            with col:
	                if st.button(f"🔹 {question}", key=f"btn_{i}"):
	                    # NOTE(review): the text input above has already been
	                    # created with the previous value by the time this runs --
	                    # confirm the clicked question shows up as intended.
	                    st.session_state["selected_question"] = question

	    # Generate Answer Button
	    if st.button("Generate Answer") and selected_question:
	        with st.spinner("AI is reading the document..."):
	            response = ask_ai(selected_question)
	            st.markdown(f"Response: \n {response}")
	else:
	    st.warning("Please upload a document to proceed.")