|
import streamlit as st |
|
from base64 import b64encode |
|
import os |
|
from dotenv import load_dotenv |
|
from io import BytesIO |
|
from PyPDF2 import PdfReader |
|
from langchain_community.document_loaders import PyPDFLoader, UnstructuredPDFLoader, OnlinePDFLoader |
|
from tempfile import NamedTemporaryFile |
|
|
|
import google.generativeai as genai |
|
|
|
# Read settings from a .env file (when one exists) into the process environment.
load_dotenv()

# Hand the Gemini client the API key taken from the GOOGLE_API_KEY variable.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
|
|
|
def prepare_prompt(question, context):
    """Build the prompt sent to the model for one question about a document.

    Args:
        question: The user's question, inserted verbatim at the end of the
            prompt.
        context: The document text. Either a single string or a list/tuple
            of per-page strings; pages are joined with blank lines so the
            model sees plain text rather than the ``repr`` of a Python list.
            ``None`` means no document has been loaded yet.

    Returns:
        str: The assembled prompt, or the fixed message
        ``"Please upload a PDF first."`` when ``context`` is ``None``.
    """
    if context is None:
        return "Please upload a PDF first."

    if isinstance(context, (list, tuple)):
        # Interpolating the sequence directly would embed brackets, quotes
        # and escaped newlines ("['page one', 'page two']") in the prompt.
        # Empty or missing pages carry no information, so they are skipped.
        context = "\n\n".join(str(page) for page in context if page)

    return f"""
You are an expert in analyzing the context and providing accurate and comprehensive answers based on the context.
Use the context provided below and answer comprehensively to the question at the end.

Context: {context}

Question:{question}

"""
|
|
|
|
|
|
|
def get_gemini_response(prompt):
    """Send ``prompt`` to the ``gemini-pro`` model and return the reply text.

    The prompt is echoed to standard output before the request is made.

    Args:
        prompt: The prompt passed to ``generate_content``.

    Returns:
        The ``text`` attribute of the object returned by
        ``generate_content``.
    """
    print(prompt)
    return genai.GenerativeModel('gemini-pro').generate_content(prompt).text
|
|
|
def extract_text(uploaded_file):
    """Extract the text of a PDF as a list of strings.

    Args:
        uploaded_file: Either a URL string, which is loaded with
            ``OnlinePDFLoader``, or a file-like object accepted by
            ``PdfReader`` (the script passes the value returned by
            ``st.file_uploader``).

    Returns:
        list: The extracted text. For a file-like object there is one entry
        per PDF page; for a URL there is one entry per document returned by
        the loader.

    Side effects:
        Stores the returned list in ``st.session_state["text"]``.
    """
    if isinstance(uploaded_file, str):
        loader = OnlinePDFLoader(uploaded_file)
        print("Fetching Url")
        # The loader only fetches and parses when load() is called; each
        # returned document contributes its page_content.
        pages = [document.page_content for document in loader.load()]
    else:
        pdf_reader = PdfReader(uploaded_file)
        # A page with no extractable text still gets an (empty) entry so the
        # list stays aligned with the PDF's page order.
        pages = [page.extract_text() or "" for page in pdf_reader.pages]

    # Store what was actually extracted, not the module-level ``text``
    # variable, which is still None while this function runs.
    st.session_state["text"] = pages

    return pages
|
|
|
|
|
# --- Streamlit page: upload a PDF, ask questions, show the transcript ---

st.set_page_config(page_title="Waiwoph App", layout="wide")

st.title("Talk to your files")
st.write("Upload a PDF document and enter your questions.")

uploaded_file = st.file_uploader("Choose a PDF file:", type="pdf")
text = None

# The transcript is kept in session state so that it survives Streamlit
# re-running this script; start from an empty one when nothing is stored.
convo = st.session_state.get("convo")
if convo is None:
    convo = ""

if uploaded_file is not None:
    text = extract_text(uploaded_file)
    st.success("PDF uploaded successfully!")

if text is not None:
    questions = st.text_input("Ask Your Questions:")
    answer_button = st.button("Ask", key="find_answers_button")

    if answer_button:
        # Each non-blank line is answered exactly once; blank input is
        # skipped instead of being sent to the model.
        for question in questions.splitlines():
            question = question.strip()
            if not question:
                continue

            with st.spinner("Processing..."):
                prompt = prepare_prompt(question, text)
                response = get_gemini_response(prompt)

            convo = f'''{convo} \n\n**User:** {question} \n**Waiwoph:** {response}'''

        st.write(convo)
        st.session_state["convo"] = convo
        # Server-side copy of the transcript for debugging.
        print(convo)
|
|
|
|
|
|