|
import os
from base64 import b64encode
from io import BytesIO
from tempfile import NamedTemporaryFile

import google.generativeai as genai
import requests
import streamlit as st
from dotenv import load_dotenv
from langchain_community.document_loaders import (
    OnlinePDFLoader,
    PyPDFLoader,
    UnstructuredPDFLoader,
)
from PyPDF2 import PdfReader
|
|
|
# Read variables from a local .env file into the process environment, then
# hand the Google API key (if any) to the Gemini client library.
load_dotenv()
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
|
|
|
def prepare_prompt(question, context):
    """Build the prompt asking the model to answer *question* from *context*.

    Args:
        question: The user's question; interpolated into the prompt as-is.
        context: The document text. Either a single string, or a list/tuple
            of per-page strings. Sequences are joined with newlines (empty
            or ``None`` pages are skipped) so the prompt contains the text
            itself rather than a Python list repr. ``None`` is treated as
            empty context.

    Returns:
        str: The complete prompt text.
    """
    if context is None:
        context = ""
    elif isinstance(context, (list, tuple)):
        # Interpolating a list directly would embed "['...', '...']" with
        # escaped newlines into the prompt.
        context = "\n".join(str(page) for page in context if page)

    prompt = f"""
    You are an expert in analyzing the context and providing accurate and comprehensive answers based on the context.
    Use the context provided below and answer comprehensively to the question at the end.

    Context: {context}

    Question:{question}

    """
    return prompt
|
|
|
|
|
|
|
def get_gemini_response(prompt):
    """Send *prompt* to the ``gemini-pro`` model and return its reply text.

    Args:
        prompt: The full prompt text to send.

    Returns:
        str: The generated answer, or a short explanatory message when the
        response carries no usable text.

    Errors raised by ``generate_content`` itself are not handled here and
    propagate to the caller.
    """
    model = genai.GenerativeModel('gemini-pro')
    response = model.generate_content(prompt)
    try:
        return response.text
    except ValueError as err:
        # The `.text` accessor raises ValueError when the response has no
        # text part (for example when the reply was blocked), so report
        # that to the user instead of crashing the app.
        return f"The model did not return an answer: {err}"
|
|
|
def extract_text(uploaded_file):
    """Extract the text content of a PDF.

    Args:
        uploaded_file: Either a string, which is treated as a URL and
            loaded through ``OnlinePDFLoader``, or any object accepted by
            ``PyPDF2.PdfReader`` (e.g. a binary file-like object).

    Returns:
        list[str]: For a file-like input, one entry per PDF page (an empty
        string where no text could be extracted). For a URL, one entry per
        document returned by the loader.
    """
    if isinstance(uploaded_file, str):
        print("Fetching Url")
        # Constructing the loader fetches nothing by itself; load() must be
        # called to actually download and parse the document.
        documents = OnlinePDFLoader(uploaded_file).load()
        return [doc.page_content for doc in documents]

    pdf_reader = PdfReader(uploaded_file)
    # Normalise a missing/empty extraction to "" so callers can join safely.
    return [page.extract_text() or "" for page in pdf_reader.pages]
|
|
|
|
|
def display_pdf(pdf_data, col):
    """Render raw PDF bytes inside *col* as a base64 data-URI iframe.

    Nothing is rendered when *pdf_data* is ``None``.
    """
    if pdf_data is None:
        return

    encoded = b64encode(pdf_data).decode('utf-8')
    iframe_html = (
        f'<iframe src="data:application/pdf;base64,{encoded}" '
        'width="100%" height="600px" type="application/pdf"></iframe>'
    )
    with col:
        # Raw HTML is needed for the iframe, hence unsafe_allow_html.
        st.markdown(iframe_html, unsafe_allow_html=True)
|
|
|
def main():
    """Run the Streamlit app: PDF viewer on the left, Q&A on the right.

    The PDF is taken from the uploaded file or, when none is uploaded, from
    the URL field. Its bytes are shown in the left column, its text is
    extracted, and that text is used as context when the user enters a
    question in the right column.
    """
    st.set_page_config(page_title="PDF Viewer with Chat", page_icon="")
    st.title("Upload or Enter a URL to View a PDF and Chat")

    # This is a plain string (not an f-string), so CSS braces are single.
    st.markdown(
        "<style>body { margin: 0; padding: 0; }</style>",
        unsafe_allow_html=True,
    )

    # Row 1: the two ways of supplying a PDF.
    col1_1, col1_2 = st.columns(2)
    with col1_1:
        uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
    with col1_2:
        pdf_url = st.text_input("Enter a PDF URL (optional)")

    # Row 2: viewer (left) and chat (right).
    col2_1, col2_2 = st.columns([1, 1])
    with col2_2:
        chat_container = st.empty()
        chat_history = st.empty()

    def show_chat(extracted_text):
        """Show the query box and, once a query is entered, the answer."""
        if isinstance(extracted_text, str):
            context = extracted_text
        else:
            # Pages arrive as a list; send the model plain text.
            context = "\n".join(page for page in extracted_text if page)

        user_input = chat_container.text_input(
            "Type your query...", key="chat_input"
        )
        if not user_input:
            return

        prompt = prepare_prompt(user_input, context)
        try:
            res = get_gemini_response(prompt)
        except Exception as e:
            # UI boundary: surface the failure instead of a stack trace.
            with col2_2:
                st.error(f"Error: Failed to get a response from the model: {e}")
            return

        # The first positional argument of text_area is its label; the
        # transcript belongs in `value`.
        chat_history.text_area(
            "Conversation",
            value=f"""User: {user_input}\nAnswer: {res}""",
        )

    pdf_data = None
    if uploaded_file is not None:
        pdf_data = uploaded_file.getvalue()
    elif pdf_url:
        try:
            # A timeout keeps an unresponsive server from hanging the app.
            response = requests.get(pdf_url, timeout=30)
        except requests.RequestException as e:
            st.error(f"Error: An error occurred while fetching PDF from URL: {e}")
        else:
            if response.status_code == 200:
                pdf_data = response.content
            else:
                st.error(f"Error: Failed to fetch PDF from URL. Status code: {response.status_code}")

    if pdf_data is None:
        return

    display_pdf(pdf_data, col2_1)

    try:
        # Parse the bytes already in memory (fresh stream on every rerun)
        # rather than downloading the URL a second time.
        extracted_text = extract_text(BytesIO(pdf_data))
    except Exception as e:
        st.error(f"Error: Could not read the PDF: {e}")
        return

    show_chat(extracted_text)
|
|
|
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|