imkhan107 committed
Commit f9d300b
1 Parent(s): d139ac8

initial commit

Files changed (3)
  1. app.py +117 -0
  2. app_2.py +145 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,117 @@
+ import streamlit as st
+ from base64 import b64encode
+ import os
+ from dotenv import load_dotenv
+ from io import BytesIO
+ from PyPDF2 import PdfReader
+ from langchain_community.document_loaders import PyPDFLoader, UnstructuredPDFLoader, OnlinePDFLoader
+ from tempfile import NamedTemporaryFile
+
+ import google.generativeai as genai
+
+ load_dotenv()  # load all the environment variables
+ # Configure the Gemini API key
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+
+
+ def prepare_prompt(question, context):
+     """Builds the prompt sent to Gemini from the question and the extracted PDF text."""
+     if context is None:
+         return "Please upload a PDF first."
+
+     prompt = f"""
+     You are an expert in analyzing the given context and providing accurate and comprehensive answers based on it.
+     Use the context provided below to answer the question at the end comprehensively.
+
+     Context: {context}
+
+     Question: {question}
+     """
+
+     return prompt
+
+
+ def get_gemini_response(prompt):
+     """Sends the prompt to the Gemini model and returns the generated text."""
+     print(prompt)
+     model = genai.GenerativeModel('gemini-pro')
+     response = model.generate_content(prompt)
+     return response.text
+
+
+ def extract_text(uploaded_file):
+     """Extracts text from each page of a PDF.
+
+     Args:
+         uploaded_file: An uploaded PDF file object, or a URL string.
+
+     Returns:
+         list: A list containing the extracted text of each page.
+     """
+     pages = []
+     if isinstance(uploaded_file, str):  # Handle URL case
+         print("Fetching URL")
+         loader = OnlinePDFLoader(uploaded_file)
+         pages = [doc.page_content for doc in loader.load()]
+     else:  # Handle uploaded file case
+         pdf_reader = PdfReader(uploaded_file)
+         for page in pdf_reader.pages:
+             pages.append(page.extract_text())
+
+     st.session_state["text"] = pages
+     return pages
+
+
+ st.set_page_config(page_title="Waiwoph App", layout="wide")
+
+ # Interface elements
+ st.title("Talk to your files")
+ st.write("Upload a PDF document and enter your questions.")
+
+ uploaded_file = st.file_uploader("Choose a PDF file:", type="pdf")
+ text = None  # Holds the extracted content
+ convo = ""
+ if st.session_state.get("convo") is not None:
+     convo = st.session_state.get("convo")
+
+ if uploaded_file is not None:
+     text = extract_text(uploaded_file)
+     st.success("PDF uploaded successfully!")
+
+ if text is not None:
+     questions = st.text_input("Ask Your Questions:")
+     answer_button = st.button("Ask", key="find_answers_button")  # Initially enabled
+
+     if answer_button:
+         is_processing = False  # Flag to track processing state
+         for question in questions.splitlines():  # Split questions at line breaks
+             if not is_processing:
+                 is_processing = True
+                 with st.spinner("Processing..."):
+                     prompt = prepare_prompt(question.strip(), text)
+                     response = get_gemini_response(prompt)
+                     convo = f'''{convo} \n\n**User:** {question} \n**Waiwoph:** {response}'''
+                     st.write(convo)
+                     st.session_state["convo"] = convo
+                 is_processing = False  # Reset processing flag after each question
app_2.py ADDED
@@ -0,0 +1,145 @@
+ import streamlit as st
+ from base64 import b64encode
+ import os
+ from dotenv import load_dotenv
+ from io import BytesIO
+ from PyPDF2 import PdfReader
+ from langchain_community.document_loaders import PyPDFLoader, UnstructuredPDFLoader, OnlinePDFLoader
+ from tempfile import NamedTemporaryFile
+
+ import google.generativeai as genai
+
+ load_dotenv()  # load all the environment variables
+ # Configure the Gemini API key
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+
+
+ def prepare_prompt(question, context):
+     """Builds the prompt sent to Gemini from the question and the extracted PDF text."""
+     prompt = f"""
+     You are an expert in analyzing the given context and providing accurate and comprehensive answers based on it.
+     Use the context provided below to answer the question at the end comprehensively.
+
+     Context: {context}
+
+     Question: {question}
+     """
+
+     return prompt
+
+
+ def get_gemini_response(prompt):
+     """Sends the prompt to the Gemini model and returns the generated text."""
+     print(prompt)
+     model = genai.GenerativeModel('gemini-pro')
+     response = model.generate_content(prompt)
+     return response.text
+
+
+ def extract_text(uploaded_file):
+     """Extracts text from each page of a PDF.
+
+     Args:
+         uploaded_file: An uploaded PDF file object, or a URL string.
+
+     Returns:
+         list: A list containing the extracted text of each page.
+     """
+     pages = []
+     if isinstance(uploaded_file, str):  # Handle URL case
+         print("Fetching URL")
+         loader = OnlinePDFLoader(uploaded_file)
+         pages = [doc.page_content for doc in loader.load()]
+     else:  # Handle uploaded file case
+         pdf_reader = PdfReader(uploaded_file)
+         for page in pdf_reader.pages:
+             pages.append(page.extract_text())
+
+     print(pages)
+     return pages
+
+
+ def display_pdf(pdf_data, col):
+     """Displays the PDF using base64 encoding and an iframe."""
+     if pdf_data is not None:
+         base64_pdf = b64encode(pdf_data).decode('utf-8')
+         pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600px" type="application/pdf"></iframe>'
+         with col:
+             st.markdown(pdf_display, unsafe_allow_html=True)
+
+
+ def main():
+     """Streamlit app with PDF viewer and chat window in a two-row layout."""
+     st.set_page_config(page_title="PDF Viewer with Chat", page_icon="")
+
+     st.title("Upload or Enter a URL to View a PDF and Chat")
+
+     extracted_text = ""
+     # Full page width layout
+     st.markdown("""
+         <style>
+         body { margin: 0; padding: 0; }
+         </style>
+         """, unsafe_allow_html=True)
+
+     # Row 1 for file upload and URL
+     col1_1, col1_2 = st.columns(2)
+     with col1_1:
+         uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
+     with col1_2:
+         pdf_url = st.text_input("Enter a PDF URL (optional)")
+
+     # Row 2 for PDF viewer and chat
+     col2_1, col2_2 = st.columns([1, 1])  # equal widths: PDF viewer on the left, chat on the right
+
+     # Chat window (right column, hidden initially)
+     with col2_2:
+         chat_container = st.empty()  # Placeholder for chat area
+         chat_history = st.empty()  # Placeholder for chat history (optional)
+
+     def show_chat(extracted_text):
+         chat_container.empty()  # Clear previous chat elements
+         with chat_container:
+             # User input for chat
+             with col2_2:
+                 user_input = st.text_input("Type your query...", key="chat_input")
+
+             # Send button (optional)
+             # You can add a button here to trigger actions based on user input
+             # send_button = st.button("Send")
+
+             # Update chat history
+             if user_input:
+                 # Process the user input and show the answer in the chat area
+                 prompt = prepare_prompt(user_input, extracted_text)
+                 res = get_gemini_response(prompt)
+                 with col2_2:
+                     chat_history.text_area("Chat", f"User: {user_input}\nAnswer: {res}")
+
+     if uploaded_file is not None:
+         pdf_data = uploaded_file.getvalue()
+         # display_pdf(pdf_data, col2_1)
+         extracted_text = extract_text(uploaded_file)
+         show_chat(extracted_text)  # Show chat window only after upload
+     elif pdf_url:
+         try:
+             import requests
+             response = requests.get(pdf_url, stream=True)
+             if response.status_code == 200:
+                 pdf_data = response.content
+                 # display_pdf(pdf_data, col2_1)
+                 extracted_text = extract_text(pdf_url)
+                 show_chat(extracted_text)  # Show chat window only after a successful URL fetch
+             else:
+                 st.error(f"Error: Failed to fetch PDF from URL. Status code: {response.status_code}")
+         except Exception as e:
+             st.error(f"Error: An error occurred while fetching the PDF from the URL: {e}")
+
+
+ if __name__ == '__main__':
+     main()
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ streamlit
+ google-generativeai
+ python-dotenv
+ langchain
+ unstructured