Spaces:

HikmatUllah
/

Chat_with_your_PDFs

Sleeping

App Files Files Community

HikmatUllah commited on Oct 8, 2024

Commit

a1016ff

verified ·

1 Parent(s): 563e2b3

Upload 2 files

Browse files

Files changed (2) hide show

main.py +288 -0
requirements.txt +8 -0

main.py ADDED Viewed

	@@ -0,0 +1,288 @@

+# import streamlit as st
+# from PyPDF2 import PdfReader
+# from langchain.text_splitter import RecursiveCharacterTextSplitter
+# import os
+# from langchain_google_genai import GoogleGenerativeAIEmbeddings # we will use googe embiddings
+# import google.generativeai as genai
+# from langchain_community.vectorstores import FAISS # vectorstore
+# from langchain_google_genai import ChatGoogleGenerativeAI
+# from langchain.chains.question_answering import load_qa_chain
+# from langchain.prompts import PromptTemplate
+# from dotenv import load_dotenv
+# load_dotenv()
+# os.getenv("GOOGLE_API_KEY")
+# genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+# #read pdf
+# def get_pdf_text(pdf_doc):
+#     text=""
+#     for pdf in pdf_doc:
+#         pdf_reader = PdfReader(pdf)
+#         for page in pdf_reader.pages:
+#             text+=page.extract_text()
+#     return text
+# # convert pdf into chunks
+# def get_text_chunks(text):
+#     text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
+#     chunks = text_splitter.split_text(text)
+#     return chunks
+# #convert into vectors
+# def get_vector_store(text_chunks):
+#     embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001") # embedding model from huggingface and its free
+#     vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
+#     vector_store.save_local("faiss_index") #im storing it in loca
+# def get_conversational_chain():
+#     prompt_template = """
+#     Answer the question as detailed as possible from the provided context, make sure to provide all details, if the answer is not
+#     availabe in the provided context" , don't provide the wrong answer and say sorry there is no such information about that\n\n
+#      context:\n{context}?\n
+#      Question:\n{question}\n
+#      Answer:
+#     """
+#     model=ChatGoogleGenerativeAI(model="gemini-pro" , temperature=0.3)
+#     prompt = PromptTemplate(template=prompt_template, input_variables=["context","question"])
+#     chain = load_qa_chain(model , chain_type="stuff", prompt=prompt)
+#     return chain
+# def user_input(user_query):
+#     embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
+#     new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
+#     docs = new_db.similarity_search(user_query)
+#     chain = get_conversational_chain()
+#     response = chain(
+#         {"input_documents":docs, "question": user_query},
+#         return_only_outputs=True
+#     )
+#     print(response)
+#     st.write("reply: ", response["output_text"])
+# def main():
+#     st.set_page_config("Ask your PDFs")
+#     st.header("Chat with your PDFs")
+#     user_question = st.text_input("Ask any question from your PDFs")
+#     if user_question:
+#         user_input(user_question)
+#     with st.sidebar:
+#         st.title("Menu")
+#         pdf_docs = st.file_uploader("Upload your PDF files" , type=['pdf'], accept_multiple_files=True)
+#         if st.button("Submit & Process"):
+#             if pdf_docs:
+#                 with st.spinner("Processing..."):
+#                     raw_text = get_pdf_text(pdf_docs)
+#                     text_chunks = get_text_chunks(raw_text)
+#                     get_vector_store(text_chunks)
+#                     st.success("Done")
+#             else:
+#                 st.warning("Please upload PDF files before processing.")
+# if __name__ == "__main__":
+#     main()
+#------------------------- 1 ----------------------------
+import streamlit as st
+from PyPDF2 import PdfReader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+import os
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+import google.generativeai as genai
+from langchain_community.vectorstores import FAISS
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain.chains.question_answering import load_qa_chain
+from langchain.prompts import PromptTemplate
+from dotenv import load_dotenv
+from datetime import datetime
+load_dotenv()
+genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+# Define a conversational chain for answering questions
+def get_conversational_chain():
+    prompt_template = """
+    Answer the question as detailed as possible from the provided context. If the answer is not available, say
+    "Sorry, no information is available on this topic in the context".\n\n
+    Context:\n{context}?\n
+    Question:\n{question}\n
+    Answer:
+    """
+    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
+    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
+    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
+    return chain
+# Convert pdf text into chunks
+def get_text_chunks(text):
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
+    chunks = text_splitter.split_text(text)
+    return chunks
+# Convert chunks into vector embeddings
+def get_vector_store(text_chunks):
+    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
+    vector_store.save_local("faiss_index")
+# Read pdf function
+def get_pdf_text(pdf_docs):
+    text = ""
+    for pdf in pdf_docs:
+        pdf_reader = PdfReader(pdf)
+        for page in pdf_reader.pages:
+            text += page.extract_text() or ""  # Handle None returns
+    return text
+# Function to process user input and return bot response
+def user_input(user_query):
+    try:
+        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+        new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
+        docs = new_db.similarity_search(user_query)
+        if not docs:
+            return {"output_text": "Sorry, no relevant documents found."}  # Handle case with no results
+        chain = get_conversational_chain()
+        response = chain({"input_documents": docs, "question": user_query}, return_only_outputs=True)
+        return response
+    except Exception as e:
+        return {"output_text": f"Error processing your request: {str(e)}"}
+# UI layout and styles for the chat interface
+st.set_page_config(page_title="Ask your PDFs", layout="centered")
+st.markdown("""
+    <style>
+    .chat-container {
+        max-width: 600px;
+        margin: 0 auto;
+    }
+    .user-message {
+        background-color: #DCF8C6;
+        padding: 10px;
+        border-radius: 10px;
+        margin-bottom: 5px;
+        text-align: left;
+    }
+    .bot-message {
+        background-color: #E5E5EA;
+        padding: 10px;
+        border-radius: 10px;
+        margin-bottom: 5px;
+        text-align: left;
+        white-space: pre-wrap;
+    }
+    .role {
+        font-weight: bold;
+        margin-top: 10px;
+    }
+    .timestamp {
+        font-size: 12px;
+        color: gray;
+        margin-bottom: 10px;
+    }
+    .fixed-bottom {
+        position: fixed;
+        bottom: 0;
+        left: 0;
+        right: 0;
+        background-color: white;
+        padding: 10px;
+        box-shadow: 0 -2px 5px rgba(0, 0, 0, 0.2);
+    }
+    .chat-history {
+        max-height: 80vh;  /* Limit height of chat history */
+        overflow-y: auto;  /* Enable scrolling */
+        margin-bottom: 60px;  /* Space for the input field */
+    }
+    .header {
+        text-align: center;
+        margin: 20px 0; /* Add margin for spacing */
+    }
+    </style>
+    """, unsafe_allow_html=True)
+# Initialize session state for chat history
+if 'chat_history' not in st.session_state:
+    st.session_state['chat_history'] = []
+# Centered header
+st.markdown('<h1 class="header">📄 Chat with your PDFs</h1>', unsafe_allow_html=True)
+# Sidebar for PDF uploads
+with st.sidebar:
+    st.title("Upload PDFs")
+    pdf_docs = st.file_uploader("Upload your PDF files", type=['pdf'], accept_multiple_files=True)
+    if st.button("Submit & Process"):
+        if pdf_docs:
+            with st.spinner("Processing..."):
+                try:
+                    raw_text = get_pdf_text(pdf_docs)
+                    text_chunks = get_text_chunks(raw_text)
+                    get_vector_store(text_chunks)
+                    st.success("Processing complete! You can start asking questions.")
+                except Exception as e:
+                    st.error(f"Error processing PDF files: {e}")
+        else:
+            st.warning("Please upload PDF files before processing.")
+# Display chat history
+chat_history_container = st.container()
+with chat_history_container:
+    st.markdown('<div class="chat-history">', unsafe_allow_html=True)  # Add scrollable container for chat history
+    for role, text, timestamp in st.session_state['chat_history']:
+        if role == "You":
+            st.markdown(f'<div class="chat-container"><div class="role">You</div><div class="user-message">{text}</div><div class="timestamp">{timestamp}</div></div>', unsafe_allow_html=True)
+        else:
+            st.markdown(f'<div class="chat-container"><div class="role">Bot</div><div class="bot-message">{text}</div><div class="timestamp">{timestamp}</div></div>', unsafe_allow_html=True)
+    st.markdown('</div>', unsafe_allow_html=True)  # Close scrollable container
+# Input field at the bottom for user question
+input_container = st.container()
+with input_container:
+    st.markdown('<div class="fixed-bottom">', unsafe_allow_html=True)
+    input_text = st.text_input("Ask your PDF a question:", value="", key="input_text")
+    submit = st.button("Send")
+    st.markdown('</div>', unsafe_allow_html=True)
+# Handle user input and bot response
+if submit and input_text:
+    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    st.session_state['chat_history'].append(("You", input_text, now))
+    # Display placeholder
+    st.session_state['chat_history'].append(("Bot", "Analyzing Input...", now))
+    # Get response from user_input function
+    response = user_input(input_text)
+    # Get the bot's response
+    bot_response = response.get("output_text", "Sorry, something went wrong.")
+    # Remove the placeholder and add bot response
+    st.session_state['chat_history'][-1] = ("Bot", bot_response, now)  # Replace the last placeholder with the actual response
+# Display the updated chat history again
+with chat_history_container:
+    st.markdown('<div class="chat-history">', unsafe_allow_html=True)  # Add scrollable container for chat history
+    for role, text, timestamp in st.session_state['chat_history']:
+        if role == "You":
+            st.markdown(f'<div class="chat-container"><div class="role">You</div><div class="user-message">{text}</div><div class="timestamp">{timestamp}</div></div>', unsafe_allow_html=True)
+        else:
+            st.markdown(f'<div class="chat-container"><div class="role">Bot</div><div class="bot-message">{text}</div><div class="timestamp">{timestamp}</div></div>', unsafe_allow_html=True)
+    st.markdown('</div>', unsafe_allow_html=True)  # Close scrollable container

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+streamlit
+# google-genrativeai
+python-dotenv
+langchain
+PyPDF2
+faiss-cpu
+langchain_google_genai
+langchain-community