HikmatUllah committed on
Commit
a1016ff
·
verified ·
1 Parent(s): 563e2b3

Upload 2 files

Browse files
Files changed (2) hide show
  1. main.py +288 -0
  2. requirements.txt +8 -0
main.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import streamlit as st
2
+ # from PyPDF2 import PdfReader
3
+ # from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ # import os
5
+ # from langchain_google_genai import GoogleGenerativeAIEmbeddings # we will use Google embeddings
6
+ # import google.generativeai as genai
7
+ # from langchain_community.vectorstores import FAISS # vectorstore
8
+ # from langchain_google_genai import ChatGoogleGenerativeAI
9
+ # from langchain.chains.question_answering import load_qa_chain
10
+ # from langchain.prompts import PromptTemplate
11
+ # from dotenv import load_dotenv
12
+
13
+ # load_dotenv()
14
+ # os.getenv("GOOGLE_API_KEY")
15
+ # genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
16
+
17
+ # #read pdf
18
+ # def get_pdf_text(pdf_doc):
19
+ # text=""
20
+ # for pdf in pdf_doc:
21
+ # pdf_reader = PdfReader(pdf)
22
+ # for page in pdf_reader.pages:
23
+ # text+=page.extract_text()
24
+ # return text
25
+
26
+
27
+ # # convert pdf into chunks
28
+ # def get_text_chunks(text):
29
+ # text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
30
+ # chunks = text_splitter.split_text(text)
31
+ # return chunks
32
+ # #convert into vectors
33
+ # def get_vector_store(text_chunks):
34
+ # embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001") # Google's embedding-001 model, accessed through langchain_google_genai
35
+ # vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
36
+ # vector_store.save_local("faiss_index") # store the index locally
37
+
38
+ # def get_conversational_chain():
39
+ # prompt_template = """
40
+ # Answer the question as detailed as possible from the provided context, make sure to provide all details, if the answer is not
41
+ # availabe in the provided context" , don't provide the wrong answer and say sorry there is no such information about that\n\n
42
+ # context:\n{context}?\n
43
+ # Question:\n{question}\n
44
+
45
+ # Answer:
46
+ # """
47
+
48
+ # model=ChatGoogleGenerativeAI(model="gemini-pro" , temperature=0.3)
49
+
50
+ # prompt = PromptTemplate(template=prompt_template, input_variables=["context","question"])
51
+ # chain = load_qa_chain(model , chain_type="stuff", prompt=prompt)
52
+
53
+ # return chain
54
+
55
+ # def user_input(user_query):
56
+ # embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
57
+
58
+ # new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
59
+ # docs = new_db.similarity_search(user_query)
60
+
61
+ # chain = get_conversational_chain()
62
+
63
+ # response = chain(
64
+ # {"input_documents":docs, "question": user_query},
65
+ # return_only_outputs=True
66
+ # )
67
+
68
+ # print(response)
69
+ # st.write("reply: ", response["output_text"])
70
+
71
+
72
+ # def main():
73
+ # st.set_page_config("Ask your PDFs")
74
+ # st.header("Chat with your PDFs")
75
+
76
+ # user_question = st.text_input("Ask any question from your PDFs")
77
+
78
+ # if user_question:
79
+ # user_input(user_question)
80
+
81
+ # with st.sidebar:
82
+ # st.title("Menu")
83
+ # pdf_docs = st.file_uploader("Upload your PDF files" , type=['pdf'], accept_multiple_files=True)
84
+ # if st.button("Submit & Process"):
85
+ # if pdf_docs:
86
+ # with st.spinner("Processing..."):
87
+ # raw_text = get_pdf_text(pdf_docs)
88
+ # text_chunks = get_text_chunks(raw_text)
89
+ # get_vector_store(text_chunks)
90
+ # st.success("Done")
91
+ # else:
92
+ # st.warning("Please upload PDF files before processing.")
93
+
94
+
95
+ # if __name__ == "__main__":
96
+ # main()
97
+
98
+ #------------------------- 1 ----------------------------
99
+ import streamlit as st
100
+ from PyPDF2 import PdfReader
101
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
102
+ import os
103
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
104
+ import google.generativeai as genai
105
+ from langchain_community.vectorstores import FAISS
106
+ from langchain_google_genai import ChatGoogleGenerativeAI
107
+ from langchain.chains.question_answering import load_qa_chain
108
+ from langchain.prompts import PromptTemplate
109
+ from dotenv import load_dotenv
110
+ from datetime import datetime
111
+
112
+ load_dotenv()
113
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
114
+
115
+ # Define a conversational chain for answering questions
116
def get_conversational_chain():
    """Build the "stuff" question-answering chain used to answer PDF questions.

    The prompt instructs the model to answer in detail from the supplied
    context and to reply with a fixed apology when the context does not
    contain the answer.

    Returns:
        The chain created by ``load_qa_chain`` around a ``gemini-pro`` chat
        model running at temperature 0.3.
    """
    qa_template = """
    Answer the question as detailed as possible from the provided context. If the answer is not available, say
    "Sorry, no information is available on this topic in the context".\n\n
    Context:\n{context}?\n
    Question:\n{question}\n
    Answer:
    """
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    qa_prompt = PromptTemplate(
        template=qa_template,
        input_variables=["context", "question"],
    )
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
128
+
129
+ # Convert pdf text into chunks
130
def get_text_chunks(text):
    """Split extracted PDF text into overlapping chunks.

    Args:
        text: The full text to split.

    Returns:
        The chunks produced by a ``RecursiveCharacterTextSplitter`` configured
        with ``chunk_size=10000`` and ``chunk_overlap=1000``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    )
    return splitter.split_text(text)
134
+
135
+ # Convert chunks into vector embeddings
136
def get_vector_store(text_chunks):
    """Embed text chunks and persist them as a local FAISS index.

    Args:
        text_chunks: Non-empty list of text chunks to embed.

    Raises:
        ValueError: If ``text_chunks`` is empty, for example when the
            uploaded PDFs contain no extractable text. Raising here gives the
            caller a readable message instead of an obscure failure from the
            index construction.
    """
    if not text_chunks:
        raise ValueError(
            "No text could be extracted from the uploaded PDFs, "
            "so there is nothing to index."
        )

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    # Persisted under "faiss_index"; user_input() loads the index from there.
    vector_store.save_local("faiss_index")
140
+
141
+ # Read pdf function
142
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of the given PDFs.

    Args:
        pdf_docs: Iterable of PDF sources accepted by ``PdfReader`` (the
            Streamlit uploader passes its uploaded file objects).

    Returns:
        The page texts joined with newlines, so the last word of one page is
        not fused with the first word of the next. Returns an empty string
        when there are no documents or no extractable text.
    """
    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() can return None; treat that as empty text.
            page_texts.append(page.extract_text() or "")
    return "\n".join(page_texts)
149
+
150
+ # Function to process user input and return bot response
151
def user_input(user_query):
    """Answer a question using the locally saved FAISS index.

    Args:
        user_query: The question text entered by the user.

    Returns:
        A dict with an ``"output_text"`` key. It holds the chain's answer, or
        a human-readable message when no index has been built yet, when the
        similarity search finds nothing, or when any step raises.
    """
    # Without a saved index there is nothing to search; say so explicitly
    # instead of surfacing a low-level loading error to the user.
    if not os.path.isdir("faiss_index"):
        return {
            "output_text": (
                "No processed PDFs found. Please upload your PDF files and "
                "click 'Submit & Process' before asking a question."
            )
        }

    try:
        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        # SECURITY: allow_dangerous_deserialization=True permits unpickling the
        # stored index. Only acceptable because "faiss_index" is written by
        # this app's own get_vector_store(); never point it at untrusted files.
        new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
        docs = new_db.similarity_search(user_query)

        if not docs:
            return {"output_text": "Sorry, no relevant documents found."}  # Handle case with no results

        chain = get_conversational_chain()
        response = chain({"input_documents": docs, "question": user_query}, return_only_outputs=True)

        return response
    except Exception as e:
        # Top-level UI boundary: report the failure as chat text instead of
        # crashing the Streamlit script.
        return {"output_text": f"Error processing your request: {str(e)}"}
166
+
167
+ # UI layout and styles for the chat interface
168
import html  # stdlib; escapes chat text before it is embedded in raw HTML

st.set_page_config(page_title="Ask your PDFs", layout="centered")
st.markdown("""
<style>
.chat-container {
    max-width: 600px;
    margin: 0 auto;
}
.user-message {
    background-color: #DCF8C6;
    padding: 10px;
    border-radius: 10px;
    margin-bottom: 5px;
    text-align: left;
}
.bot-message {
    background-color: #E5E5EA;
    padding: 10px;
    border-radius: 10px;
    margin-bottom: 5px;
    text-align: left;
    white-space: pre-wrap;
}
.role {
    font-weight: bold;
    margin-top: 10px;
}
.timestamp {
    font-size: 12px;
    color: gray;
    margin-bottom: 10px;
}
.fixed-bottom {
    position: fixed;
    bottom: 0;
    left: 0;
    right: 0;
    background-color: white;
    padding: 10px;
    box-shadow: 0 -2px 5px rgba(0, 0, 0, 0.2);
}
.chat-history {
    max-height: 80vh; /* Limit height of chat history */
    overflow-y: auto; /* Enable scrolling */
    margin-bottom: 60px; /* Space for the input field */
}
.header {
    text-align: center;
    margin: 20px 0; /* Add margin for spacing */
}
</style>
""", unsafe_allow_html=True)


def _render_chat_history(history):
    """Render each (role, text, timestamp) entry of *history* as a chat bubble."""
    st.markdown('<div class="chat-history">', unsafe_allow_html=True)  # Add scrollable container for chat history
    for role, text, timestamp in history:
        if role == "You":
            label, css_class = "You", "user-message"
        else:
            label, css_class = "Bot", "bot-message"
        # The markup below is rendered with unsafe_allow_html=True, so the
        # message text must be escaped: it comes from the user or the model.
        safe_text = html.escape(str(text))
        st.markdown(
            f'<div class="chat-container"><div class="role">{label}</div>'
            f'<div class="{css_class}">{safe_text}</div>'
            f'<div class="timestamp">{timestamp}</div></div>',
            unsafe_allow_html=True,
        )
    st.markdown('</div>', unsafe_allow_html=True)  # Close scrollable container


# Initialize session state for chat history
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []

# Centered header
st.markdown('<h1 class="header">📄 Chat with your PDFs</h1>', unsafe_allow_html=True)

# Sidebar for PDF uploads
with st.sidebar:
    st.title("Upload PDFs")
    pdf_docs = st.file_uploader("Upload your PDF files", type=['pdf'], accept_multiple_files=True)
    if st.button("Submit & Process"):
        if pdf_docs:
            with st.spinner("Processing..."):
                try:
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    get_vector_store(text_chunks)
                    st.success("Processing complete! You can start asking questions.")
                except Exception as e:
                    st.error(f"Error processing PDF files: {e}")
        else:
            st.warning("Please upload PDF files before processing.")

# Reserve the chat history's position on the page now; it is filled in at the
# end of the script, after any new question has been answered, so the history
# is drawn exactly once per run.
chat_history_container = st.container()

# Input field at the bottom for user question
input_container = st.container()
with input_container:
    st.markdown('<div class="fixed-bottom">', unsafe_allow_html=True)
    input_text = st.text_input("Ask your PDF a question:", value="", key="input_text")
    submit = st.button("Send")
    st.markdown('</div>', unsafe_allow_html=True)

# Handle user input and bot response
if submit and input_text:
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    st.session_state['chat_history'].append(("You", input_text, now))

    # Show progress while the answer is computed. A placeholder history entry
    # would be overwritten before it could ever be displayed.
    with st.spinner("Analyzing Input..."):
        response = user_input(input_text)

    # Get the bot's response
    bot_response = response.get("output_text", "Sorry, something went wrong.")
    st.session_state['chat_history'].append(("Bot", bot_response, now))

# Display chat history (including any exchange added above) in the reserved slot
with chat_history_container:
    _render_chat_history(st.session_state['chat_history'])
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ # google-generativeai
3
+ python-dotenv
4
+ langchain
5
+ PyPDF2
6
+ faiss-cpu
7
+ langchain_google_genai
8
+ langchain-community