txtuhin committed on
Commit
eac8167
·
verified ·
1 Parent(s): 8596555

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +326 -0
app.py ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain_core.messages import AIMessage, HumanMessage
3
+ from langchain_openai import ChatOpenAI
4
+ from langchain_core.output_parsers import StrOutputParser
5
+ from langchain_core.prompts import ChatPromptTemplate
6
+ from PyPDF2 import PdfReader
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
9
+ from langchain_community.vectorstores import FAISS
10
+ from tavily import TavilyClient
11
+ import hashlib
12
+ from streamlit_pdf_viewer import pdf_viewer
13
+ import tempfile
14
+ import os
15
+
16
+
17
# Username -> password pairs accepted by the login page.
# NOTE(review): these are placeholder demo credentials kept for backward
# compatibility; replace them before exposing the app publicly.
USER_CREDENTIALS = {"admin": "admin"}  # Replace this with your username: password pairs


# Function to check if the user is authenticated
def check_login(username, password):
    """Return True when *password* matches the stored password for *username*.

    Args:
        username: Key looked up in ``USER_CREDENTIALS``.
        password: Candidate password typed by the user.

    Returns:
        ``True`` only if the user exists and the password matches exactly;
        ``False`` for unknown users, non-string inputs, or a mismatch.
    """
    import hmac

    expected = USER_CREDENTIALS.get(username)
    # An unknown user yields ``None``; without this guard a ``None`` password
    # would compare equal to it and be accepted.
    if not isinstance(expected, str) or not isinstance(password, str):
        return False
    # Constant-time comparison; bytes are used because compare_digest only
    # accepts ASCII-only ``str`` arguments.
    return hmac.compare_digest(expected.encode("utf-8"), password.encode("utf-8"))
25
+
26
+
27
# Function to handle login page
def login_page():
    """Render the login form and mark the session authenticated on success.

    On a successful login, ``st.session_state.authenticated`` and
    ``st.session_state.username`` are set and the script is rerun so the main
    page is shown. On failure an error message is displayed.
    """
    st.title("Login Page")
    username = st.text_input("Username")
    password = st.text_input("Password", type="password")

    # Nothing to do until the user actually submits the form.
    if not st.button("Login"):
        return

    if not check_login(username, password):
        st.error("Invalid credentials")
        return

    st.session_state.authenticated = True
    st.session_state.username = username
    # The plaintext password is deliberately NOT kept in session state:
    # nothing in this file reads it back, and holding it only widens exposure.
    st.success(f"Welcome, {username}!")
    st.rerun()  # Rerun the app to show the main page after login
42
+
43
+
44
def main_app():
    """Render the authenticated "AI Professor" page.

    The page lets the user upload a PDF, indexes its text in a FAISS vector
    store (Google Generative AI embeddings), answers chat questions with an
    LLM reached through Groq's OpenAI-compatible endpoint, and offers two
    sidebar actions: generating a quiz and searching YouTube (via Tavily) for
    a video on the document's topic.

    API keys are read from the environment variables ``GOOGLE_API_KEY``,
    ``TAVILY_API_KEY`` and ``GROQ_API_KEY`` (``OPENAI_API_KEY`` is accepted as
    a fallback for the latter). If any is missing, an error is shown and the
    script run is stopped.
    """
    # Set up Streamlit page; kept first so it precedes every other Streamlit
    # command issued by this function.
    st.set_page_config(page_title="AI Professor", page_icon="👨‍🏫")

    # Secrets must never be committed to source control, so they are taken
    # from the environment instead of being hard-coded here.
    # NOTE(review): Streamlit is documented to expose root-level secrets.toml
    # entries as environment variables as well - confirm for the deployed version.
    google_api_key = os.environ.get("GOOGLE_API_KEY")
    tvly_api_key = os.environ.get("TAVILY_API_KEY")
    # The chat model is served by Groq's OpenAI-compatible API, hence two names.
    openai_api_key = os.environ.get("GROQ_API_KEY") or os.environ.get("OPENAI_API_KEY")

    # Validate API keys
    if not all([google_api_key, tvly_api_key, openai_api_key]):
        st.error("Please set up your API keys in environment variables or secrets.toml")
        st.stop()

    # Initialize Tavily client
    web_tool_search = TavilyClient(api_key=tvly_api_key)

    st.title("👨‍🏫 AI Professor")

    # Fixed strings returned by get_response(); shared so callers can
    # recognise them instead of treating them as model output.
    no_document_message = "Please upload a PDF document first."
    response_error_prefix = "Error generating response: "

    def get_pdf_text(pdf_docs):
        """Return the concatenated text of every page of one PDF or a list of PDFs."""
        documents = pdf_docs if isinstance(pdf_docs, list) else [pdf_docs]
        page_texts = []
        for pdf in documents:
            # extract_text() may yield nothing for image-only pages; treat as "".
            page_texts.extend(page.extract_text() or "" for page in PdfReader(pdf).pages)
        return "".join(page_texts)

    def get_text_chunks(text):
        """Split *text* into overlapping chunks for embedding."""
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=10000, chunk_overlap=1000
        )
        return text_splitter.split_text(text)

    def get_vector_store(text_chunks):
        """Build a FAISS store from *text_chunks*; return None (after showing an error) on failure."""
        if not text_chunks:
            st.error("No text could be extracted from the uploaded PDF.")
            return None
        try:
            embeddings = GoogleGenerativeAIEmbeddings(
                model="models/embedding-001", google_api_key=google_api_key
            )
            return FAISS.from_texts(text_chunks, embedding=embeddings)
        except Exception as e:
            st.error(f"Error creating vector store: {str(e)}")
            return None

    def get_response(user_query, chat_history, vector_store):
        """Answer *user_query* from the indexed document and the chat history.

        Returns the model's text, ``no_document_message`` when no document is
        indexed, or a string starting with ``response_error_prefix`` on failure.
        """
        if vector_store is None:
            return no_document_message

        template = """
        You are a helpful assistant. Answer the following questions considering the history of the conversation and the document provided:

        Context: {context}
        Chat history: {chat_history}
        User question: {user_question}
        """

        prompt = ChatPromptTemplate.from_template(template)

        try:
            llm = ChatOpenAI(
                base_url="https://api.groq.com/openai/v1",
                api_key=openai_api_key,
                model_name="llama-3.1-8b-instant",
                temperature=1,
                max_tokens=1024,
            )

            docs = vector_store.similarity_search(user_query)
            context = "\n".join(doc.page_content for doc in docs)

            chain = prompt | llm | StrOutputParser()

            return chain.invoke(
                {
                    "context": context,
                    "chat_history": chat_history,
                    "user_question": user_query,
                }
            )
        except Exception as e:
            return f"{response_error_prefix}{str(e)}"

    def get_youtube_url(query):
        """Return the first YouTube watch URL Tavily finds for *query*, or None."""
        try:
            response = web_tool_search.search(
                query=query,
                search_depth="basic",
                include_domains=["youtube.com"],
                max_results=1,
            )

            for result in response["results"]:
                if "youtube.com/watch" in result["url"]:
                    return result["url"]

            return None
        except Exception as e:
            st.error(f"Error searching for video: {str(e)}")
            return None

    def get_pdfs_hash(pdf_docs):
        """Return an MD5 hex digest of the uploaded file(s), used only to detect changes."""
        combined_hash = hashlib.md5()
        documents = pdf_docs if isinstance(pdf_docs, list) else [pdf_docs]
        for pdf in documents:
            # Rewind first: an earlier reader may have left the stream at EOF,
            # which would otherwise hash as an empty file.
            pdf.seek(0)
            combined_hash.update(pdf.read())
            pdf.seek(0)
        return combined_hash.hexdigest()

    # Initialize session state
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = [
            AIMessage(
                content="Hello, I am Chatbot professor assistant. How can I help you?"
            ),
        ]
    if "vector_store" not in st.session_state:
        st.session_state.vector_store = None
    if "current_pdfs_hash" not in st.session_state:
        st.session_state.current_pdfs_hash = None

    # Display chat history
    for message in st.session_state.chat_history:
        if isinstance(message, AIMessage):
            with st.chat_message("AI"):
                st.write(message.content)
        elif isinstance(message, HumanMessage):
            with st.chat_message("Human"):
                st.write(message.content)

    # Chat input
    user_query = st.chat_input("Type your message here...")

    # Sidebar
    # NOTE(review): the source lost its indentation; the PDF preview is assumed
    # to live inside the sidebar (the CSS below widens it) - confirm.
    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files",
            type="pdf",
            accept_multiple_files=False,
            key="pdf_uploader",
        )
        quiz_button = st.button("🗒️ Make a quiz", type="primary")
        video_button = st.button("📺 Search a video on the topic")
        view = st.toggle("👁️ View PDF")

        if view and pdf_docs:
            # getvalue() does not move the stream position, so later readers
            # still see the whole file, and no temporary file is left on disk.
            pdf_viewer(pdf_docs.getvalue(), width=800)

    # Custom CSS for sidebar
    st.markdown(
        """
        <style>
        section[data-testid="stSidebar"] {
            width: 600px;
            min-width: 600px;
            max-width: 800px;
            background-color: #f0f2f6;
        }
        .css-1lcbmhc {
            margin-left: 360px;
            padding: 1rem;
        }
        .block-container {
            max-width: 800px;
            min-width: 600px;
            margin: auto;
        }
        .stChatMessage {
            width: 100%;
            max-width: 800px;
            margin: 0 auto;
        }
        </style>
        """,
        unsafe_allow_html=True,
    )

    # Process PDF upload
    if pdf_docs:
        new_hash = get_pdfs_hash(pdf_docs)
        if new_hash != st.session_state.current_pdfs_hash:
            vector_store = None
            try:
                text = get_pdf_text(pdf_docs)
            except Exception as e:
                # Corrupt or unreadable PDFs should not crash the page.
                st.error(f"Error reading PDF: {str(e)}")
            else:
                vector_store = get_vector_store(get_text_chunks(text))

            st.session_state.vector_store = vector_store
            # Only remember the hash (and report success) when indexing worked,
            # so a failed attempt is retried on the next run.
            if vector_store is not None:
                st.session_state.current_pdfs_hash = new_hash
                st.success("The document has been updated!")

    # Handle user query
    if user_query:
        st.session_state.chat_history.append(HumanMessage(content=user_query))
        with st.chat_message("Human"):
            # Rendered like the history replay above; raw HTML from the user
            # is intentionally not enabled.
            st.write(user_query)

        with st.chat_message("AI"):
            with st.spinner("Thinking..."):
                response = get_response(
                    user_query,
                    st.session_state.chat_history,
                    st.session_state.vector_store,
                )
            st.write(response)
            st.session_state.chat_history.append(AIMessage(content=response))

    # Show message if no PDF is uploaded
    if pdf_docs is None:
        st.write("Please upload your PDF course before starting the chat.")

    # Handle quiz generation
    if quiz_button:
        quiz_prompt = """
        Based on the document content, create a quiz with 5 multiple choice questions.
        For each question:
        1. Ask a clear, specific question
        2. Provide 4 options labeled A, B, C, D
        3. Make sure the options are plausible but distinct
        4. Don't reveal the correct answer

        Format each question like this:
        Question X:
        **A)**
        **B)**
        **C)**
        **D)**
        """
        with st.chat_message("AI"):
            with st.spinner("Generating quiz..."):
                response = get_response(
                    quiz_prompt,
                    st.session_state.chat_history,
                    st.session_state.vector_store,
                )
            st.write(response)
            st.session_state.chat_history.append(AIMessage(content=response))

    # Handle video search
    if video_button:
        video_prompt = """
        Extract the main topic and key concepts from the document or from the last conversation in 3-4 words maximum.
        Focus on the core subject matter only.
        Do not include any additional text or explanation.
        Example format: "machine learning neural networks" or "quantum computing basics"
        """
        with st.chat_message("AI"):
            if st.session_state.vector_store is None:
                # Without a document there is no topic; do not search for the
                # placeholder message itself.
                st.write(no_document_message)
            else:
                youtube_url = None
                with st.spinner("Searching for relevant video..."):
                    response = get_response(
                        video_prompt,
                        st.session_state.chat_history,
                        st.session_state.vector_store,
                    )
                    topic_failed = response.startswith(response_error_prefix)
                    if not topic_failed:
                        response = response.strip()
                        youtube_url = get_youtube_url(f"Course on {response}")

                if topic_failed:
                    st.error(response)
                elif youtube_url:
                    st.write(f"📺 Here's a video about {response}:")
                    st.video(youtube_url)
                    video_message = (
                        f"📺 Here's a video about {response}:\n{youtube_url}"
                    )
                    st.session_state.chat_history.append(
                        AIMessage(content=video_message)
                    )
                else:
                    st.info("No relevant video was found for this topic.")
321
+
322
# Route the visitor: authenticated sessions get the main app, everyone else
# (flag missing or falsy) is sent to the login page.
if st.session_state.get("authenticated"):
    main_app()
else:
    login_page()