majorSeaweed committed on
Commit b052025 · verified · 1 Parent(s): 072e3de

Upload 2 files

Files changed (2)
  1. app.py +724 -0
  2. requirements.txt +35 -0
app.py ADDED
@@ -0,0 +1,724 @@
+ import os
+ import tempfile
+ import streamlit as st
+ import pdfplumber
+ import arxiv
+ import google.generativeai as genai
+ import numpy as np
+ from pinecone import Pinecone, ServerlessSpec
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+ from langchain_groq import ChatGroq
+ from langchain.schema import Document
+ from langchain_pinecone import PineconeVectorStore
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.chains import create_retrieval_chain
+ from langchain.chains.combine_documents import create_stuff_documents_chain
+ from langchain_community.tools import DuckDuckGoSearchRun
+ from dotenv import load_dotenv
+ from typing import List, Dict
+ import requests
+ from io import BytesIO
+
+ # Load environment variables
+ load_dotenv()
+
+ # Initialize services
+ pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
+ search_tool = DuckDuckGoSearchRun()
+ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
+
+ # Constants for index names and models
+ INDEX_NAMES = {
+     "openai": "rag",
+     "groq": "gemini-rag",
+     "research": "research-rag"
+ }
+
+ GROQ_MODELS = [
+     "gemma2-9b-it", "llama-3.3-70b-versatile", "llama-3.1-8b-instant",
+     "mixtral-8x7b-32768", "deepseek-r1-distill-llama-70b"
+ ]
+
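+ # The LangChain chat clients used below also read their keys from the
+ # environment: ChatOpenAI expects OPENAI_API_KEY and ChatGroq expects
+ # GROQ_API_KEY. A typical .env for this app therefore defines
+ # PINECONE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY and GROQ_API_KEY.
+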
+ # Embedding wrapper around the Gemini embedding API, duck-typed to the
+ # interface LangChain vector stores expect
+ class GeminiEmbeddings:
+     def __init__(self):
+         self.model_name = "models/embedding-001"
+         self._dimension = 768  # Gemini embedding dimension
+
+     def embed_documents(self, texts: List[str]) -> List[List[float]]:
+         """Create embeddings for a list of documents."""
+         try:
+             return [self._embed_text(text) for text in texts]
+         except Exception as e:
+             st.error(f"Embedding error: {str(e)}")
+             return []
+
+     def embed_query(self, text: str) -> List[float]:
+         """Create embeddings for a query string."""
+         # Queries get their own task_type so Gemini returns retrieval-optimized vectors
+         return self._embed_text(text, task_type="retrieval_query")
+
+     def _embed_text(self, text: str, task_type: str = "retrieval_document") -> List[float]:
+         """Helper function to embed a single text."""
+         try:
+             response = genai.embed_content(
+                 model=self.model_name,
+                 content=text,
+                 task_type=task_type
+             )
+             embedding = response["embedding"]
+             return np.array(embedding, dtype=np.float32).tolist()
+         except Exception as e:
+             st.error(f"Embedding generation error: {str(e)}")
+             return [0.0] * self._dimension
+
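+ # Because GeminiEmbeddings exposes embed_documents/embed_query, it can stand
+ # in for OpenAIEmbeddings anywhere below. Quick sanity check (illustrative):
+ #   assert len(GeminiEmbeddings().embed_query("test")) == 768
+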
+ class ResearchEngine:
+     @staticmethod
+     def _download_and_process_pdf(pdf_url: str, metadata: dict = None) -> List[Document]:
+         """Download and process a PDF from a URL."""
+         try:
+             response = requests.get(pdf_url, timeout=30)
+             if response.status_code == 200:
+                 with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
+                     tmp.write(response.content)
+                 # Parse after the with-block so the bytes are flushed and the handle closed
+                 docs = DocumentProcessor.process_pdf(tmp.name)
+                 # Add metadata to each document chunk
+                 if metadata:
+                     for doc in docs:
+                         doc.metadata.update(metadata)
+                 os.unlink(tmp.name)
+                 return docs
+             return []
+         except Exception as e:
+             st.warning(f"Error processing PDF from {pdf_url}: {str(e)}")
+             return []
+
+     @staticmethod
+     def fetch_and_process_arxiv_papers(query: str) -> List[Document]:
+         """Fetch and process papers from arXiv."""
+         try:
+             client = arxiv.Client()
+             search = arxiv.Search(
+                 query=query,
+                 max_results=2,
+                 sort_by=arxiv.SortCriterion.Relevance
+             )
+             documents = []
+
+             for result in client.results(search):
+                 try:
+                     metadata = {
+                         "title": result.title,
+                         "authors": ", ".join(a.name for a in result.authors),
+                         "published": result.published.strftime('%Y-%m-%d'),
+                         "url": result.pdf_url,
+                         "source": "arXiv",
+                         "abstract": result.summary
+                     }
+                     docs = ResearchEngine._download_and_process_pdf(result.pdf_url, metadata)
+                     documents.extend(docs)
+                 except Exception as e:
+                     st.warning(f"Error processing paper {result.title}: {str(e)}")
+                     continue
+
+             return documents
+         except Exception as e:
+             st.error(f"arXiv error: {str(e)}")
+             return []
+
+     @staticmethod
+     def process_pdf_links(pdf_links: List[str], titles: List[str] = None) -> List[Document]:
+         """Process a list of PDF links directly."""
+         documents = []
+         for i, pdf_url in enumerate(pdf_links):
+             try:
+                 metadata = {
+                     "title": titles[i] if titles and i < len(titles) else f"Paper {i+1}",
+                     "url": pdf_url,
+                     "source": "Custom PDF",
+                 }
+                 docs = ResearchEngine._download_and_process_pdf(pdf_url, metadata)
+                 documents.extend(docs)
+             except Exception as e:
+                 st.warning(f"Error processing PDF from {pdf_url}: {str(e)}")
+                 continue
+         return documents
+
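+     # Example (illustrative): fetch two arXiv papers and inspect their chunks
+     #   chunks = ResearchEngine.fetch_and_process_arxiv_papers("RLHF")
+     #   print(chunks[0].metadata["title"], chunks[0].metadata["page"])
+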
+     # Research chain over arXiv or custom PDFs with a citation-structured
+     # prompt; the UI below calls the shorter AIChains.research_chain variant
+     @staticmethod
+     def research_chain(question: str, model_name: str, mode: str = "arxiv", pdf_links: List[str] = None, titles: List[str] = None) -> str:
+         """Enhanced research chain with multiple modes."""
+         try:
+             # Get documents based on mode
+             if mode == "arxiv":
+                 docs = ResearchEngine.fetch_and_process_arxiv_papers(question)
+             elif mode == "custom_pdfs" and pdf_links:
+                 docs = ResearchEngine.process_pdf_links(pdf_links, titles)
+             else:
+                 return "Invalid research mode or missing PDF links"
+
+             if not docs:
+                 return "No relevant documents found or could not process PDFs."
+
+             # Create embeddings and vectorstore
+             embeddings = GeminiEmbeddings()
+             vectorstore = VectorStoreManager.get_vectorstore(docs, embeddings, INDEX_NAMES["research"])
+             if not vectorstore:
+                 return "Error: Could not process documents"
+
+             # Create retrieval chain
+             llm = ChatGroq(model_name=model_name)
+             retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
+
+             prompt = ChatPromptTemplate.from_template("""
+             Based on the following research documents:
+             {context}
+
+             Question: {input}
+
+             Provide a comprehensive analysis with specific citations to the source papers.
+             For each point, mention which paper it comes from using the title or number.
+             Include relevant quotes where appropriate.
+
+             Structure your response as follows:
+             1. Main findings
+             2. Supporting evidence
+             3. Relevant quotes
+             4. Sources used
+             """)
+
+             chain = create_retrieval_chain(
+                 retriever,
+                 create_stuff_documents_chain(llm, prompt)
+             )
+             result = chain.invoke({"input": question})
+             return result["answer"]
+         except Exception as e:
+             return f"Research Error: {str(e)}"
+
+ class DocumentProcessor:
+     @staticmethod
+     def process_pdf(pdf_path: str) -> List[Document]:
+         """Process a PDF file and return a list of Document objects."""
+         if not pdf_path:
+             return []
+         try:
+             with pdfplumber.open(pdf_path) as pdf:
+                 docs = []
+                 for i, page in enumerate(pdf.pages):
+                     text = page.extract_text() or ""
+                     if text.strip():
+                         docs.append(Document(
+                             page_content=text.strip(),
+                             metadata={
+                                 "page": i + 1,
+                                 "source": pdf_path,
+                                 "type": "pdf"
+                             }
+                         ))
+
+             # Split documents into chunks
+             text_splitter = RecursiveCharacterTextSplitter(
+                 chunk_size=1000,
+                 chunk_overlap=200,
+                 length_function=len
+             )
+             return text_splitter.split_documents(docs)
+         except Exception as e:
+             st.error(f"PDF processing error: {str(e)}")
+             return []
+
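+ # With chunk_size=1000 and chunk_overlap=200, each page is cut into ~1000-char
+ # pieces and neighbouring chunks share 200 chars, so sentences that straddle a
+ # boundary stay retrievable from either side.
+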
+ class VectorStoreManager:
+     @staticmethod
+     def get_vectorstore(docs: List[Document], embeddings, index_name: str) -> PineconeVectorStore:
+         """Create or get a vector store for the given documents."""
+         try:
+             # Match the index dimension to the embedding model: OpenAI's default
+             # text-embedding-ada-002 is 1536-dim, Gemini's embedding-001 is 768-dim
+             dimension = 1536 if isinstance(embeddings, OpenAIEmbeddings) else 768
+
+             # Ensure index exists
+             if index_name not in pc.list_indexes().names():
+                 pc.create_index(
+                     name=index_name,
+                     dimension=dimension,
+                     metric="cosine",
+                     # pinecone-client v3+ requires a spec; cloud/region are
+                     # assumptions here, adjust to your Pinecone project
+                     spec=ServerlessSpec(cloud="aws", region="us-east-1")
+                 )
+
+             return PineconeVectorStore.from_documents(
+                 documents=docs,
+                 embedding=embeddings,
+                 index_name=index_name
+             )
+         except Exception as e:
+             st.error(f"Error creating vector store: {str(e)}")
+             return None
+
+     @staticmethod
+     def clear_index(index_name: str):
+         """Clear all vectors from the specified index."""
+         try:
+             if index_name in pc.list_indexes().names():
+                 index = pc.Index(index_name)
+                 index.delete(delete_all=True)
+                 st.success(f"Successfully cleared {index_name} index")
+             else:
+                 st.warning(f"Index {index_name} does not exist")
+         except Exception as e:
+             st.error(f"Error clearing index: {str(e)}")
+
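+ # Note: clear_index removes every vector but keeps the index itself, so the
+ # next get_vectorstore call can upsert into it without re-creating anything.
+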
+ # AI Chains class with all necessary methods
+ class AIChains:
+     @staticmethod
+     def openai_chain(question: str, context: str = "", pdf_path: str = None) -> str:
+         try:
+             llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
+             embeddings = OpenAIEmbeddings()
+
+             if pdf_path:
+                 docs = DocumentProcessor.process_pdf(pdf_path)
+                 vectorstore = VectorStoreManager.get_vectorstore(docs, embeddings, INDEX_NAMES["openai"])
+                 if not vectorstore:
+                     return "Error: Could not process document"
+
+                 retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
+                 prompt = ChatPromptTemplate.from_template("""
+                 Context: {context}
+                 Additional Info: {additional_context}
+                 Question: {input}
+                 Provide a detailed answer with citations:
+                 """)
+
+                 chain = create_retrieval_chain(
+                     retriever,
+                     create_stuff_documents_chain(llm, prompt)
+                 )
+                 result = chain.invoke({
+                     "input": question,
+                     "additional_context": context
+                 })
+                 return result["answer"]
+
+             return llm.invoke(f"{context}\nQuestion: {question}").content
+         except Exception as e:
+             return f"OpenAI Error: {str(e)}"
+
+     @staticmethod
+     def groq_chain(question: str, model_name: str, context: str = "", pdf_path: str = None) -> str:
+         try:
+             llm = ChatGroq(model_name=model_name)
+             embeddings = GeminiEmbeddings()
+
+             if pdf_path:
+                 docs = DocumentProcessor.process_pdf(pdf_path)
+                 vectorstore = VectorStoreManager.get_vectorstore(docs, embeddings, INDEX_NAMES["groq"])
+                 if not vectorstore:
+                     return "Error: Could not process document"
+
+                 retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
+                 prompt = ChatPromptTemplate.from_template("""
+                 Context: {context}
+                 Additional Info: {additional_context}
+                 Question: {input}
+                 Provide a detailed answer with citations:
+                 """)
+
+                 chain = create_retrieval_chain(
+                     retriever,
+                     create_stuff_documents_chain(llm, prompt)
+                 )
+                 result = chain.invoke({
+                     "input": question,
+                     "additional_context": context
+                 })
+                 return result["answer"]
+
+             return llm.invoke(f"{context}\nQuestion: {question}").content
+         except Exception as e:
+             return f"Groq Error: {str(e)}"
+
+     @staticmethod
+     def research_chain(question: str, model_name: str, mode: str = "arxiv", pdf_links: List[str] = None, titles: List[str] = None) -> str:
+         try:
+             if mode == "arxiv":
+                 docs = ResearchEngine.fetch_and_process_arxiv_papers(question)
+             elif mode == "custom_pdfs" and pdf_links:
+                 docs = ResearchEngine.process_pdf_links(pdf_links, titles)
+             else:
+                 return "Invalid research mode or missing PDF links"
+
+             if not docs:
+                 return "No relevant documents found."
+
+             embeddings = GeminiEmbeddings()
+             vectorstore = VectorStoreManager.get_vectorstore(
+                 docs,
+                 embeddings,
+                 INDEX_NAMES["research"]
+             )
+             if not vectorstore:
+                 return "Error: Could not process research papers"
+
+             llm = ChatGroq(model_name=model_name)
+             retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
+
+             prompt = ChatPromptTemplate.from_template("""
+             Based on the following research papers:
+             {context}
+
+             Question: {input}
+
+             Provide a detailed analysis with specific citations:
+             """)
+
+             chain = create_retrieval_chain(
+                 retriever,
+                 create_stuff_documents_chain(llm, prompt)
+             )
+             result = chain.invoke({"input": question})
+             return result["answer"]
+         except Exception as e:
+             return f"Research Error: {str(e)}"
+
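+ # Example call (illustrative): answer a question over arXiv papers with Groq
+ #   answer = AIChains.research_chain(
+ #       "What are the limits of RLHF?", "llama-3.1-8b-instant", mode="arxiv"
+ #   )
+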
+ # Enhanced Streamlit UI
+ st.set_page_config(
+     page_title="AI Research Assistant",
+     page_icon="🔬",
+     layout="wide",
+     initial_sidebar_state="expanded"
+ )
+
+ # Updated styling with a more modern look
+ st.markdown("""
+ <style>
+     /* Base styles */
+     :root {
+         --primary-color: #7c3aed;
+         --secondary-color: #4f46e5;
+         --background-color: #f9fafb;
+         --text-color: #111827;
+     }
+
+     /* Main container */
+     .main {
+         background-color: var(--background-color);
+         color: var(--text-color);
+         font-family: 'Inter', sans-serif;
+     }
+
+     /* Chat messages */
+     .stChatMessage {
+         background-color: white;
+         border-radius: 1rem;
+         padding: 1rem;
+         margin: 1rem 0;
+         box-shadow: 0 1px 3px rgba(0,0,0,0.1);
+         animation: slideIn 0.3s ease-out;
+     }
+
+     /* User message specific */
+     .user-message {
+         background-color: #f3f4f6;
+         margin-left: auto;
+         max-width: 80%;
+     }
+
+     /* Assistant message specific */
+     .assistant-message {
+         background-color: white;
+         margin-right: auto;
+         max-width: 80%;
+     }
+
+     /* Input container */
+     .input-container {
+         position: fixed;
+         bottom: 0;
+         left: 0;
+         right: 0;
+         background-color: white;
+         padding: 1rem;
+         box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
+         z-index: 1000;
+     }
+
+     /* Buttons */
+     .stButton button {
+         background: linear-gradient(to right, var(--primary-color), var(--secondary-color));
+         color: white;
+         border: none;
+         border-radius: 0.5rem;
+         padding: 0.5rem 1rem;
+         transition: all 0.3s ease;
+     }
+
+     .stButton button:hover {
+         transform: translateY(-1px);
+         box-shadow: 0 4px 6px rgba(0,0,0,0.1);
+     }
+
+     /* Sidebar */
+     .css-1d391kg {
+         background-color: white;
+     }
+
+     /* Animations */
+     @keyframes slideIn {
+         from {
+             transform: translateY(20px);
+             opacity: 0;
+         }
+         to {
+             transform: translateY(0);
+             opacity: 1;
+         }
+     }
+
+     /* Chat container */
+     .chat-container {
+         margin-bottom: 120px;
+         padding: 1rem;
+     }
+
+     /* Search container */
+     .search-container {
+         position: fixed;
+         bottom: 0;
+         left: 0;
+         right: 0;
+         background: white;
+         padding: 1rem;
+         box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
+         z-index: 1000;
+     }
+
+     /* Input field */
+     .stTextInput input {
+         border-radius: 0.5rem;
+         border: 2px solid #e5e7eb;
+         padding: 0.75rem;
+         font-size: 1rem;
+         transition: all 0.3s ease;
+     }
+
+     .stTextInput input:focus {
+         border-color: var(--primary-color);
+         box-shadow: 0 0 0 2px rgba(124,58,237,0.2);
+     }
+
+     /* Helper buttons */
+     .helper-buttons {
+         display: flex;
+         gap: 0.5rem;
+         margin-top: 0.5rem;
+     }
+
+     .helper-button {
+         background-color: #f3f4f6;
+         border: none;
+         border-radius: 0.5rem;
+         padding: 0.5rem 1rem;
+         font-size: 0.875rem;
+         color: #4b5563;
+         cursor: pointer;
+         transition: all 0.2s ease;
+     }
+
+     .helper-button:hover {
+         background-color: #e5e7eb;
+     }
+ </style>
+ """, unsafe_allow_html=True)
+
+ # Initialize session state
+ if "messages" not in st.session_state:
+     st.session_state.messages = []
+ if "pdf_path" not in st.session_state:
+     st.session_state.pdf_path = None
+
+ # Enhanced sidebar
+ with st.sidebar:
+     st.title("🤖 AI Research Assistant")
+
+     # Chat controls
+     st.subheader("💬 Chat Controls")
+     col1, col2 = st.columns(2)
+     with col1:
+         if st.button("🆕 New Chat"):
+             st.session_state.messages = []
+     with col2:
+         if st.button("🗑️ Clear History"):
+             st.session_state.messages = []
+
+     st.divider()
+
+     # Model settings
+     st.subheader("🛠️ Model Settings")
+     model_choice = st.selectbox("Select Model", ["OpenAI", "Groq"], key="model_choice")
+
+     if model_choice == "Groq":
+         groq_model = st.selectbox("Model Version", GROQ_MODELS)
+
+     # Database controls
+     st.subheader("📊 Database Controls")
+     selected_index = st.selectbox("Select Index", list(INDEX_NAMES.values()))
+     if st.button("🗑️ Clear Selected Index"):
+         VectorStoreManager.clear_index(selected_index)
+
+     # Document upload
+     st.subheader("📄 Document Upload")
+     pdf_file = st.file_uploader("Upload PDF", type="pdf")
+
+     if pdf_file:
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
+             tmp.write(pdf_file.getvalue())
+             st.session_state.pdf_path = tmp.name
+         st.success("✅ PDF uploaded successfully!")
+
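+ # The upload is copied to a NamedTemporaryFile with delete=False so the path
+ # in session state survives Streamlit reruns; the cleanup block at the bottom
+ # of the script removes it once no file is selected.
+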
+ # Main chat interface
+ st.header("AI Research Assistant", divider="rainbow")
+
+ # Chat container
+ with st.container():
+     st.markdown('<div class="chat-container">', unsafe_allow_html=True)
+     for message in st.session_state.messages:
+         with st.chat_message(message["role"]):
+             st.markdown(message["content"])
+             if "sources" in message and message["sources"]:
+                 with st.expander("📚 View Sources"):
+                     st.write(message["sources"])
+     st.markdown('</div>', unsafe_allow_html=True)
+
+ # Search container
+ with st.container():
+     st.markdown('<div class="search-container">', unsafe_allow_html=True)
+
+     # Chat input
+     prompt = st.text_input("Ask me anything...", key="chat_input",
+                            placeholder="Type your message here...")
+
+     # Search buttons
+     col1, col2, col3 = st.columns([1, 1, 4])
+     with col1:
+         web_search = st.button("🌐 Web")
+     with col2:
+         research_search = st.button("📚 Research")
+
+     if prompt and (web_search or research_search or st.session_state.get("chat_input")):
+         st.session_state.messages.append({"role": "user", "content": prompt})
+
+         with st.chat_message("assistant"):
+             with st.spinner("🤔 Thinking..."):
+                 try:
+                     context = ""
+                     sources = {}
+
+                     if web_search:
+                         with st.spinner("🌐 Searching the web..."):
+                             web_results = search_tool.run(prompt)
+                             context += f"Web Search Results:\n{web_results}\n"
+                             sources["Web"] = web_results
+
+                     if research_search:
+                         with st.spinner("📚 Analyzing research papers..."):
+                             if model_choice == "Groq":
+                                 research_response = AIChains.research_chain(prompt, groq_model)
+                                 sources["Research"] = research_response
+                             else:
+                                 st.warning("ℹ️ Research mode is only available with Groq models")
+                                 research_response = ""
+                             context += f"\nResearch Context:\n{research_response}\n"
+
+                     # Get response from selected model
+                     if not (web_search or research_search):
+                         with st.spinner("💭 Generating response..."):
+                             if model_choice == "OpenAI":
+                                 response = AIChains.openai_chain(
+                                     question=prompt,
+                                     context=context,
+                                     pdf_path=st.session_state.pdf_path
+                                 )
+                             else:  # Groq
+                                 response = AIChains.groq_chain(
+                                     question=prompt,
+                                     model_name=groq_model,
+                                     context=context,
+                                     pdf_path=st.session_state.pdf_path
+                                 )
+                     else:
+                         response = context
+
+                     # Display response with markdown formatting
+                     st.markdown(response)
+
+                     # Show sources in expandable section if available
+                     if sources:
+                         with st.expander("📚 View Sources"):
+                             for source_type, content in sources.items():
+                                 st.subheader(f"{source_type} Sources")
+                                 st.markdown(content)
+
+                     # Add to chat history
+                     st.session_state.messages.append({
+                         "role": "assistant",
+                         "content": response,
+                         "sources": sources if sources else None
+                     })
+                 except Exception as e:
+                     st.error(f"❌ Error: {str(e)}")
+
+     st.markdown('</div>', unsafe_allow_html=True)
+
+ st.sidebar.subheader("📚 Research Mode")
+ research_mode = st.sidebar.radio(
+     "Select Research Mode",
+     ["arXiv", "Custom PDFs"]
+ )
+
+ # Initialize so the list comprehensions below are safe in arXiv mode too
+ pdf_links = ""
+ pdf_titles = ""
+ if research_mode == "Custom PDFs":
+     pdf_links = st.sidebar.text_area(
+         "Enter PDF URLs (one per line)",
+         placeholder="https://example.com/paper1.pdf\nhttps://example.com/paper2.pdf"
+     )
+     pdf_titles = st.sidebar.text_area(
+         "Enter Paper Titles (one per line)",
+         placeholder="Paper 1 Title\nPaper 2 Title"
+     )
+
+ pdf_links_list = [url.strip() for url in pdf_links.split('\n') if url.strip()] if pdf_links else []
+ pdf_titles_list = [title.strip() for title in pdf_titles.split('\n') if title.strip()] if pdf_titles else []
+
+ # Research search with mode support (arXiv or custom PDFs); guarded on prompt
+ # so that context/sources from the handler above are defined
+ if research_search and prompt:
+     with st.spinner("📚 Analyzing research papers..."):
+         if model_choice == "Groq":
+             if research_mode == "Custom PDFs" and pdf_links_list:
+                 research_response = AIChains.research_chain(
+                     prompt,
+                     groq_model,
+                     mode="custom_pdfs",
+                     pdf_links=pdf_links_list,
+                     titles=pdf_titles_list
+                 )
+             else:
+                 research_response = AIChains.research_chain(
+                     prompt,
+                     groq_model,
+                     mode="arxiv"
+                 )
+             sources["Research"] = research_response
+         else:
+             st.warning("ℹ️ Research mode is only available with Groq models")
+             research_response = ""
+         context += f"\nResearch Context:\n{research_response}\n"
+
+ # Cleanup temporary files
+ if st.session_state.pdf_path and not pdf_file:
+     try:
+         os.unlink(st.session_state.pdf_path)
+         st.session_state.pdf_path = None
+     except Exception as e:
+         st.error(f"Error cleaning up temporary files: {str(e)}")
+
+ # Add a footer
+ st.markdown("""
+ <div style='position: fixed; bottom: 150px; left: 0; right: 0; text-align: center; padding: 10px; font-size: 0.8em; color: #666;'>
+     Made with ❤️ using Streamlit
+ </div>
+ """, unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1,35 @@
+ # Core Libraries
+ numpy
+ pandas
+ requests
+ pdfplumber
+ tqdm
+ python-dotenv
+ lxml
+ beautifulsoup4
+
+ # Pinecone (Vector Database)
+ pinecone-client
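+ # Note: app.py uses the v3 client API (Pinecone(...) / ServerlessSpec), so a
+ # v3+ release of pinecone-client is assumed here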
+
+ # LangChain & OpenAI/Groq Integrations
+ langchain
+ langchain-groq
+ langchain-community
+ langchain-openai
+ langchain-pinecone
+ langchain-core
+
+ # Google Gemini API
+ google-generativeai
+
+ # Web Scraping
+ selenium
+ webdriver-manager
+
+ # Streamlit (for UI)
+ streamlit
+
+ # Search APIs
+ arxiv
+ wikipedia-api
+ duckduckgo-search