jzou19950715 committed on
Commit
ad3151d
·
verified ·
1 Parent(s): 9f9e972

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +337 -734
app.py CHANGED
@@ -1,774 +1,339 @@
1
- import os
2
- import sys
3
- import logging
4
- from pathlib import Path
5
- import json
6
- from datetime import datetime
7
- from typing import List, Dict, Any, Optional, Tuple, Union
8
- import traceback
9
-
10
- # Configure detailed logging with file output
11
- LOG_DIR = "logs"
12
- os.makedirs(LOG_DIR, exist_ok=True)
13
- log_file = os.path.join(LOG_DIR, f"rag_system_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
14
-
15
- # Set up root logger with both file and console handlers
16
- logging.basicConfig(
17
- level=logging.INFO,
18
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
19
- handlers=[
20
- logging.FileHandler(log_file),
21
- logging.StreamHandler(sys.stdout)
22
- ]
23
- )
24
- logger = logging.getLogger("rag_system")
25
- logger.info(f"Starting RAG system. Log file: {log_file}")
26
-
27
- # Importing necessary libraries with error handling
28
- try:
29
- import torch
30
- import numpy as np
31
- from sentence_transformers import SentenceTransformer
32
- import chromadb
33
- from chromadb.utils import embedding_functions
34
- import gradio as gr
35
- from openai import OpenAI
36
- import google.generativeai as genai
37
- logger.info("All required libraries successfully imported")
38
- except ImportError as e:
39
- logger.critical(f"Failed to import required libraries: {e}")
40
- print(f"ERROR: Missing required libraries. Please install with: pip install -r requirements.txt")
41
- print(f"Specific error: {e}")
42
- sys.exit(1)
43
-
44
- # Version info for tracking
45
- VERSION = "1.1.0"
46
- logger.info(f"RAG System Version: {VERSION}")
47
-
48
- class Config:
49
- """
50
- Configuration for vector store and RAG system.
51
-
52
- This class centralizes all configuration parameters for the application,
53
- making it easier to modify settings and ensure consistency.
54
-
55
- Attributes:
56
- local_dir (str): Directory for ChromaDB persistence
57
- embedding_model (str): Name of the embedding model to use
58
- collection_name (str): Name of the ChromaDB collection
59
- default_top_k (int): Default number of results to return
60
- openai_model (str): Default OpenAI model to use
61
- gemini_model (str): Default Gemini model to use
62
- temperature (float): Temperature setting for LLM generation
63
- max_tokens (int): Maximum tokens for LLM response
64
- system_name (str): Name of the system for UI
65
- context_limit (int): Maximum characters to include in context
66
- """
67
-
68
- def __init__(self,
69
- local_dir: str = "./chroma_db",
70
- embedding_model: str = "all-MiniLM-L6-v2",
71
- collection_name: str = "markdown_docs",
72
- default_top_k: int = 8, # Increased from 5 to 8 for more context
73
- openai_model: str = "gpt-4o-mini",
74
- gemini_model: str = "gemini-1.5-flash",
75
- temperature: float = 0.3,
76
- max_tokens: int = 2000, # Increased from 1000 to 2000 for more comprehensive responses
77
- system_name: str = "Document RAG System",
78
- context_limit: int = 16000): # Increased context limit for more comprehensive context
79
- self.local_dir = local_dir
80
- self.embedding_model = embedding_model
81
- self.collection_name = collection_name
82
- self.default_top_k = default_top_k
83
- self.openai_model = openai_model
84
- self.gemini_model = gemini_model
85
- self.temperature = temperature
86
- self.max_tokens = max_tokens
87
- self.system_name = system_name
88
- self.context_limit = context_limit
89
-
90
- # Create local directory if it doesn't exist
91
- os.makedirs(local_dir, exist_ok=True)
92
-
93
- logger.info(f"Initialized configuration: {self.__dict__}")
94
-
95
- def to_dict(self) -> Dict[str, Any]:
96
- """Convert configuration to dictionary for serialization"""
97
- return self.__dict__
98
-
99
- @classmethod
100
- def from_file(cls, config_path: str) -> 'Config':
101
- """Load configuration from JSON file"""
102
- try:
103
- with open(config_path, 'r') as f:
104
- config_dict = json.load(f)
105
- logger.info(f"Loaded configuration from {config_path}")
106
- return cls(**config_dict)
107
- except Exception as e:
108
- logger.error(f"Failed to load configuration from {config_path}: {e}")
109
- logger.info("Using default configuration")
110
- return cls()
111
-
112
- def save_to_file(self, config_path: str) -> bool:
113
- """Save configuration to JSON file"""
114
- try:
115
- with open(config_path, 'w') as f:
116
- json.dump(self.to_dict(), f, indent=2)
117
- logger.info(f"Saved configuration to {config_path}")
118
- return True
119
- except Exception as e:
120
- logger.error(f"Failed to save configuration to {config_path}: {e}")
121
- return False
122
-
123
- class EmbeddingEngine:
124
- """
125
- Handle embeddings with a lightweight model.
126
-
127
- This class manages the embedding model used to convert text to vector
128
- representations for semantic search.
129
-
130
- Attributes:
131
- model (SentenceTransformer): The loaded embedding model
132
- model_name (str): Name of the successfully loaded model
133
- vector_size (int): Dimension of the embedding vectors
134
- device (str): Device used for inference ('cuda' or 'cpu')
135
- """
136
-
137
- def __init__(self, model_name="all-MiniLM-L6-v2"):
138
- """
139
- Initialize the embedding engine with the specified model.
140
-
141
- Args:
142
- model_name (str): Name of the embedding model to load
143
-
144
- Raises:
145
- SystemExit: If no embedding model could be loaded
146
- """
147
- # Use GPU if available
148
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
149
- logger.info(f"Using device for embeddings: {self.device}")
150
-
151
- # Try multiple model options in order of preference
152
- model_options = [
153
- model_name,
154
- "all-MiniLM-L6-v2", # Good balance of speed and quality
155
- "paraphrase-MiniLM-L3-v2", # Faster but less accurate
156
- "all-mpnet-base-v2" # Higher quality but larger model
157
- ]
158
-
159
- self.model = None
160
-
161
- # Try each model in order until one works
162
- for model_option in model_options:
163
- try:
164
- logger.info(f"Attempting to load embedding model: {model_option}")
165
- self.model = SentenceTransformer(model_option)
166
-
167
- # Move model to device
168
- self.model.to(self.device)
169
-
170
- logger.info(f"Successfully loaded embedding model: {model_option}")
171
- self.model_name = model_option
172
- self.vector_size = self.model.get_sentence_embedding_dimension()
173
- logger.info(f"Embedding vector size: {self.vector_size}")
174
- break
175
-
176
- except Exception as e:
177
- logger.warning(f"Failed to load embedding model {model_option}: {str(e)}")
178
-
179
- if self.model is None:
180
- error_msg = "Failed to load any embedding model. Please check your internet connection or install models locally."
181
- logger.critical(error_msg)
182
- raise SystemExit(error_msg)
183
 
184
- def embed(self, texts: List[str]) -> np.ndarray:
185
- """
186
- Generate embeddings for a list of texts.
 
 
 
 
 
 
 
 
187
 
188
- Args:
189
- texts (List[str]): List of texts to embed
190
-
191
- Returns:
192
- np.ndarray: Array of embeddings
193
-
194
- Raises:
195
- ValueError: If the input is invalid
196
- RuntimeError: If embedding fails
197
- """
198
- if not texts:
199
- raise ValueError("Cannot embed empty list of texts")
200
 
201
- try:
202
- embeddings = self.model.encode(texts, convert_to_numpy=True)
203
- return embeddings
204
- except Exception as e:
205
- logger.error(f"Error generating embeddings: {e}")
206
- raise RuntimeError(f"Failed to generate embeddings: {e}")
207
-
208
- class VectorStoreManager:
209
- """
210
- Manage Chroma vector store operations - upload, query, etc.
211
-
212
- This class provides an interface to the ChromaDB vector database,
213
- handling document storage, retrieval, and management.
214
-
215
- Attributes:
216
- config (Config): Configuration parameters
217
- client (chromadb.PersistentClient): ChromaDB client
218
- collection (chromadb.Collection): The active ChromaDB collection
219
- embedding_engine (EmbeddingEngine): Engine for generating embeddings
220
- """
221
-
222
- def __init__(self, config: Config):
223
- """
224
- Initialize the vector store manager.
225
 
226
- Args:
227
- config (Config): Configuration parameters
228
-
229
- Raises:
230
- SystemExit: If the vector store cannot be initialized
231
- """
232
- self.config = config
233
-
234
- # Initialize Chroma client (local persistence)
235
- logger.info(f"Initializing Chroma at {config.local_dir}")
236
- try:
237
- self.client = chromadb.PersistentClient(path=config.local_dir)
238
- logger.info("ChromaDB client initialized successfully")
239
- except Exception as e:
240
- error_msg = f"Failed to initialize ChromaDB client: {e}"
241
- logger.critical(error_msg)
242
- raise SystemExit(error_msg)
243
 
244
- # Get or create collection
245
- try:
246
- # Initialize embedding model
247
- logger.info("Loading embedding model...")
248
- self.embedding_engine = EmbeddingEngine(config.embedding_model)
249
- logger.info(f"Using embedding model: {self.embedding_engine.model_name}")
250
-
251
- # Create embedding function
252
- sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
253
- model_name=self.embedding_engine.model_name
254
- )
255
 
256
- # Try to get existing collection or create a new one
257
- try:
258
- self.collection = self.client.get_collection(
259
- name=config.collection_name,
260
- embedding_function=sentence_transformer_ef
261
- )
262
- logger.info(f"Using existing collection: {config.collection_name}")
263
- except Exception as e:
264
- logger.warning(f"Error getting collection: {e}")
265
- # Attempt to get a list of available collections
266
- collections = self.client.list_collections()
267
- if collections:
268
- logger.info(f"Available collections: {[c.name for c in collections]}")
269
- # Use the first available collection if any
270
- self.collection = self.client.get_collection(
271
- name=collections[0].name,
272
- embedding_function=sentence_transformer_ef
273
- )
274
- logger.info(f"Using collection: {collections[0].name}")
275
- else:
276
- # Create new collection if none exist
277
- self.collection = self.client.create_collection(
278
- name=config.collection_name,
279
- embedding_function=sentence_transformer_ef,
280
- metadata={"hnsw:space": "cosine"}
281
- )
282
- logger.info(f"Created new collection: {config.collection_name}")
283
 
284
- except Exception as e:
285
- error_msg = f"Error initializing Chroma collection: {e}"
286
- logger.critical(error_msg)
287
- raise SystemExit(error_msg)
288
-
289
- def query(self, query_text: str, n_results: int = 5) -> List[Dict]:
290
- """
291
- Query the vector store with a text query.
292
-
293
- Args:
294
- query_text (str): The query text
295
- n_results (int): Number of results to return
296
-
297
- Returns:
298
- List[Dict]: List of results with document text, metadata, and similarity score
299
- """
300
- if not query_text.strip():
301
- logger.warning("Empty query received")
302
- return []
303
-
304
- try:
305
- logger.info(f"Querying vector store with: '{query_text[:50]}...' (top {n_results})")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
- # Query the collection
308
- search_results = self.collection.query(
309
- query_texts=[query_text],
310
- n_results=n_results,
311
- include=["documents", "metadatas", "distances"]
312
- )
313
 
314
- # Format results
315
- results = []
316
- if search_results["documents"] and len(search_results["documents"][0]) > 0:
317
- for i in range(len(search_results["documents"][0])):
318
- results.append({
319
- 'document': search_results["documents"][0][i],
320
- 'metadata': search_results["metadatas"][0][i] if search_results["metadatas"] else {},
321
- 'score': 1.0 - search_results["distances"][0][i], # Convert distance to similarity
322
- 'distance': search_results["distances"][0][i]
323
- })
324
 
325
- logger.info(f"Found {len(results)} results for query")
326
- else:
327
- logger.info("No results found for query")
328
-
329
- return results
330
- except Exception as e:
331
- logger.error(f"Error querying collection: {e}")
332
- logger.debug(traceback.format_exc())
333
- return []
334
-
335
- def add_document(self,
336
- document: str,
337
- doc_id: str,
338
- metadata: Dict[str, Any]) -> bool:
339
- """
340
- Add a document to the vector store.
341
-
342
- Args:
343
- document (str): The document text
344
- doc_id (str): Unique identifier for the document
345
- metadata (Dict[str, Any]): Metadata about the document
346
-
347
- Returns:
348
- bool: True if successful, False otherwise
349
- """
350
- try:
351
- logger.info(f"Adding document '{doc_id}' to vector store")
352
-
353
- # Add the document to the collection
354
- self.collection.add(
355
- documents=[document],
356
- ids=[doc_id],
357
- metadatas=[metadata]
358
  )
359
 
360
- logger.info(f"Successfully added document '{doc_id}'")
361
- return True
362
- except Exception as e:
363
- logger.error(f"Error adding document to collection: {e}")
364
- return False
365
-
366
- def delete_document(self, doc_id: str) -> bool:
367
- """
368
- Delete a document from the vector store.
369
-
370
- Args:
371
- doc_id (str): ID of the document to delete
372
 
373
- Returns:
374
- bool: True if successful, False otherwise
375
- """
376
- try:
377
- logger.info(f"Deleting document '{doc_id}' from vector store")
378
- self.collection.delete(ids=[doc_id])
379
- logger.info(f"Successfully deleted document '{doc_id}'")
380
- return True
381
- except Exception as e:
382
- logger.error(f"Error deleting document from collection: {e}")
383
- return False
384
-
385
- def get_statistics(self) -> Dict[str, Any]:
386
- """
387
- Get statistics about the vector store.
388
-
389
- Returns:
390
- Dict[str, Any]: Statistics about the vector store
391
- """
392
- stats = {
393
- 'collection_name': self.config.collection_name,
394
- 'embedding_model': self.embedding_engine.model_name,
395
- 'embedding_dimensions': self.embedding_engine.vector_size,
396
- 'device': self.embedding_engine.device
397
- }
398
-
399
- try:
400
- # Get collection count
401
- collection_count = self.collection.count()
402
- stats['total_documents'] = collection_count
403
 
404
- # Get unique metadata values
405
- if collection_count > 0:
 
 
 
 
 
 
 
 
 
406
  try:
407
- # Get a sample of document metadata
408
- sample_results = self.collection.get(limit=min(collection_count, 100))
409
- if sample_results and 'metadatas' in sample_results and sample_results['metadatas']:
410
- # Count unique files if filename exists in metadata
411
- filenames = set()
412
- for metadata in sample_results['metadatas']:
413
- if 'filename' in metadata:
414
- filenames.add(metadata['filename'])
415
- stats['unique_files'] = len(filenames)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
  except Exception as e:
417
- logger.warning(f"Error getting metadata statistics: {e}")
418
-
419
- logger.info(f"Vector store statistics: {stats}")
420
- except Exception as e:
421
- logger.error(f"Error getting statistics: {e}")
422
- stats['error'] = str(e)
423
-
424
- return stats
425
-
426
- class RAGSystem:
427
- """
428
- Retrieval-Augmented Generation with multiple LLM providers.
429
-
430
- This class handles the RAG workflow: retrieval of relevant documents,
431
- formatting context, and generating responses with different LLM providers.
432
-
433
- Attributes:
434
- vector_store (VectorStoreManager): Manager for vector store operations
435
- openai_client (Optional[OpenAI]): OpenAI client
436
- gemini_configured (bool): Whether Gemini API is configured
437
- config (Config): Configuration parameters
438
- """
439
-
440
- def __init__(self, vector_store: VectorStoreManager, config: Config):
441
- """
442
- Initialize the RAG system.
443
-
444
- Args:
445
- vector_store (VectorStoreManager): Vector store manager
446
- config (Config): Configuration parameters
447
- """
448
- self.vector_store = vector_store
449
- self.config = config
450
- self.openai_client = None
451
- self.gemini_configured = False
452
-
453
- logger.info("Initialized RAG system")
454
-
455
- def setup_openai(self, api_key: str) -> bool:
456
- """
457
- Set up OpenAI client with API key.
458
-
459
- Args:
460
- api_key (str): OpenAI API key
461
 
462
- Returns:
463
- bool: True if successful, False otherwise
464
- """
465
- if not api_key.strip():
466
- logger.warning("Empty OpenAI API key provided")
467
- return False
468
 
469
- try:
470
- logger.info("Setting up OpenAI client")
471
- self.openai_client = OpenAI(api_key=api_key)
472
- # Test the API key with a simple request
473
- response = self.openai_client.chat.completions.create(
474
- model=self.config.openai_model,
475
- messages=[
476
- {"role": "system", "content": "You are a helpful assistant."},
477
- {"role": "user", "content": "Test connection"}
478
- ],
479
- max_tokens=10
480
  )
481
- logger.info("OpenAI client configured successfully")
482
- return True
483
- except Exception as e:
484
- logger.error(f"Error initializing OpenAI client: {e}")
485
- self.openai_client = None
486
- return False
487
-
488
- def setup_gemini(self, api_key: str) -> bool:
489
- """
490
- Set up Gemini with API key.
491
-
492
- Args:
493
- api_key (str): Google AI API key
494
-
495
- Returns:
496
- bool: True if successful, False otherwise
497
- """
498
- if not api_key.strip():
499
- logger.warning("Empty Gemini API key provided")
500
- return False
501
-
502
- try:
503
- logger.info("Setting up Gemini client")
504
- genai.configure(api_key=api_key)
505
-
506
- # Test the API key with a simple request
507
- model = genai.GenerativeModel(self.config.gemini_model)
508
- response = model.generate_content("Test connection")
509
-
510
- self.gemini_configured = True
511
- logger.info("Gemini client configured successfully")
512
- return True
513
- except Exception as e:
514
- logger.error(f"Error configuring Gemini: {e}")
515
- self.gemini_configured = False
516
- return False
517
-
518
- def format_context(self, documents: List[Dict]) -> str:
519
- """
520
- Format retrieved documents into context for the LLM.
521
-
522
- Args:
523
- documents (List[Dict]): List of retrieved documents
524
-
525
- Returns:
526
- str: Formatted context for the LLM
527
- """
528
- if not documents:
529
- logger.warning("No documents provided for context formatting")
530
- return "No relevant documents found."
531
-
532
- logger.info(f"Formatting {len(documents)} documents for context")
533
- context_parts = []
534
-
535
- for i, doc in enumerate(documents):
536
- metadata = doc['metadata']
537
- # Extract document metadata in a robust way
538
- title = metadata.get('title', metadata.get('filename', 'Unknown document'))
539
-
540
- # Format header with just essential metadata for cleaner context
541
- header = f"Document {i+1} - {title}"
542
-
543
- # For readability, limit length of context document (using config value)
544
- doc_text = doc['document']
545
- if len(doc_text) > (self.config.context_limit // len(documents)):
546
- # Divide context limit among the documents
547
- max_length = self.config.context_limit // len(documents)
548
- doc_text = doc_text[:max_length] + "... [Document truncated for brevity]"
549
-
550
- context_parts.append(f"{header}:\n{doc_text}\n")
551
-
552
- full_context = "\n".join(context_parts)
553
- logger.info(f"Created context with {len(full_context)} characters")
554
-
555
- return full_context
556
-
557
- def generate_response_openai(self, query: str, context: str) -> str:
558
- """
559
- Generate a response using OpenAI model with context.
560
-
561
- Args:
562
- query (str): User query
563
- context (str): Formatted document context
564
 
565
- Returns:
566
- str: Generated response
567
- """
568
- if not self.openai_client:
569
- logger.warning("OpenAI API key not configured for response generation")
570
- return "Please configure an OpenAI API key to use this feature. Enter your API key in the field and click 'Save API Key'."
571
-
572
- # Improved system prompt for better, more comprehensive responses
573
- system_prompt = """
574
- You are an exceptionally helpful, clear, and friendly AI research assistant. Your goal is to provide comprehensive, well-structured, and insightful answers based on the provided document context.
575
-
576
- Guidelines for your response:
577
-
578
- 1. USE ONLY the information contained in the provided context documents to form your answer. If the context doesn't contain enough information to provide a complete answer, acknowledge this limitation clearly.
579
-
580
- 2. Always provide well-structured, detailed responses between 300-500 words that thoroughly address the user's question.
581
-
582
- 3. Format your response with clear headings, bullet points, or numbered lists when appropriate to enhance readability.
583
-
584
- 4. Cite your sources by referring to the document numbers (e.g., "According to Document 1...") to support your claims.
585
-
586
- 5. Use a friendly, conversational, and supportive tone that makes complex information accessible.
587
-
588
- 6. If different documents offer conflicting information, acknowledge these differences and present both perspectives without bias.
589
-
590
- 7. When appropriate, organize information into logical categories or chronological order to improve clarity.
591
-
592
- 8. Use examples from the documents to illustrate key points when available.
593
-
594
- 9. Conclude with a brief summary of the main points if the answer is complex.
595
-
596
- 10. Remember to stay focused on the user's specific question while providing sufficient context for complete understanding.
597
- """
598
-
599
- try:
600
- logger.info(f"Generating response with OpenAI ({self.config.openai_model})")
601
-
602
- start_time = datetime.now()
603
- response = self.openai_client.chat.completions.create(
604
- model=self.config.openai_model,
605
- messages=[
606
- {"role": "system", "content": system_prompt},
607
- {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"}
608
- ],
609
- temperature=self.config.temperature,
610
- max_tokens=self.config.max_tokens,
611
  )
612
 
613
- generation_time = (datetime.now() - start_time).total_seconds()
614
- response_text = response.choices[0].message.content
615
-
616
- logger.info(f"Generated response with OpenAI in {generation_time:.2f} seconds")
617
- return response_text
618
- except Exception as e:
619
- error_msg = f"Error generating response with OpenAI: {str(e)}"
620
- logger.error(error_msg)
621
- return f"I encountered an error while generating your response. Please try again or check your API key. Error details: {str(e)}"
622
-
623
- def generate_response_gemini(self, query: str, context: str) -> str:
624
- """
625
- Generate a response using Gemini with context.
626
-
627
- Args:
628
- query (str): User query
629
- context (str): Formatted document context
630
 
631
- Returns:
632
- str: Generated response
633
- """
634
- if not self.gemini_configured:
635
- logger.warning("Gemini API key not configured for response generation")
636
- return "Please configure a Google AI API key to use this feature. Enter your API key in the field and click 'Save API Key'."
637
-
638
- # Improved Gemini prompt for more comprehensive and user-friendly responses
639
- prompt = f"""
640
- You are a knowledgeable and friendly research assistant who excels at providing clear, comprehensive, and well-structured responses. Your goal is to help users understand complex information from documents in an accessible way.
641
-
642
- **Guidelines for Your Response:**
643
-
644
- - Create a detailed, well-organized response of approximately 300-500 words that thoroughly addresses the user's question.
645
- - Use ONLY information from the provided context documents.
646
- - Structure your answer with clear paragraphs, and use headings, bullet points, or numbered lists when appropriate.
647
- - Maintain a friendly, conversational tone that makes information accessible and engaging.
648
- - When citing information, reference specific documents by number (e.g., "As mentioned in Document 2...").
649
- - If the context doesn't contain enough information for a complete answer, acknowledge this limitation while providing what you can from the available context.
650
- - If documents contain conflicting information, present both perspectives fairly.
651
- - Conclude with a brief summary if the topic is complex.
652
-
653
- **Context Documents:**
654
- {context}
655
-
656
- **User's Question:**
657
- {query}
658
-
659
- **Your Response:**
660
- """
661
-
662
- try:
663
- logger.info(f"Generating response with Gemini ({self.config.gemini_model})")
664
 
665
- start_time = datetime.now()
666
- model = genai.GenerativeModel(self.config.gemini_model)
 
 
 
 
667
 
668
- generation_config = {
669
- "temperature": self.config.temperature,
670
- "max_output_tokens": self.config.max_tokens,
671
- "top_p": 0.9,
672
- "top_k": 40
673
- }
674
 
675
- response = model.generate_content(
676
- prompt,
677
- generation_config=generation_config
 
 
 
678
  )
679
 
680
- generation_time = (datetime.now() - start_time).total_seconds()
681
- response_text = response.text
682
-
683
- logger.info(f"Generated response with Gemini in {generation_time:.2f} seconds")
684
- return response_text
685
- except Exception as e:
686
- error_msg = f"Error generating response with Gemini: {str(e)}"
687
- logger.error(error_msg)
688
- return f"I encountered an error while generating your response. Please try again or check your API key. Error details: {str(e)}"
689
-
690
- def query_and_generate(self,
691
- query: str,
692
- n_results: int = 5,
693
- model: str = "openai") -> Tuple[str, str]:
694
- """
695
- Retrieve relevant documents and generate a response using the specified model.
696
 
697
- Args:
698
- query (str): User query
699
- n_results (int): Number of documents to retrieve
700
- model (str): Model provider to use ('openai' or 'gemini')
701
-
702
- Returns:
703
- Tuple[str, str]: (Generated response, Search results)
704
- """
705
- if not query.strip():
706
- logger.warning("Empty query received")
707
- return "Please enter a question to get a response.", "No search performed."
708
 
709
- logger.info(f"Processing query: '{query[:50]}...' with {model} model")
 
 
710
 
711
- # Query vector store
712
- documents = self.vector_store.query(query, n_results=n_results)
 
 
 
 
 
 
713
 
714
- # Format search results (for logs and hidden UI component)
715
- # We'll format this in a way that's more useful for reference but not shown in UI
716
- formatted_results = []
717
- for i, res in enumerate(documents):
718
- metadata = res['metadata']
719
- title = metadata.get('title', metadata.get('filename', 'Unknown'))
720
- score = res['score']
721
-
722
- # Only include a very brief preview for reference
723
- preview = res['document'][:100] + '...' if len(res['document']) > 100 else res['document']
724
- formatted_results.append(f"Document {i+1}: {title} (Relevance: {score:.2f})")
725
 
726
- search_output_text = "\n".join(formatted_results) if formatted_results else "No relevant documents found."
 
 
 
 
727
 
728
- if not documents:
729
- logger.warning("No relevant documents found")
730
- return "I couldn't find relevant information in the knowledge base to answer your question. Could you try rephrasing your question or ask about a different topic?", search_output_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
731
 
732
- # Format context
733
- context = self.format_context(documents)
734
 
735
- # Generate response with the appropriate model
736
- if model == "openai":
737
- response = self.generate_response_openai(query, context)
738
- elif model == "gemini":
739
- response = self.generate_response_gemini(query, context)
740
- else:
741
- error_msg = f"Unknown model: {model}"
742
- logger.error(error_msg)
743
- return error_msg, search_output_text
744
 
745
- return response, search_output_text
746
-
747
- def get_db_stats(vector_store: VectorStoreManager) -> str:
748
- """
749
- Function to get vector store statistics.
750
 
751
- Args:
752
- vector_store (VectorStoreManager): Vector store manager
753
-
754
- Returns:
755
- str: Formatted statistics string
756
- """
757
- try:
758
- stats = vector_store.get_statistics()
759
- total_docs = stats.get('total_documents', 0)
760
-
761
- stats_text = f"Documents in knowledge base: {total_docs}"
762
- return stats_text
763
  except Exception as e:
764
- logger.error(f"Error getting statistics: {e}")
765
- return "Error getting database statistics"
766
- # Find and fix the `try` block around line 828
767
-
768
- # The error occurs because there's likely a `try` statement without a matching `except` or `finally`
769
- # Here are the possible locations and fixes:
770
-
771
-
772
 
773
  def main():
774
  """Main function to run the RAG application"""
@@ -1024,7 +589,7 @@ def main():
1024
  fn=lambda q: query_and_search(q, num_results.value, model_choice.value, temperature_slider.value, max_tokens_slider.value),
1025
  cache_examples=False,
1026
  )
1027
-
1028
  # Launch the interface with a nice theme
1029
  app.launch(
1030
  share=False, # Set to True to create a public link
@@ -1035,7 +600,45 @@ def main():
1035
  favicon_path="favicon.ico" if os.path.exists("favicon.ico") else None,
1036
  show_error=True
1037
  )
 
1038
  except Exception as e:
1039
  logger.critical(f"Error starting application: {e}")
1040
  print(f"Error starting application: {e}")
1041
- sys.exit(1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def main():
    """
    Build and launch the Gradio web UI for the RAG application.

    Loads the configuration from ``rag_config.json`` (writing a default file
    when none exists), creates the vector store and RAG system, lays out the
    interface, wires the event handlers and starts the server. Any exception
    raised during start-up is logged, printed, and ends the process with exit
    status 1.

    NOTE(review): the widget nesting below was reconstructed from the order of
    the statements; confirm the layout against the running app.
    """
    # Path for configuration file
    CONFIG_FILE_PATH = "rag_config.json"
    # How many history rows the "Recent Questions" table shows / memory keeps
    HISTORY_DISPLAY_ROWS = 5
    HISTORY_MAX_ROWS = 100

    try:
        # Try to load configuration from file, or use defaults
        if os.path.exists(CONFIG_FILE_PATH):
            config = Config.from_file(CONFIG_FILE_PATH)
        else:
            config = Config(
                local_dir="./chroma_db",  # Store Chroma files in dedicated directory
                collection_name="markdown_docs"
            )
            # Save default configuration
            config.save_to_file(CONFIG_FILE_PATH)

        print(f"Starting Document Knowledge Assistant v{VERSION}")
        print(f"Log file: {log_file}")

        # Initialize vector store manager with existing collection
        vector_store = VectorStoreManager(config)

        # Initialize RAG system without API keys initially
        rag_system = RAGSystem(vector_store, config)

        # Create the Gradio interface with custom CSS
        with gr.Blocks(title="Document Knowledge Assistant", css=custom_css) as app:
            gr.Markdown(f"# Document Knowledge Assistant v{VERSION}")
            gr.Markdown("Ask questions about your documents and get comprehensive AI-powered answers")

            # Main layout
            with gr.Row():
                # Left column for asking questions
                with gr.Column(scale=3):
                    with gr.Box():
                        gr.Markdown("### Ask Your Question")
                        query_input = gr.Textbox(
                            label="",
                            placeholder="What would you like to know about your documents?",
                            lines=3
                        )

                        with gr.Row():
                            query_button = gr.Button("Ask Question", variant="primary", scale=3)
                            clear_button = gr.Button("Clear", variant="secondary", scale=1)

                    with gr.Box():
                        gr.Markdown("### Answer")
                        response_output = gr.Markdown()

                # Right column for settings
                with gr.Column(scale=1):
                    # API Keys and model selection
                    with gr.Accordion("AI Model Settings", open=True):
                        gr.Markdown("### AI Configuration")
                        model_choice = gr.Radio(
                            choices=["openai", "gemini"],
                            value="openai",
                            label="AI Provider",
                            info="Select your preferred AI model"
                        )

                        api_key_input = gr.Textbox(
                            label="API Key",
                            placeholder="Enter your API key here...",
                            type="password",
                            info="Your key is not stored between sessions"
                        )

                        save_key_button = gr.Button("Save API Key", variant="primary")
                        api_status = gr.Markdown("")

                    # Advanced search controls
                    with gr.Accordion("Advanced Settings", open=False):
                        gr.Markdown("### Search & Response Settings")
                        num_results = gr.Slider(
                            minimum=3,
                            maximum=15,
                            value=config.default_top_k,
                            step=1,
                            label="Documents to search",
                            info="Higher values provide more context"
                        )

                        temperature_slider = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            value=config.temperature,
                            step=0.05,
                            label="Creativity",
                            info="Lower = more factual, Higher = more creative"
                        )

                        max_tokens_slider = gr.Slider(
                            minimum=500,
                            maximum=4000,
                            value=config.max_tokens,
                            step=100,
                            label="Response Length",
                            info="Maximum words in response"
                        )

                    # Database stats - simplified
                    with gr.Accordion("System Info", open=False):
                        stats_display = gr.Markdown(get_db_stats(vector_store))

                        # Built line by line so the Markdown does not depend on
                        # how this source file happens to be indented.
                        gr.Markdown("\n".join([
                            "**System Details:**",
                            f"- Version: {VERSION}",
                            f"- Embedding: {vector_store.embedding_engine.model_name}",
                            f"- Device: {vector_store.embedding_engine.device}",
                        ]))
                        refresh_button = gr.Button("Refresh", variant="secondary", size="sm")

            # Hidden element for search results (not visible to user)
            with gr.Accordion("Debug Information", open=False, visible=False):
                search_output = gr.Markdown()

            # Query history at the bottom (optional section)
            with gr.Accordion("Recent Questions", open=False):
                history_list = gr.Dataframe(
                    headers=["Time", "Question", "Model"],
                    datatype=["str", "str", "str"],
                    row_count=5,
                    col_count=(3, "fixed"),
                    interactive=False
                )

            # Footer
            gr.Markdown(
                '<div class="footer">Document Knowledge Assistant helps you get insights '
                'from your documents using AI.\n'
                'Powered by Retrieval Augmented Generation.</div>'
            )

            # Query history storage: [time, question, model] rows, oldest first.
            # Lives in this closure, so it is shared by everyone using this process.
            query_history = []

            def recent_history():
                """Return the newest history rows, newest first (possibly empty)."""
                # Every handler goes through this helper so the table keeps one
                # ordering on success, validation-failure, error and clear paths.
                return list(reversed(query_history[-HISTORY_DISPLAY_ROWS:]))

            # Function to update API key based on selected model
            def update_api_key(api_key, model):
                """Configure the selected provider with ``api_key``; return a status line."""
                # Tolerate None and drop whitespace picked up when pasting a key.
                api_key = (api_key or "").strip()
                if not api_key:
                    return "❌ API key cannot be empty"

                if model == "openai":
                    success = rag_system.setup_openai(api_key)
                    model_name = f"OpenAI {config.openai_model}"
                else:
                    success = rag_system.setup_gemini(api_key)
                    model_name = f"Google {config.gemini_model}"

                if success:
                    return f"✅ {model_name} connected successfully"
                return "❌ Connection failed. Please check your API key and try again."

            # Query function that returns both response and search results
            def query_and_search(query, n_results, model, temperature, max_tokens):
                """Answer ``query``; return (answer markdown, search debug text, history rows)."""
                # Update configuration with current UI values
                config.temperature = float(temperature)
                config.max_tokens = int(max_tokens)

                start_time = datetime.now()

                if not (query or "").strip():
                    return "Please enter a question to get an answer.", "", recent_history()

                try:
                    # Verify that API keys are configured
                    openai_missing = model == "openai" and rag_system.openai_client is None
                    gemini_missing = model == "gemini" and not rag_system.gemini_configured
                    if openai_missing or gemini_missing:
                        return (
                            "Please configure your API key first. Enter your API key in the settings panel and click 'Save API Key'.",
                            "",
                            recent_history(),
                        )

                    # Call the RAG system's query and generate function
                    response, search_output_text = rag_system.query_and_generate(
                        query=query,
                        n_results=int(n_results),
                        model=model
                    )

                    # Add to history
                    timestamp = datetime.now().strftime("%H:%M")
                    query_history.append([timestamp, query, model])

                    # Keep only the last HISTORY_MAX_ROWS queries
                    if len(query_history) > HISTORY_MAX_ROWS:
                        query_history.pop(0)

                    # Calculate elapsed time
                    elapsed_time = (datetime.now() - start_time).total_seconds()

                    # Add subtle timing information to the response
                    response_with_timing = f"{response}\n\n<small>Answered in {elapsed_time:.1f}s</small>"

                    return response_with_timing, search_output_text, recent_history()

                except Exception as e:
                    # Log the details; show the user a generic message only.
                    error_msg = f"Error processing query: {str(e)}"
                    logger.error(error_msg)
                    logger.error(traceback.format_exc())
                    return (
                        "I encountered an error while processing your question. Please try again or check your API key settings.",
                        "",
                        recent_history(),
                    )

            # Function to clear the input and results
            def clear_inputs():
                """Blank the question, answer and debug panes; keep the history table."""
                return "", "", "", recent_history()

            # Set up events
            save_key_button.click(
                fn=update_api_key,
                inputs=[api_key_input, model_choice],
                outputs=api_status
            )

            query_inputs = [query_input, num_results, model_choice, temperature_slider, max_tokens_slider]
            query_outputs = [response_output, search_output, history_list]

            query_button.click(
                fn=query_and_search,
                inputs=query_inputs,
                outputs=query_outputs
            )

            refresh_button.click(
                fn=lambda: get_db_stats(vector_store),
                inputs=None,
                outputs=stats_display
            )

            clear_button.click(
                fn=clear_inputs,
                inputs=None,
                outputs=[query_input, response_output, search_output, history_list]
            )

            # Handle Enter key in query input
            query_input.submit(
                fn=query_and_search,
                inputs=query_inputs,
                outputs=query_outputs
            )

            # Auto-fill examples
            examples = [
                ["What are the main features of this application?"],
                ["How does the retrieval augmented generation work?"],
                ["Can you explain the embedding models used in this system?"],
            ]

            # NOTE(review): `component.value` is presumably the value given at
            # construction time, not what the user currently has selected, so
            # this callback would ignore the live settings if it is ever run.
            # Confirm against the Gradio Examples documentation.
            gr.Examples(
                examples=examples,
                inputs=query_input,
                outputs=query_outputs,
                fn=lambda q: query_and_search(q, num_results.value, model_choice.value, temperature_slider.value, max_tokens_slider.value),
                cache_examples=False,
            )

        # Launch the interface with a nice theme
        app.launch(
            share=False,  # Set to True to create a public link
            server_name="0.0.0.0",  # Listen on all interfaces
            server_port=7860,  # Default Gradio port
            debug=False,  # Set to True during development
            auth=None,  # Add (username, password) tuple for basic auth
            favicon_path="favicon.ico" if os.path.exists("favicon.ico") else None,
            show_error=True
        )
    except Exception as e:
        logger.critical(f"Error starting application: {e}")
        print(f"Error starting application: {e}")
        sys.exit(1)
270
def load_document(file_path: str) -> bool:
    """
    Load one text file into the vector store.

    The file is decoded with the first encoding that succeeds, a title is
    taken from its first short non-empty line, and the content is stored with
    file metadata under an ID built from the file name and a SHA-256 digest of
    the content.

    Args:
        file_path (str): Path of the file to ingest.

    Returns:
        bool: Whatever ``vector_store.add_document`` returns, or ``False`` if
        the file could not be decoded or any exception was raised (the
        exception is logged, not propagated).
    """
    import hashlib  # local import: nothing else in this block's scope needs it

    try:
        logger.info(f"Loading document: {file_path}")

        # Initialize components
        # NOTE(review): this uses a default Config(), while main() prefers
        # rag_config.json; confirm both point at the same collection.
        config = Config()
        vector_store = VectorStoreManager(config)

        # Read the file with different encodings if needed.
        # latin-1 maps every byte to a character and therefore never fails, so
        # it must come last; listed before cp1252 it made cp1252 unreachable.
        content = None
        encodings = ['utf-8', 'cp1252', 'latin-1']

        for encoding in encodings:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    content = f.read()
                logger.info(f"Successfully read file with {encoding} encoding")
                break
            except UnicodeDecodeError:
                logger.warning(f"Failed to read with {encoding} encoding, trying next...")

        if content is None:
            logger.error(f"Failed to read file with any encoding: {file_path}")
            return False

        # Extract metadata
        file_name = os.path.basename(file_path)
        file_ext = os.path.splitext(file_name)[1].lower()
        file_size = os.path.getsize(file_path)
        file_mtime = os.path.getmtime(file_path)

        # Simple heuristic for a title: first non-empty line under 100
        # characters, falling back to the file name.
        stripped_lines = (line.strip() for line in content.split('\n'))
        title = next(
            (line for line in stripped_lines if line and len(line) < 100),
            file_name,
        )

        # Create metadata
        metadata = {
            'filename': file_name,
            'title': title,
            'path': file_path,
            'extension': file_ext,
            'size': file_size,
            'modified': datetime.fromtimestamp(file_mtime).isoformat(),
            'created_at': datetime.now().isoformat()
        }

        # Generate a reproducible ID for the document. The built-in hash() of a
        # str is salted per process, so it gave the same file a different ID on
        # every run; a content digest is stable across runs.
        content_digest = hashlib.sha256(content.encode('utf-8')).hexdigest()[:16]
        doc_id = f"{file_name}_{content_digest}"

        # Add to vector store
        success = vector_store.add_document(content, doc_id, metadata)

        if success:
            logger.info(f"Document loaded successfully: {file_path}")
        else:
            logger.info(f"Failed to load document: {file_path}")
        return success

    except Exception as e:
        # Top-level boundary for one file: log with traceback and report failure
        # so a batch load can continue with the next file.
        logger.error(f"Error loading document {file_path}: {e}")
        logger.error(traceback.format_exc())
        return False
 
 
 
 
 
337
 
338
  def main():
339
  """Main function to run the RAG application"""
 
589
  fn=lambda q: query_and_search(q, num_results.value, model_choice.value, temperature_slider.value, max_tokens_slider.value),
590
  cache_examples=False,
591
  )
592
+
593
  # Launch the interface with a nice theme
594
  app.launch(
595
  share=False, # Set to True to create a public link
 
600
  favicon_path="favicon.ico" if os.path.exists("favicon.ico") else None,
601
  show_error=True
602
  )
603
+
604
  except Exception as e:
605
  logger.critical(f"Error starting application: {e}")
606
  print(f"Error starting application: {e}")
607
+ sys.exit(1)
608
+
609
if __name__ == "__main__":
    # Command-line entry point:
    #   (no arguments)        start the web UI
    #   --load FILE [FILE...] ingest the files, then exit
    #   --help                print usage, then exit

    # Use the name the script was invoked with so the usage text stays correct
    # whatever the file is called.
    script_name = os.path.basename(sys.argv[0]) or "app.py"

    def _print_usage():
        """Print the version banner and the supported invocations."""
        print(f"Document Knowledge Assistant v{VERSION}")
        print("Usage:")
        print(f"  python {script_name}                     # Start the web UI")
        print(f"  python {script_name} --load file1 file2  # Load documents into the knowledge base")
        print(f"  python {script_name} --help              # Show this help message")

    cli_args = sys.argv[1:]

    if cli_args and cli_args[0] == "--help":
        _print_usage()
        sys.exit(0)

    if cli_args and cli_args[0] == "--load":
        file_paths = cli_args[1:]
        if not file_paths:
            # Previously this fell through and silently started the web server.
            print("❌ --load requires at least one file path")
            _print_usage()
            sys.exit(2)

        # Load documents mode
        print(f"Document Knowledge Assistant v{VERSION}")
        print("Loading documents into knowledge base...")

        success_count = 0
        failed_count = 0

        for file_path in file_paths:
            if not os.path.exists(file_path):
                failed_count += 1
                print(f"❌ File not found: {file_path}")
            elif load_document(file_path):
                success_count += 1
                print(f"✅ Successfully loaded: {file_path}")
            else:
                failed_count += 1
                print(f"❌ Failed to load: {file_path}")

        print(f"\nLoading complete: {success_count} documents loaded, {failed_count} failed")
        # Non-zero status lets calling scripts detect a partially failed load.
        sys.exit(1 if failed_count else 0)

    # Start the web UI
    main()