jzou19950715 committed on
Commit 78841ad · verified · 1 Parent(s): 6bf58ba

Update app.py

Files changed (1): app.py (+616 -223)

app.py CHANGED
@@ -4,48 +4,153 @@ import logging
 from pathlib import Path
 import json
 from datetime import datetime
-from typing import List, Dict, Any, Optional
 
-# Configure logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
 
-# Importing necessary libraries
-import torch
-import numpy as np
-from sentence_transformers import SentenceTransformer
-import chromadb
-from chromadb.utils import embedding_functions
-import gradio as gr
-from openai import OpenAI
-import google.generativeai as genai
 
-# Configuration class
 class Config:
-    """Configuration for vector store and RAG"""
 
     def __init__(self,
-                 local_dir: str = ".",
                  embedding_model: str = "all-MiniLM-L6-v2",
-                 collection_name: str = "markdown_docs"):
         self.local_dir = local_dir
         self.embedding_model = embedding_model
         self.collection_name = collection_name
 
-# Embedding engine
 class EmbeddingEngine:
-    """Handle embeddings with a lightweight model"""
 
     def __init__(self, model_name="all-MiniLM-L6-v2"):
         # Use GPU if available
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        logger.info(f"Using device: {self.device}")
 
         # Try multiple model options in order of preference
         model_options = [
             model_name,
-            "all-MiniLM-L6-v2",
-            "paraphrase-MiniLM-L3-v2",
-            "all-mpnet-base-v2"  # Higher quality but larger model
         ]
 
         self.model = None
@@ -53,47 +158,99 @@ class EmbeddingEngine:
         # Try each model in order until one works
         for model_option in model_options:
             try:
-                logger.info(f"Attempting to load model: {model_option}")
                 self.model = SentenceTransformer(model_option)
 
                 # Move model to device
                 self.model.to(self.device)
 
-                logger.info(f"Successfully loaded model: {model_option}")
                 self.model_name = model_option
                 self.vector_size = self.model.get_sentence_embedding_dimension()
                 break
 
             except Exception as e:
-                logger.warning(f"Failed to load model {model_option}: {str(e)}")
 
         if self.model is None:
-            logger.error("Failed to load any embedding model. Exiting.")
-            sys.exit(1)
 
 class VectorStoreManager:
-    """Manage Chroma vector store operations - upload, query, etc."""
 
     def __init__(self, config: Config):
         self.config = config
 
         # Initialize Chroma client (local persistence)
         logger.info(f"Initializing Chroma at {config.local_dir}")
-        self.client = chromadb.PersistentClient(path=config.local_dir)
 
         # Get or create collection
         try:
             # Initialize embedding model
             logger.info("Loading embedding model...")
             self.embedding_engine = EmbeddingEngine(config.embedding_model)
-            logger.info(f"Using model: {self.embedding_engine.model_name}")
 
             # Create embedding function
             sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
                 model_name=self.embedding_engine.model_name
             )
 
-            # Try to get existing collection
             try:
                 self.collection = self.client.get_collection(
                     name=config.collection_name,
@@ -101,7 +258,7 @@ class VectorStoreManager:
                 )
                 logger.info(f"Using existing collection: {config.collection_name}")
             except Exception as e:
-                logger.error(f"Error getting collection: {e}")
                 # Attempt to get a list of available collections
                 collections = self.client.list_collections()
                 if collections:
@@ -122,19 +279,33 @@ class VectorStoreManager:
             logger.info(f"Created new collection: {config.collection_name}")
 
         except Exception as e:
-            logger.error(f"Error initializing Chroma collection: {e}")
-            sys.exit(1)
 
     def query(self, query_text: str, n_results: int = 5) -> List[Dict]:
         """
-        Query the vector store with a text query
         """
         try:
             # Query the collection
             search_results = self.collection.query(
                 query_texts=[query_text],
                 n_results=n_results,
-                include=["documents", "metadatas", "distances"]
             )
 
             # Format results
@@ -143,26 +314,106 @@ class VectorStoreManager:
             for i in range(len(search_results["documents"][0])):
                 results.append({
                     'document': search_results["documents"][0][i],
-                    'metadata': search_results["metadatas"][0][i],
-                    'score': 1.0 - search_results["distances"][0][i]  # Convert distance to similarity
                 })
 
             return results
         except Exception as e:
             logger.error(f"Error querying collection: {e}")
             return []
 
     def get_statistics(self) -> Dict[str, Any]:
-        """Get statistics about the vector store"""
-        stats = {}
 
         try:
             # Get collection count
-            collection_info = self.collection.count()
-            stats['total_documents'] = collection_info
 
-            # Estimate unique files - with no chunking, each document is a file
-            stats['unique_files'] = collection_info
         except Exception as e:
             logger.error(f"Error getting statistics: {e}")
             stats['error'] = str(e)
@@ -170,280 +421,422 @@ class VectorStoreManager:
         return stats
 
 class RAGSystem:
-    """Retrieval-Augmented Generation with multiple LLM providers"""
 
-    def __init__(self, vector_store: VectorStoreManager):
         self.vector_store = vector_store
         self.openai_client = None
         self.gemini_configured = False
 
-    def setup_openai(self, api_key: str):
-        """Set up OpenAI client with API key"""
         try:
             self.openai_client = OpenAI(api_key=api_key)
             return True
         except Exception as e:
             logger.error(f"Error initializing OpenAI client: {e}")
             return False
 
-    def setup_gemini(self, api_key: str):
-        """Set up Gemini with API key"""
         try:
             genai.configure(api_key=api_key)
             self.gemini_configured = True
             return True
         except Exception as e:
             logger.error(f"Error configuring Gemini: {e}")
             return False
 
     def format_context(self, documents: List[Dict]) -> str:
-        """Format retrieved documents into context for the LLM"""
         if not documents:
             return "No relevant documents found."
 
         context_parts = []
         for i, doc in enumerate(documents):
             metadata = doc['metadata']
             title = metadata.get('title', metadata.get('filename', 'Unknown document'))
 
             # For readability, limit length of context document
             doc_text = doc['document']
-            if len(doc_text) > 10000:  # Limit long documents in context
-                doc_text = doc_text[:10000] + "... [Document truncated for context]"
 
-            context_parts.append(f"Document {i+1} - {title}:\n{doc_text}\n")
 
-        return "\n".join(context_parts)
 
     def generate_response_openai(self, query: str, context: str) -> str:
-        """Generate a response using OpenAI model with context"""
         if not self.openai_client:
             return "Error: OpenAI API key not configured. Please enter an API key in the API key field."
 
         system_prompt = """
-        You are a helpful assistant that answers questions based on the context provided.
-        Use the information from the context to answer the user's question.
-        If the context doesn't contain the information needed, say so clearly.
-        Always cite the specific sections from the context that you used in your answer.
         """
 
         try:
             response = self.openai_client.chat.completions.create(
-                model="gpt-4o-mini",  # Use GPT-4o mini
                 messages=[
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"}
                 ],
-                temperature=0.3,  # Lower temperature for more factual responses
-                max_tokens=1000,
             )
-            return response.choices[0].message.content
         except Exception as e:
-            logger.error(f"Error generating response with OpenAI: {e}")
-            return f"Error generating response with OpenAI: {str(e)}"
 
     def generate_response_gemini(self, query: str, context: str) -> str:
-        """Generate a response using Gemini with context"""
         if not self.gemini_configured:
             return "Error: Google AI API key not configured. Please enter an API key in the API key field."
 
-        prompt = f"""
         You are a highly supportive and insightful assistant dedicated to providing clear, helpful, and well-structured answers based on the given context. Your goal is to ensure the user receives a thorough, encouraging, and informative response that directly addresses their question.
 
         **Guidelines for Your Response:**
-        - Use the **context** to form a detailed and well-reasoned answer.
         - If the context lacks sufficient information, state it clearly while offering general insights or related knowledge.
-        - Cite specific sections from the context that contribute to your response.
         - Maintain a **friendly, professional, and supportive** tone that encourages user engagement.
         - Aim for **clarity and depth**, breaking down complex ideas into easy-to-understand explanations.
-        - Strive for a response length of **300-500 words**, ensuring **both completeness and readability**.
 
         **Context:**
         {context}
 
-        **Users Question:**
        {query}
 
        **Your Response:**
-        """
 
        try:
-            model = genai.GenerativeModel('gemini-1.5-flash')
-            response = model.generate_content(prompt)
-            return response.text
        except Exception as e:
-            logger.error(f"Error generating response with Gemini: {e}")
-            return f"Error generating response with Gemini: {str(e)}"
 
-    def query_and_generate(self, query: str, n_results: int = 5, model: str = "openai") -> str:
-        """Retrieve relevant documents and generate a response using the specified model"""
         # Query vector store
         documents = self.vector_store.query(query, n_results=n_results)
 
         if not documents:
-            return "No relevant documents found to answer your question."
 
         # Format context
         context = self.format_context(documents)
 
         # Generate response with the appropriate model
         if model == "openai":
-            return self.generate_response_openai(query, context)
         elif model == "gemini":
-            return self.generate_response_gemini(query, context)
         else:
-            return f"Unknown model: {model}"
 
-# Main function to run the application
 def main():
-    # Initialize the system with current directory as the Chroma location
-    config = Config(
-        local_dir=".",  # Look for Chroma files in current directory
-        collection_name="markdown_docs"
-    )
 
     try:
         # Initialize vector store manager with existing collection
         vector_store = VectorStoreManager(config)
 
         # Initialize RAG system without API keys initially
-        rag_system = RAGSystem(vector_store)
 
         # Create the Gradio interface
-        with gr.Blocks(title="Document RAG System") as app:
-            gr.Markdown("# Document RAG System")
 
             with gr.Row():
                 with gr.Column(scale=1):
                     # API Keys and model selection
-                    model_choice = gr.Radio(
-                        choices=["openai", "gemini"],
-                        value="openai",
-                        label="Choose LLM Provider",
-                        info="Select which model to use (GPT-4o mini or Gemini 1.5 Flash)"
-                    )
-
-                    api_key_input = gr.Textbox(
-                        label="API Key",
-                        placeholder="Enter your API key here...",
-                        type="password"
-                    )
-
-                    save_key_button = gr.Button("Save API Key", variant="primary")
-                    api_status = gr.Markdown("")
 
                     # Search controls
-                    num_results = gr.Slider(
-                        minimum=1,
-                        maximum=10,
-                        value=10,
-                        step=1,
-                        label="Number of documents to retrieve"
-                    )
-
-                    # Database stats
-                    gr.Markdown("### Database Statistics")
-                    stats_display = gr.Textbox(
-                        label="",
-                        value=get_db_stats(vector_store),
-                        lines=2
-                    )
-                    refresh_button = gr.Button("Refresh Stats")
-
-                with gr.Column(scale=2):
-                    # Query and response
-                    query_input = gr.Textbox(
-                        label="Your Question",
-                        placeholder="Ask a question about your documents...",
-                        lines=2
-                    )
-
-                    query_button = gr.Button("Ask Question", variant="primary")
-
-                    gr.Markdown("### Response")
-                    response_output = gr.Markdown()
-
-                    gr.Markdown("### Document Search Results")
-                    search_output = gr.Markdown()
-
-            # Function to update API key based on selected model
-            def update_api_key(api_key, model):
-                if model == "openai":
-                    success = rag_system.setup_openai(api_key)
-                    model_name = "OpenAI GPT-4o mini"
-                else:
-                    success = rag_system.setup_gemini(api_key)
-                    model_name = "Google Gemini 1.5 Flash"
-
-                if success:
-                    return f"✅ {model_name} API key configured successfully"
-                else:
-                    return f"❌ Failed to configure {model_name} API key"
-
-            # Query function that returns both response and search results
-            def query_and_search(query, n_results, model):
-                # Get search results first
-                results = vector_store.query(query, n_results=int(n_results))
-
-                # Format search results
-                formatted_results = []
-                for i, res in enumerate(results):
-                    metadata = res['metadata']
-                    title = metadata.get('title', metadata.get('filename', 'Unknown'))
-                    preview = res['document'][:500] + '...' if len(res['document']) > 500 else res['document']
-                    formatted_results.append(f"**Result {i+1}** (Similarity: {res['score']:.2f})\n"
-                                             f"**Source:** {title}\n"
-                                             f"**Preview:**\n{preview}\n\n---\n")
-
-                search_output_text = "\n".join(formatted_results) if formatted_results else "No results found."
-
-                # Generate response if we have results
-                response = "No documents found to answer your question."
-                if results:
-                    context = rag_system.format_context(results)
-                    if model == "openai":
-                        response = rag_system.generate_response_openai(query, context)
-                    else:
-                        response = rag_system.generate_response_gemini(query, context)
-
-                return response, search_output_text
-
-            # Set up events
-            save_key_button.click(
-                fn=update_api_key,
-                inputs=[api_key_input, model_choice],
-                outputs=api_status
-            )
-
-            query_button.click(
-                fn=query_and_search,
-                inputs=[query_input, num_results, model_choice],
-                outputs=[response_output, search_output]
-            )
-
-            refresh_button.click(
-                fn=lambda: get_db_stats(vector_store),
-                inputs=None,
-                outputs=stats_display
-            )
-
-        # Launch the interface
-        app.launch()
-
-    except Exception as e:
-        logger.error(f"Error initializing application: {e}")
-        print(f"Error: {e}")
-        sys.exit(1)
-
-# Helper function to get database stats
-def get_db_stats(vector_store):
-    """Function to get vector store statistics"""
-    try:
-        stats = vector_store.get_statistics()
-        return f"Total documents: {stats.get('total_documents', 0)}"
-    except Exception as e:
-        logger.error(f"Error getting statistics: {e}")
-        return "Error getting database statistics"
-
-if __name__ == "__main__":
-    main()
 from pathlib import Path
 import json
 from datetime import datetime
+from typing import List, Dict, Any, Optional, Tuple, Union
+import traceback
 
+# Configure detailed logging with file output
+LOG_DIR = "logs"
+os.makedirs(LOG_DIR, exist_ok=True)
+log_file = os.path.join(LOG_DIR, f"rag_system_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
 
+# Set up root logger with both file and console handlers
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler(log_file),
+        logging.StreamHandler(sys.stdout)
+    ]
+)
+logger = logging.getLogger("rag_system")
+logger.info(f"Starting RAG system. Log file: {log_file}")
+
+# Importing necessary libraries with error handling
+try:
+    import torch
+    import numpy as np
+    from sentence_transformers import SentenceTransformer
+    import chromadb
+    from chromadb.utils import embedding_functions
+    import gradio as gr
+    from openai import OpenAI
+    import google.generativeai as genai
+    logger.info("All required libraries successfully imported")
+except ImportError as e:
+    logger.critical(f"Failed to import required libraries: {e}")
+    print("ERROR: Missing required libraries. Please install with: pip install -r requirements.txt")
+    print(f"Specific error: {e}")
+    sys.exit(1)
+
+# Version info for tracking
+VERSION = "1.0.0"
+logger.info(f"RAG System Version: {VERSION}")
 
 class Config:
+    """
+    Configuration for vector store and RAG system.
+
+    This class centralizes all configuration parameters for the application,
+    making it easier to modify settings and ensure consistency.
+
+    Attributes:
+        local_dir (str): Directory for ChromaDB persistence
+        embedding_model (str): Name of the embedding model to use
+        collection_name (str): Name of the ChromaDB collection
+        default_top_k (int): Default number of results to return
+        openai_model (str): Default OpenAI model to use
+        gemini_model (str): Default Gemini model to use
+        temperature (float): Temperature setting for LLM generation
+        max_tokens (int): Maximum tokens for LLM response
+        system_name (str): Name of the system for UI
+    """
+
     def __init__(self,
+                 local_dir: str = "./chroma_db",
                  embedding_model: str = "all-MiniLM-L6-v2",
+                 collection_name: str = "markdown_docs",
+                 default_top_k: int = 5,
+                 openai_model: str = "gpt-4o-mini",
+                 gemini_model: str = "gemini-1.5-flash",
+                 temperature: float = 0.3,
+                 max_tokens: int = 1000,
+                 system_name: str = "Document RAG System"):
         self.local_dir = local_dir
         self.embedding_model = embedding_model
         self.collection_name = collection_name
+        self.default_top_k = default_top_k
+        self.openai_model = openai_model
+        self.gemini_model = gemini_model
+        self.temperature = temperature
+        self.max_tokens = max_tokens
+        self.system_name = system_name
+
+        # Create local directory if it doesn't exist
+        os.makedirs(local_dir, exist_ok=True)
+
+        logger.info(f"Initialized configuration: {self.__dict__}")
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert configuration to dictionary for serialization"""
+        return self.__dict__
+
+    @classmethod
+    def from_file(cls, config_path: str) -> 'Config':
+        """Load configuration from JSON file"""
+        try:
+            with open(config_path, 'r') as f:
+                config_dict = json.load(f)
+            logger.info(f"Loaded configuration from {config_path}")
+            return cls(**config_dict)
+        except Exception as e:
+            logger.error(f"Failed to load configuration from {config_path}: {e}")
+            logger.info("Using default configuration")
+            return cls()
+
+    def save_to_file(self, config_path: str) -> bool:
+        """Save configuration to JSON file"""
+        try:
+            with open(config_path, 'w') as f:
+                json.dump(self.to_dict(), f, indent=2)
+            logger.info(f"Saved configuration to {config_path}")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to save configuration to {config_path}: {e}")
+            return False
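A quick sketch of how the new Config persistence round-trip might be used; the `rag_config.json` path mirrors the one `main()` uses further down, and the snippet is illustrative rather than part of the commit:

```python
import os

CONFIG_FILE_PATH = "rag_config.json"  # same path main() uses below

if os.path.exists(CONFIG_FILE_PATH):
    config = Config.from_file(CONFIG_FILE_PATH)   # falls back to defaults on parse errors
else:
    config = Config(local_dir="./chroma_db")      # defaults from __init__
    config.save_to_file(CONFIG_FILE_PATH)         # persist for the next run

print(config.openai_model)  # "gpt-4o-mini" unless overridden in the JSON file
```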
 
 class EmbeddingEngine:
+    """
+    Handle embeddings with a lightweight model.
+
+    This class manages the embedding model used to convert text to vector
+    representations for semantic search.
+
+    Attributes:
+        model (SentenceTransformer): The loaded embedding model
+        model_name (str): Name of the successfully loaded model
+        vector_size (int): Dimension of the embedding vectors
+        device (str): Device used for inference ('cuda' or 'cpu')
+    """
 
     def __init__(self, model_name="all-MiniLM-L6-v2"):
+        """
+        Initialize the embedding engine with the specified model.
+
+        Args:
+            model_name (str): Name of the embedding model to load
+
+        Raises:
+            SystemExit: If no embedding model could be loaded
+        """
         # Use GPU if available
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        logger.info(f"Using device for embeddings: {self.device}")
 
         # Try multiple model options in order of preference
         model_options = [
             model_name,
+            "all-MiniLM-L6-v2",         # Good balance of speed and quality
+            "paraphrase-MiniLM-L3-v2",  # Faster but less accurate
+            "all-mpnet-base-v2"         # Higher quality but larger model
         ]
 
         self.model = None
 
         # Try each model in order until one works
         for model_option in model_options:
             try:
+                logger.info(f"Attempting to load embedding model: {model_option}")
                 self.model = SentenceTransformer(model_option)
 
                 # Move model to device
                 self.model.to(self.device)
 
+                logger.info(f"Successfully loaded embedding model: {model_option}")
                 self.model_name = model_option
                 self.vector_size = self.model.get_sentence_embedding_dimension()
+                logger.info(f"Embedding vector size: {self.vector_size}")
                 break
 
             except Exception as e:
+                logger.warning(f"Failed to load embedding model {model_option}: {str(e)}")
 
         if self.model is None:
+            error_msg = "Failed to load any embedding model. Please check your internet connection or install models locally."
+            logger.critical(error_msg)
+            raise SystemExit(error_msg)
+
+    def embed(self, texts: List[str]) -> np.ndarray:
+        """
+        Generate embeddings for a list of texts.
+
+        Args:
+            texts (List[str]): List of texts to embed
+
+        Returns:
+            np.ndarray: Array of embeddings
+
+        Raises:
+            ValueError: If the input is invalid
+            RuntimeError: If embedding fails
+        """
+        if not texts:
+            raise ValueError("Cannot embed empty list of texts")
+
+        try:
+            embeddings = self.model.encode(texts, convert_to_numpy=True)
+            return embeddings
+        except Exception as e:
+            logger.error(f"Error generating embeddings: {e}")
+            raise RuntimeError(f"Failed to generate embeddings: {e}")
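For reference, a minimal sketch of how the new `embed` method could be exercised; the cosine-similarity computation is an illustration on top of the returned arrays, not something this commit adds:

```python
import numpy as np

engine = EmbeddingEngine()                      # loads the first model that works
vecs = engine.embed(["markdown docs", "vector search"])
assert vecs.shape == (2, engine.vector_size)    # e.g. 384 for all-MiniLM-L6-v2

# Cosine similarity between the two embeddings (illustrative helper)
sim = float(np.dot(vecs[0], vecs[1]) /
            (np.linalg.norm(vecs[0]) * np.linalg.norm(vecs[1])))
print(f"similarity: {sim:.3f}")
```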
 
 class VectorStoreManager:
+    """
+    Manage Chroma vector store operations - upload, query, etc.
+
+    This class provides an interface to the ChromaDB vector database,
+    handling document storage, retrieval, and management.
+
+    Attributes:
+        config (Config): Configuration parameters
+        client (chromadb.PersistentClient): ChromaDB client
+        collection (chromadb.Collection): The active ChromaDB collection
+        embedding_engine (EmbeddingEngine): Engine for generating embeddings
+    """
 
     def __init__(self, config: Config):
+        """
+        Initialize the vector store manager.
+
+        Args:
+            config (Config): Configuration parameters
+
+        Raises:
+            SystemExit: If the vector store cannot be initialized
+        """
         self.config = config
 
         # Initialize Chroma client (local persistence)
         logger.info(f"Initializing Chroma at {config.local_dir}")
+        try:
+            self.client = chromadb.PersistentClient(path=config.local_dir)
+            logger.info("ChromaDB client initialized successfully")
+        except Exception as e:
+            error_msg = f"Failed to initialize ChromaDB client: {e}"
+            logger.critical(error_msg)
+            raise SystemExit(error_msg)
 
         # Get or create collection
         try:
             # Initialize embedding model
             logger.info("Loading embedding model...")
             self.embedding_engine = EmbeddingEngine(config.embedding_model)
+            logger.info(f"Using embedding model: {self.embedding_engine.model_name}")
 
             # Create embedding function
             sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
                 model_name=self.embedding_engine.model_name
             )
 
+            # Try to get existing collection or create a new one
             try:
                 self.collection = self.client.get_collection(
                     name=config.collection_name,
                 )
                 logger.info(f"Using existing collection: {config.collection_name}")
             except Exception as e:
+                logger.warning(f"Error getting collection: {e}")
                 # Attempt to get a list of available collections
                 collections = self.client.list_collections()
                 if collections:
 
             logger.info(f"Created new collection: {config.collection_name}")
 
         except Exception as e:
+            error_msg = f"Error initializing Chroma collection: {e}"
+            logger.critical(error_msg)
+            raise SystemExit(error_msg)
 
     def query(self, query_text: str, n_results: int = 5) -> List[Dict]:
         """
+        Query the vector store with a text query.
+
+        Args:
+            query_text (str): The query text
+            n_results (int): Number of results to return
+
+        Returns:
+            List[Dict]: List of results with document text, metadata, and similarity score
         """
+        if not query_text.strip():
+            logger.warning("Empty query received")
+            return []
+
         try:
+            logger.info(f"Querying vector store with: '{query_text[:50]}...' (top {n_results})")
+
             # Query the collection
             search_results = self.collection.query(
                 query_texts=[query_text],
                 n_results=n_results,
+                include=["documents", "metadatas", "distances", "embeddings"]
             )
 
             # Format results
             results = []
             if search_results["documents"] and search_results["documents"][0]:
                 for i in range(len(search_results["documents"][0])):
                     results.append({
                         'document': search_results["documents"][0][i],
+                        'metadata': search_results["metadatas"][0][i] if search_results["metadatas"] else {},
+                        'score': 1.0 - search_results["distances"][0][i],  # Convert distance to similarity
+                        'distance': search_results["distances"][0][i]
                     })
+
+                logger.info(f"Found {len(results)} results for query")
+            else:
+                logger.info("No results found for query")
 
             return results
         except Exception as e:
             logger.error(f"Error querying collection: {e}")
+            logger.debug(traceback.format_exc())
             return []
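A sketch of what a caller sees from the reworked `query`; the `score` field assumes the collection's default distance metric, where smaller distance means more similar:

```python
store = VectorStoreManager(Config())

for hit in store.query("how do I configure logging?", n_results=3):
    # 'score' is 1.0 - distance, so higher means more similar
    print(f"{hit['score']:.2f}  {hit['metadata'].get('filename', '?')}")
    print(hit['document'][:80], "...")
```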
 
+    def add_document(self,
+                     document: str,
+                     doc_id: str,
+                     metadata: Dict[str, Any]) -> bool:
+        """
+        Add a document to the vector store.
+
+        Args:
+            document (str): The document text
+            doc_id (str): Unique identifier for the document
+            metadata (Dict[str, Any]): Metadata about the document
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            logger.info(f"Adding document '{doc_id}' to vector store")
+
+            # Add the document to the collection
+            self.collection.add(
+                documents=[document],
+                ids=[doc_id],
+                metadatas=[metadata]
+            )
+
+            logger.info(f"Successfully added document '{doc_id}'")
+            return True
+        except Exception as e:
+            logger.error(f"Error adding document to collection: {e}")
+            return False
+
+    def delete_document(self, doc_id: str) -> bool:
+        """
+        Delete a document from the vector store.
+
+        Args:
+            doc_id (str): ID of the document to delete
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            logger.info(f"Deleting document '{doc_id}' from vector store")
+            self.collection.delete(ids=[doc_id])
+            logger.info(f"Successfully deleted document '{doc_id}'")
+            return True
+        except Exception as e:
+            logger.error(f"Error deleting document from collection: {e}")
+            return False
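The new CRUD helpers might be driven like this; the doc_id scheme and metadata values are made up for illustration:

```python
store = VectorStoreManager(Config())

ok = store.add_document(
    document="# Setup\nRun `pip install -r requirements.txt` first.",
    doc_id="docs/setup.md",                        # hypothetical ID scheme
    metadata={"filename": "setup.md", "title": "Setup"},
)
assert ok

store.delete_document("docs/setup.md")             # returns False on failure, never raises
```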
 
     def get_statistics(self) -> Dict[str, Any]:
+        """
+        Get statistics about the vector store.
+
+        Returns:
+            Dict[str, Any]: Statistics about the vector store
+        """
+        stats = {
+            'collection_name': self.config.collection_name,
+            'embedding_model': self.embedding_engine.model_name,
+            'embedding_dimensions': self.embedding_engine.vector_size,
+            'device': self.embedding_engine.device
+        }
 
         try:
             # Get collection count
+            collection_count = self.collection.count()
+            stats['total_documents'] = collection_count
+
+            # Get unique metadata values
+            if collection_count > 0:
+                try:
+                    # Get a sample of document metadata
+                    sample_results = self.collection.get(limit=min(collection_count, 100))
+                    if sample_results and 'metadatas' in sample_results and sample_results['metadatas']:
+                        # Count unique files if filename exists in metadata
+                        filenames = set()
+                        for metadata in sample_results['metadatas']:
+                            if 'filename' in metadata:
+                                filenames.add(metadata['filename'])
+                        stats['unique_files'] = len(filenames)
+                except Exception as e:
+                    logger.warning(f"Error getting metadata statistics: {e}")
 
+            logger.info(f"Vector store statistics: {stats}")
         except Exception as e:
             logger.error(f"Error getting statistics: {e}")
             stats['error'] = str(e)
 
         return stats
  class RAGSystem:
424
+ """
425
+ Retrieval-Augmented Generation with multiple LLM providers.
426
+
427
+ This class handles the RAG workflow: retrieval of relevant documents,
428
+ formatting context, and generating responses with different LLM providers.
429
 
430
+ Attributes:
431
+ vector_store (VectorStoreManager): Manager for vector store operations
432
+ openai_client (Optional[OpenAI]): OpenAI client
433
+ gemini_configured (bool): Whether Gemini API is configured
434
+ config (Config): Configuration parameters
435
+ """
436
+
437
+ def __init__(self, vector_store: VectorStoreManager, config: Config):
438
+ """
439
+ Initialize the RAG system.
440
+
441
+ Args:
442
+ vector_store (VectorStoreManager): Vector store manager
443
+ config (Config): Configuration parameters
444
+ """
445
  self.vector_store = vector_store
446
+ self.config = config
447
  self.openai_client = None
448
  self.gemini_configured = False
449
+
450
+ logger.info("Initialized RAG system")
451
 
 
+    def setup_openai(self, api_key: str) -> bool:
+        """
+        Set up OpenAI client with API key.
+
+        Args:
+            api_key (str): OpenAI API key
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        if not api_key.strip():
+            logger.warning("Empty OpenAI API key provided")
+            return False
+
         try:
+            logger.info("Setting up OpenAI client")
             self.openai_client = OpenAI(api_key=api_key)
+            # Test the API key with a simple request
+            response = self.openai_client.chat.completions.create(
+                model=self.config.openai_model,
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "Test connection"}
+                ],
+                max_tokens=10
+            )
+            logger.info("OpenAI client configured successfully")
             return True
         except Exception as e:
             logger.error(f"Error initializing OpenAI client: {e}")
+            self.openai_client = None
             return False
 
+    def setup_gemini(self, api_key: str) -> bool:
+        """
+        Set up Gemini with API key.
+
+        Args:
+            api_key (str): Google AI API key
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        if not api_key.strip():
+            logger.warning("Empty Gemini API key provided")
+            return False
+
         try:
+            logger.info("Setting up Gemini client")
             genai.configure(api_key=api_key)
+
+            # Test the API key with a simple request
+            model = genai.GenerativeModel(self.config.gemini_model)
+            response = model.generate_content("Test connection")
+
             self.gemini_configured = True
+            logger.info("Gemini client configured successfully")
             return True
         except Exception as e:
             logger.error(f"Error configuring Gemini: {e}")
+            self.gemini_configured = False
             return False
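Both setup methods now validate the key with a real test request (which consumes a small number of tokens), so bad keys fail at save time rather than on the first query. A sketch of driving them outside the UI, reading keys from environment variables; the variable names are conventional, not something this commit reads itself:

```python
import os

rag = RAGSystem(vector_store, config)

if rag.setup_openai(os.environ.get("OPENAI_API_KEY", "")):
    print("OpenAI ready")
else:
    # Fall back to Gemini if the OpenAI key is missing or invalid
    rag.setup_gemini(os.environ.get("GOOGLE_API_KEY", ""))
```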
 
     def format_context(self, documents: List[Dict]) -> str:
+        """
+        Format retrieved documents into context for the LLM.
+
+        Args:
+            documents (List[Dict]): List of retrieved documents
+
+        Returns:
+            str: Formatted context for the LLM
+        """
         if not documents:
+            logger.warning("No documents provided for context formatting")
             return "No relevant documents found."
 
+        logger.info(f"Formatting {len(documents)} documents for context")
         context_parts = []
+
         for i, doc in enumerate(documents):
             metadata = doc['metadata']
+            # Extract document metadata in a robust way
             title = metadata.get('title', metadata.get('filename', 'Unknown document'))
+            source = metadata.get('source', metadata.get('path', 'Unknown source'))
+            date = metadata.get('date', metadata.get('created_at', 'Unknown date'))
+
+            # Format header with metadata
+            header = f"Document {i+1} - {title}"
+            if source != 'Unknown source':
+                header += f" (Source: {source})"
+            if date != 'Unknown date':
+                header += f" (Date: {date})"
 
             # For readability, limit length of context document
             doc_text = doc['document']
+            if len(doc_text) > 8000:  # Limit long documents in context
+                doc_text = doc_text[:8000] + "... [Document truncated for context]"
 
+            context_parts.append(f"{header}:\n{doc_text}\n")
+
+        full_context = "\n".join(context_parts)
+        logger.info(f"Created context with {len(full_context)} characters")
 
+        return full_context
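For a document dict shaped like the ones `query` returns, the new header logic produces output along these lines (all values invented for illustration):

```python
doc = {
    "document": "Install with pip...",
    "metadata": {"title": "Setup Guide", "source": "docs/setup.md", "date": "2024-01-15"},
}
print(rag.format_context([doc]))
# Document 1 - Setup Guide (Source: docs/setup.md) (Date: 2024-01-15):
# Install with pip...
```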
 
     def generate_response_openai(self, query: str, context: str) -> str:
+        """
+        Generate a response using OpenAI model with context.
+
+        Args:
+            query (str): User query
+            context (str): Formatted document context
+
+        Returns:
+            str: Generated response
+        """
         if not self.openai_client:
+            logger.warning("OpenAI API key not configured for response generation")
             return "Error: OpenAI API key not configured. Please enter an API key in the API key field."
 
         system_prompt = """
+        You are a helpful, detailed, and accurate assistant that answers questions based on the context provided.
+        Follow these guidelines:
+
+        1. Use ONLY the information from the context to answer the user's question.
+        2. If the context doesn't contain the information needed, say so clearly and do your best to deduce and infer the answer.
+        3. Always cite the specific documents from the context that you used in your answer by referencing their number (e.g., "According to Document 1...").
+        4. Organize your response in a clear, structured format with headings where appropriate.
+        5. Follow best practices for clear writing.
+        6. If the information in different documents conflicts, acknowledge this and explain the different perspectives.
+        7. Be specific and detailed in your answers, focusing on accuracy over brevity.
+        8. Aim to be educational and informative in your tone.
+        9. Aim to write a comprehensive answer of 300-500 words to the user's question.
         """
 
         try:
+            logger.info(f"Generating response with OpenAI ({self.config.openai_model})")
+
+            start_time = datetime.now()
             response = self.openai_client.chat.completions.create(
+                model=self.config.openai_model,
                 messages=[
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"}
                 ],
+                temperature=self.config.temperature,
+                max_tokens=self.config.max_tokens,
             )
+
+            generation_time = (datetime.now() - start_time).total_seconds()
+            response_text = response.choices[0].message.content
+
+            logger.info(f"Generated response with OpenAI in {generation_time:.2f} seconds")
+            return response_text
         except Exception as e:
+            error_msg = f"Error generating response with OpenAI: {str(e)}"
+            logger.error(error_msg)
+            return f"Error: {error_msg}"
 
     def generate_response_gemini(self, query: str, context: str) -> str:
+        """
+        Generate a response using Gemini with context.
+
+        Args:
+            query (str): User query
+            context (str): Formatted document context
+
+        Returns:
+            str: Generated response
+        """
         if not self.gemini_configured:
+            logger.warning("Gemini API key not configured for response generation")
             return "Error: Google AI API key not configured. Please enter an API key in the API key field."
 
+        prompt = f"""
         You are a highly supportive and insightful assistant dedicated to providing clear, helpful, and well-structured answers based on the given context. Your goal is to ensure the user receives a thorough, encouraging, and informative response that directly addresses their question.
 
         **Guidelines for Your Response:**
+        - Use ONLY the information from the **context** to form a detailed and well-reasoned answer.
         - If the context lacks sufficient information, state it clearly while offering general insights or related knowledge.
+        - Cite specific sections from the context by referring to document numbers (e.g., "According to Document 1...").
         - Maintain a **friendly, professional, and supportive** tone that encourages user engagement.
         - Aim for **clarity and depth**, breaking down complex ideas into easy-to-understand explanations.
+        - Organize your response with headings and sections if appropriate.
+        - Do not make up information or use knowledge outside of the provided context.
+        - If information in different documents conflicts, explain the different perspectives.
 
         **Context:**
         {context}
 
+        **User's Question:**
         {query}
 
         **Your Response:**
+        """
 
         try:
+            logger.info(f"Generating response with Gemini ({self.config.gemini_model})")
+
+            start_time = datetime.now()
+            model = genai.GenerativeModel(self.config.gemini_model)
+
+            generation_config = {
+                "temperature": self.config.temperature,
+                "max_output_tokens": self.config.max_tokens,
+                "top_p": 0.9,
+                "top_k": 40
+            }
+
+            response = model.generate_content(
+                prompt,
+                generation_config=generation_config
+            )
+
+            generation_time = (datetime.now() - start_time).total_seconds()
+            response_text = response.text
+
+            logger.info(f"Generated response with Gemini in {generation_time:.2f} seconds")
+            return response_text
         except Exception as e:
+            error_msg = f"Error generating response with Gemini: {str(e)}"
+            logger.error(error_msg)
+            return f"Error: {error_msg}"
 
+    def query_and_generate(self,
+                           query: str,
+                           n_results: int = 5,
+                           model: str = "openai") -> Tuple[str, str]:
+        """
+        Retrieve relevant documents and generate a response using the specified model.
+
+        Args:
+            query (str): User query
+            n_results (int): Number of documents to retrieve
+            model (str): Model provider to use ('openai' or 'gemini')
+
+        Returns:
+            Tuple[str, str]: (Generated response, Search results)
+        """
+        if not query.strip():
+            logger.warning("Empty query received")
+            return "Please enter a question to get a response.", "No search performed."
+
+        logger.info(f"Processing query: '{query[:50]}...' with {model} model")
+
         # Query vector store
         documents = self.vector_store.query(query, n_results=n_results)
 
+        # Format search results
+        formatted_results = []
+        for i, res in enumerate(documents):
+            metadata = res['metadata']
+            title = metadata.get('title', metadata.get('filename', 'Unknown'))
+            preview = res['document'][:500] + '...' if len(res['document']) > 500 else res['document']
+            formatted_results.append(f"**Result {i+1}** (Similarity: {res['score']:.2f})\n"
+                                     f"**Source:** {title}\n"
+                                     f"**Preview:**\n{preview}\n\n---\n")
+
+        search_output_text = "\n".join(formatted_results) if formatted_results else "No results found."
+
         if not documents:
+            logger.warning("No relevant documents found")
+            return "No relevant documents found to answer your question.", search_output_text
 
         # Format context
         context = self.format_context(documents)
 
         # Generate response with the appropriate model
         if model == "openai":
+            response = self.generate_response_openai(query, context)
         elif model == "gemini":
+            response = self.generate_response_gemini(query, context)
         else:
+            error_msg = f"Unknown model: {model}"
+            logger.error(error_msg)
+            return error_msg, search_output_text
+
+        return response, search_output_text
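End to end, the refactored method now returns both strings, which is what lets the Gradio handler below bind one call to two output components. A sketch, assuming a configured `RAGSystem` named `rag_system`:

```python
response, results_md = rag_system.query_and_generate(
    query="How is logging configured?",
    n_results=5,
    model="openai",            # or "gemini"
)
print(results_md)              # markdown previews with similarity scores
print(response)                # the generated answer (or an error string)
```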
 
+def get_db_stats(vector_store: VectorStoreManager) -> str:
+    """
+    Function to get vector store statistics.
+
+    Args:
+        vector_store (VectorStoreManager): Vector store manager
+
+    Returns:
+        str: Formatted statistics string
+    """
+    try:
+        stats = vector_store.get_statistics()
+        total_docs = stats.get('total_documents', 0)
+        unique_files = stats.get('unique_files', 'Unknown')
+        model = stats.get('embedding_model', 'Unknown')
+        device = stats.get('device', 'Unknown')
+
+        stats_text = [
+            f"Total documents: {total_docs}",
+            f"Unique files: {unique_files}",
+            f"Embedding model: {model}",
+            f"Device: {device}"
+        ]
+
+        return "\n".join(stats_text)
+    except Exception as e:
+        logger.error(f"Error getting statistics: {e}")
+        return "Error getting database statistics"
 def main():
+    """Main function to run the RAG application"""
+    # Path for configuration file (defined before use below)
+    CONFIG_FILE_PATH = "rag_config.json"
+
+    print(f"Starting Document RAG System v{VERSION}")
+    print(f"Log file: {log_file}")
+
+    # Try to load configuration from file, or use defaults
+    if os.path.exists(CONFIG_FILE_PATH):
+        config = Config.from_file(CONFIG_FILE_PATH)
+    else:
+        config = Config(
+            local_dir="./chroma_db",  # Store Chroma files in a dedicated directory
+            collection_name="markdown_docs"
+        )
+        # Save default configuration
+        config.save_to_file(CONFIG_FILE_PATH)
 
     try:
         # Initialize vector store manager with existing collection
         vector_store = VectorStoreManager(config)
 
         # Initialize RAG system without API keys initially
+        rag_system = RAGSystem(vector_store, config)
 
         # Create the Gradio interface
+        with gr.Blocks(title=config.system_name) as app:
+            gr.Markdown(f"# {config.system_name} v{VERSION}")
+            gr.Markdown("Retrieve and generate answers from your documents using AI")
 
             with gr.Row():
                 with gr.Column(scale=1):
                     # API Keys and model selection
+                    with gr.Box():
+                        gr.Markdown("### LLM Configuration")
+                        model_choice = gr.Radio(
+                            choices=["openai", "gemini"],
+                            value="openai",
+                            label="Choose LLM Provider",
+                            info=f"Select which model to use ({config.openai_model} or {config.gemini_model})"
+                        )
+
+                        api_key_input = gr.Textbox(
+                            label="API Key",
+                            placeholder="Enter your API key here...",
+                            type="password",
+                            info="Your API key is not stored between sessions"
+                        )
+
+                        save_key_button = gr.Button("Save API Key", variant="primary")
+                        api_status = gr.Markdown("")
 
                     # Search controls
+                    with gr.Box():
+                        gr.Markdown("### Search Settings")
+                        num_results = gr.Slider(
+                            minimum=1,
+                            maximum=20,
+                            value=15,
+                            step=1,
+                            label="Number of documents to retrieve",
+                            info="Higher values may provide more context but slower responses"
+                        )
+
+                        temperature_slider = gr.Slider(
+                            minimum=0.0,
+                            maximum=1.0,
+                            value=config.temperature,
+                            step=0.05,
+                            label="Temperature",
+                            info="Lower values = more factual, higher values = more creative"
+                        )
+
+                        max_tokens_slider = gr.Slider(
+                            minimum=100,
+                            maximum=4000,
+                            value=config.max_tokens,
+                            step=100,
+                            label="Max Output Tokens",
+                            info="Maximum length of generated response"
+                        )