Update app.py
app.py CHANGED
@@ -1,18 +1,26 @@
 import os
 import sys
 import logging
-from pathlib import Path
 import json
-from datetime import datetime
-from typing import List, Dict, Any, Optional, Tuple, Union
 import traceback
+from datetime import datetime
+from typing import List, Dict, Any, Optional, Tuple
+
+# Third-party libraries
+import torch
+import numpy as np
+from sentence_transformers import SentenceTransformer
+import chromadb
+from chromadb.utils import embedding_functions
+import gradio as gr
+from openai import OpenAI
+import google.generativeai as genai
 
-#
+# ----------------- Logging Configuration -----------------
 LOG_DIR = "logs"
 os.makedirs(LOG_DIR, exist_ok=True)
 log_file = os.path.join(LOG_DIR, f"rag_system_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
 
-# Set up root logger with both file and console handlers
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
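Moving the third-party imports to the top of the module drops the old try/except guard, so a missing dependency now raises a bare ImportError at startup instead of the logged message and clean exit the deleted block provided. A minimal sketch of reinstating that behavior, if wanted, reusing the same names as the removed code:

import logging
import sys

# Sketch only: guards the new top-level imports the way the deleted block did.
try:
    import torch  # noqa: F401
    import gradio as gr  # noqa: F401  # guard the remaining imports the same way
except ImportError as e:
    logging.getLogger("rag_system").critical(f"Failed to import required libraries: {e}")
    print("ERROR: Missing required libraries. Please install with: pip install -r requirements.txt")
    sys.exit(1)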
@@ -24,46 +32,11 @@ logging.basicConfig(
 logger = logging.getLogger("rag_system")
 logger.info(f"Starting RAG system. Log file: {log_file}")
 
-#
-try:
-    import torch
-    import numpy as np
-    from sentence_transformers import SentenceTransformer
-    import chromadb
-    from chromadb.utils import embedding_functions
-    import gradio as gr
-    from openai import OpenAI
-    import google.generativeai as genai
-    logger.info("All required libraries successfully imported")
-except ImportError as e:
-    logger.critical(f"Failed to import required libraries: {e}")
-    print(f"ERROR: Missing required libraries. Please install with: pip install -r requirements.txt")
-    print(f"Specific error: {e}")
-    sys.exit(1)
-
-# Version info for tracking
-VERSION = "1.0.0"
-logger.info(f"RAG System Version: {VERSION}")
-
+# ----------------- Configuration Class -----------------
 class Config:
     """
     Configuration for vector store and RAG system.
-
-    This class centralizes all configuration parameters for the application,
-    making it easier to modify settings and ensure consistency.
-
-    Attributes:
-        local_dir (str): Directory for ChromaDB persistence
-        embedding_model (str): Name of the embedding model to use
-        collection_name (str): Name of the ChromaDB collection
-        default_top_k (int): Default number of results to return
-        openai_model (str): Default OpenAI model to use
-        gemini_model (str): Default Gemini model to use
-        temperature (float): Temperature setting for LLM generation
-        max_tokens (int): Maximum tokens for LLM response
-        system_name (str): Name of the system for UI
     """
-
     def __init__(self,
                  local_dir: str = "./chroma_db",
                  embedding_model: str = "all-MiniLM-L6-v2",
@@ -84,18 +57,14 @@ class Config:
         self.max_tokens = max_tokens
         self.system_name = system_name
 
-        # Create local directory if it doesn't exist
         os.makedirs(local_dir, exist_ok=True)
-
         logger.info(f"Initialized configuration: {self.__dict__}")
 
     def to_dict(self) -> Dict[str, Any]:
-        """Convert configuration to dictionary for serialization"""
         return self.__dict__
 
     @classmethod
     def from_file(cls, config_path: str) -> 'Config':
-        """Load configuration from JSON file"""
         try:
             with open(config_path, 'r') as f:
                 config_dict = json.load(f)
@@ -107,7 +76,6 @@ class Config:
             return cls()
 
     def save_to_file(self, config_path: str) -> bool:
-        """Save configuration to JSON file"""
         try:
             with open(config_path, 'w') as f:
                 json.dump(self.to_dict(), f, indent=2)
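The two methods above give Config a JSON round-trip. A small usage sketch; the file name is illustrative, and it assumes the unshown middle of from_file() feeds the loaded dict back into the constructor:

# Hypothetical round-trip of the Config persistence pair shown above.
cfg = Config(local_dir="./chroma_db", collection_name="markdown_docs")
cfg.save_to_file("rag_config_example.json")   # serializes self.__dict__ as JSON
restored = Config.from_file("rag_config_example.json")
print(restored.to_dict().get("collection_name"))  # expected: markdown_docs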
@@ -117,59 +85,33 @@ class Config:
             logger.error(f"Failed to save configuration to {config_path}: {e}")
             return False
 
+# ----------------- Embedding Engine -----------------
 class EmbeddingEngine:
     """
-
-
-    This class manages the embedding model used to convert text to vector
-    representations for semantic search.
-
-    Attributes:
-        model (SentenceTransformer): The loaded embedding model
-        model_name (str): Name of the successfully loaded model
-        vector_size (int): Dimension of the embedding vectors
-        device (str): Device used for inference ('cuda' or 'cpu')
+    Handles text embeddings using a lightweight model.
     """
-
     def __init__(self, model_name="all-MiniLM-L6-v2"):
-        """
-        Initialize the embedding engine with the specified model.
-
-        Args:
-            model_name (str): Name of the embedding model to load
-
-        Raises:
-            SystemExit: If no embedding model could be loaded
-        """
-        # Use GPU if available
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         logger.info(f"Using device for embeddings: {self.device}")
 
-        # Try multiple model options in order of preference
         model_options = [
             model_name,
-            "all-MiniLM-L6-v2",
-            "paraphrase-MiniLM-L3-v2",
-            "all-mpnet-base-v2"
+            "all-MiniLM-L6-v2",
+            "paraphrase-MiniLM-L3-v2",
+            "all-mpnet-base-v2"
         ]
-
         self.model = None
 
-        # Try each model in order until one works
        for model_option in model_options:
             try:
                 logger.info(f"Attempting to load embedding model: {model_option}")
                 self.model = SentenceTransformer(model_option)
-
-                # Move model to device
                 self.model.to(self.device)
-
                 logger.info(f"Successfully loaded embedding model: {model_option}")
                 self.model_name = model_option
                 self.vector_size = self.model.get_sentence_embedding_dimension()
                 logger.info(f"Embedding vector size: {self.vector_size}")
                 break
-
             except Exception as e:
                 logger.warning(f"Failed to load embedding model {model_option}: {str(e)}")
 
@@ -179,22 +121,8 @@ class EmbeddingEngine:
             raise SystemExit(error_msg)
 
     def embed(self, texts: List[str]) -> np.ndarray:
-        """
-        Generate embeddings for a list of texts.
-
-        Args:
-            texts (List[str]): List of texts to embed
-
-        Returns:
-            np.ndarray: Array of embeddings
-
-        Raises:
-            ValueError: If the input is invalid
-            RuntimeError: If embedding fails
-        """
         if not texts:
             raise ValueError("Cannot embed empty list of texts")
-
         try:
             embeddings = self.model.encode(texts, convert_to_numpy=True)
             return embeddings
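For orientation, a short usage sketch of the embed() method above, run inside this module; the output shape follows from SentenceTransformer.encode with convert_to_numpy=True:

# Illustrative usage of EmbeddingEngine.embed().
engine = EmbeddingEngine()                        # falls back through model_options
vectors = engine.embed(["hello world", "rag demo"])
print(vectors.shape)                              # (2, engine.vector_size)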
@@ -202,33 +130,13 @@ class EmbeddingEngine:
             logger.error(f"Error generating embeddings: {e}")
             raise RuntimeError(f"Failed to generate embeddings: {e}")
 
+# ----------------- Vector Store Manager -----------------
 class VectorStoreManager:
     """
-
-
-    This class provides an interface to the ChromaDB vector database,
-    handling document storage, retrieval, and management.
-
-    Attributes:
-        config (Config): Configuration parameters
-        client (chromadb.PersistentClient): ChromaDB client
-        collection (chromadb.Collection): The active ChromaDB collection
-        embedding_engine (EmbeddingEngine): Engine for generating embeddings
+    Manages Chroma vector store operations.
     """
-
     def __init__(self, config: Config):
-        """
-        Initialize the vector store manager.
-
-        Args:
-            config (Config): Configuration parameters
-
-        Raises:
-            SystemExit: If the vector store cannot be initialized
-        """
         self.config = config
-
-        # Initialize Chroma client (local persistence)
         logger.info(f"Initializing Chroma at {config.local_dir}")
         try:
             self.client = chromadb.PersistentClient(path=config.local_dir)
@@ -238,19 +146,15 @@ class VectorStoreManager:
             logger.critical(error_msg)
             raise SystemExit(error_msg)
 
-        # Get or create collection
         try:
-            # Initialize embedding model
             logger.info("Loading embedding model...")
             self.embedding_engine = EmbeddingEngine(config.embedding_model)
             logger.info(f"Using embedding model: {self.embedding_engine.model_name}")
 
-            # Create embedding function
             sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
                 model_name=self.embedding_engine.model_name
             )
 
-            # Try to get existing collection or create a new one
             try:
                 self.collection = self.client.get_collection(
                     name=config.collection_name,
@@ -259,18 +163,15 @@ class VectorStoreManager:
                 logger.info(f"Using existing collection: {config.collection_name}")
             except Exception as e:
                 logger.warning(f"Error getting collection: {e}")
-                # Attempt to get a list of available collections
                 collections = self.client.list_collections()
                 if collections:
                     logger.info(f"Available collections: {[c.name for c in collections]}")
-                    # Use the first available collection if any
                     self.collection = self.client.get_collection(
                         name=collections[0].name,
                         embedding_function=sentence_transformer_ef
                     )
                     logger.info(f"Using collection: {collections[0].name}")
                 else:
-                    # Create new collection if none exist
                     self.collection = self.client.create_collection(
                         name=config.collection_name,
                         embedding_function=sentence_transformer_ef,
@@ -284,76 +185,42 @@ class VectorStoreManager:
             raise SystemExit(error_msg)
 
     def query(self, query_text: str, n_results: int = 5) -> List[Dict]:
-        """
-        Query the vector store with a text query.
-
-        Args:
-            query_text (str): The query text
-            n_results (int): Number of results to return
-
-        Returns:
-            List[Dict]: List of results with document text, metadata, and similarity score
-        """
         if not query_text.strip():
             logger.warning("Empty query received")
             return []
-
         try:
             logger.info(f"Querying vector store with: '{query_text[:50]}...' (top {n_results})")
-
-            # Query the collection
             search_results = self.collection.query(
                 query_texts=[query_text],
                 n_results=n_results,
-                include=["documents", "metadatas", "distances"]
+                include=["documents", "metadatas", "distances"]
             )
-
-            # Format results
             results = []
             if search_results["documents"] and len(search_results["documents"][0]) > 0:
                 for i in range(len(search_results["documents"][0])):
                     results.append({
                         'document': search_results["documents"][0][i],
                         'metadata': search_results["metadatas"][0][i] if search_results["metadatas"] else {},
-                        'score': 1.0 - search_results["distances"][0][i], #
+                        'score': 1.0 - search_results["distances"][0][i], # convert distance to similarity
                         'distance': search_results["distances"][0][i]
                     })
-
                 logger.info(f"Found {len(results)} results for query")
             else:
                 logger.info("No results found for query")
-
             return results
         except Exception as e:
             logger.error(f"Error querying collection: {e}")
             logger.debug(traceback.format_exc())
             return []
 
-    def add_document(self,
-                     document: str,
-                     doc_id: str,
-                     metadata: Dict[str, Any]) -> bool:
-        """
-        Add a document to the vector store.
-
-        Args:
-            document (str): The document text
-            doc_id (str): Unique identifier for the document
-            metadata (Dict[str, Any]): Metadata about the document
-
-        Returns:
-            bool: True if successful, False otherwise
-        """
+    def add_document(self, document: str, doc_id: str, metadata: Dict[str, Any]) -> bool:
         try:
             logger.info(f"Adding document '{doc_id}' to vector store")
-
-            # Add the document to the collection
             self.collection.add(
                 documents=[document],
                 ids=[doc_id],
                 metadatas=[metadata]
             )
-
             logger.info(f"Successfully added document '{doc_id}'")
             return True
         except Exception as e:
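A note on the score field above: 1.0 - distance is only a bounded similarity when the collection uses cosine distance, and Chroma defaults to l2 unless the collection was created with a different hnsw:space. A hedged, metric-aware alternative (the helper name is ours, not the app's):

# Hypothetical helper; the app itself hardcodes 1.0 - distance.
def distance_to_score(distance: float, space: str = "l2") -> float:
    """Map a Chroma distance to a rough similarity in [0, 1]."""
    if space == "cosine":              # cosine distance lies in [0, 2]
        return 1.0 - distance / 2.0
    return 1.0 / (1.0 + distance)      # monotone fallback for l2 distances

print(distance_to_score(0.3, space="cosine"))  # 0.85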
@@ -361,15 +228,6 @@ class VectorStoreManager:
             return False
 
     def delete_document(self, doc_id: str) -> bool:
-        """
-        Delete a document from the vector store.
-
-        Args:
-            doc_id (str): ID of the document to delete
-
-        Returns:
-            bool: True if successful, False otherwise
-        """
         try:
             logger.info(f"Deleting document '{doc_id}' from vector store")
             self.collection.delete(ids=[doc_id])
@@ -380,31 +238,19 @@ class VectorStoreManager:
             return False
 
     def get_statistics(self) -> Dict[str, Any]:
-        """
-        Get statistics about the vector store.
-
-        Returns:
-            Dict[str, Any]: Statistics about the vector store
-        """
         stats = {
             'collection_name': self.config.collection_name,
             'embedding_model': self.embedding_engine.model_name,
             'embedding_dimensions': self.embedding_engine.vector_size,
             'device': self.embedding_engine.device
         }
-
         try:
-            # Get collection count
             collection_count = self.collection.count()
             stats['total_documents'] = collection_count
-
-            # Get unique metadata values
             if collection_count > 0:
                 try:
-                    # Get a sample of document metadata
                     sample_results = self.collection.get(limit=min(collection_count, 100))
                     if sample_results and 'metadatas' in sample_results and sample_results['metadatas']:
-                        # Count unique files if filename exists in metadata
                        filenames = set()
                         for metadata in sample_results['metadatas']:
                             if 'filename' in metadata:
@@ -412,57 +258,28 @@ class VectorStoreManager:
                         stats['unique_files'] = len(filenames)
                 except Exception as e:
                     logger.warning(f"Error getting metadata statistics: {e}")
-
             logger.info(f"Vector store statistics: {stats}")
         except Exception as e:
             logger.error(f"Error getting statistics: {e}")
             stats['error'] = str(e)
-
         return stats
 
+# ----------------- RAG System -----------------
 class RAGSystem:
     """
-    Retrieval-Augmented Generation
-
-    This class handles the RAG workflow: retrieval of relevant documents,
-    formatting context, and generating responses with different LLM providers.
-
-    Attributes:
-        vector_store (VectorStoreManager): Manager for vector store operations
-        openai_client (Optional[OpenAI]): OpenAI client
-        gemini_configured (bool): Whether Gemini API is configured
-        config (Config): Configuration parameters
+    Handles the Retrieval-Augmented Generation workflow.
     """
-
     def __init__(self, vector_store: VectorStoreManager, config: Config):
-        """
-        Initialize the RAG system.
-
-        Args:
-            vector_store (VectorStoreManager): Vector store manager
-            config (Config): Configuration parameters
-        """
         self.vector_store = vector_store
         self.config = config
         self.openai_client = None
         self.gemini_configured = False
-
         logger.info("Initialized RAG system")
 
     def setup_openai(self, api_key: str) -> bool:
-        """
-        Set up OpenAI client with API key.
-
-        Args:
-            api_key (str): OpenAI API key
-
-        Returns:
-            bool: True if successful, False otherwise
-        """
         if not api_key.strip():
             logger.warning("Empty OpenAI API key provided")
             return False
-
         try:
             logger.info("Setting up OpenAI client")
             self.openai_client = OpenAI(api_key=api_key)
@@ -483,27 +300,14 @@ class RAGSystem:
             return False
 
     def setup_gemini(self, api_key: str) -> bool:
-        """
-        Set up Gemini with API key.
-
-        Args:
-            api_key (str): Google AI API key
-
-        Returns:
-            bool: True if successful, False otherwise
-        """
         if not api_key.strip():
             logger.warning("Empty Gemini API key provided")
             return False
-
         try:
             logger.info("Setting up Gemini client")
             genai.configure(api_key=api_key)
-
-            # Test the API key with a simple request
             model = genai.GenerativeModel(self.config.gemini_model)
             response = model.generate_content("Test connection")
-
             self.gemini_configured = True
             logger.info("Gemini client configured successfully")
             return True
@@ -511,83 +315,44 @@ class RAGSystem:
             logger.error(f"Error configuring Gemini: {e}")
             self.gemini_configured = False
             return False
 
     def format_context(self, documents: List[Dict]) -> str:
-        """
-        Format retrieved documents into context for the LLM.
-
-        Args:
-            documents (List[Dict]): List of retrieved documents
-
-        Returns:
-            str: Formatted context for the LLM
-        """
         if not documents:
             logger.warning("No documents provided for context formatting")
             return "No relevant documents found."
-
         logger.info(f"Formatting {len(documents)} documents for context")
         context_parts = []
-
         for i, doc in enumerate(documents):
             metadata = doc['metadata']
-            # Extract document metadata in a robust way
             title = metadata.get('title', metadata.get('filename', 'Unknown document'))
             source = metadata.get('source', metadata.get('path', 'Unknown source'))
             date = metadata.get('date', metadata.get('created_at', 'Unknown date'))
-
-            # Format header with metadata
             header = f"Document {i+1} - {title}"
             if source != 'Unknown source':
                 header += f" (Source: {source})"
             if date != 'Unknown date':
                 header += f" (Date: {date})"
-
-            # For readability, limit length of context document
             doc_text = doc['document']
-            if len(doc_text) > 8000:
+            if len(doc_text) > 8000:
                 doc_text = doc_text[:8000] + "... [Document truncated for context]"
-
             context_parts.append(f"{header}:\n{doc_text}\n")
-
         full_context = "\n".join(context_parts)
         logger.info(f"Created context with {len(full_context)} characters")
-
         return full_context
 
     def generate_response_openai(self, query: str, context: str) -> str:
-        """
-        Generate a response using OpenAI model with context.
-
-        Args:
-            query (str): User query
-            context (str): Formatted document context
-
-        Returns:
-            str: Generated response
-        """
         if not self.openai_client:
             logger.warning("OpenAI API key not configured for response generation")
-            return "Error: OpenAI API key not configured. Please enter an API key
+            return "Error: OpenAI API key not configured. Please enter an API key."
 
-        system_prompt = """
-
-
-
-        2. If the context doesn't contain the information needed, say so clearly and do your best to deduce and infer the answer.
-        3. Always cite the specific documents from the context that you used in your answer by referencing their number (e.g., "According to Document 1...").
-        4. Organize your response in a clear, structured format with headings where appropriate.
-        5. Use the best practices of writings.
-        6. If the information in different documents conflicts, acknowledge this and explain the different perspectives.
-        7. Be specific and detailed in your answers, focusing on accuracy over brevity.
-        8. Aim to be educational and informative in your tone.
-        9. You aim to write between 300-500 words of comprehensive answer to user question.
-        """
+        system_prompt = (
+            "You are a knowledgeable assistant that answers questions based solely on the provided context. "
+            "Use clear headings and cite the document numbers where the information is found. "
+            "If the context lacks the needed details, say so and suggest what additional details might help."
+        )
 
         try:
-            logger.info(f"Generating response with OpenAI
+            logger.info(f"Generating response with OpenAI using model {self.config.openai_model}")
             start_time = datetime.now()
             response = self.openai_client.chat.completions.create(
                 model=self.config.openai_model,
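The next hunk skips the unchanged messages=[...] lines of this create() call, so their exact content is not visible here. A plausible reconstruction of the request shape, hypothetical wherever it fills those hidden lines:

from openai import OpenAI

# Sketch of the chat request shape; the real messages block is unchanged
# context that this diff simply does not render.
client = OpenAI(api_key="sk-...")  # placeholder key
system_prompt = "You are a knowledgeable assistant..."         # as built above
context, query = "Document 1 - ...", "What does this app do?"  # illustrative
response = client.chat.completions.create(
    model="gpt-4o-mini",  # the app passes config.openai_model here
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"},
    ],
    temperature=0.7,
    max_tokens=1000,
)
print(response.choices[0].message.content)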
@@ -598,10 +363,8 @@ class RAGSystem:
                 temperature=self.config.temperature,
                 max_tokens=self.config.max_tokens,
             )
-
             generation_time = (datetime.now() - start_time).total_seconds()
             response_text = response.choices[0].message.content
-
             logger.info(f"Generated response with OpenAI in {generation_time:.2f} seconds")
             return response_text
         except Exception as e:
@@ -610,63 +373,30 @@ class RAGSystem:
             return f"Error: {error_msg}"
 
     def generate_response_gemini(self, query: str, context: str) -> str:
-        """
-        Generate a response using Gemini with context.
-
-        Args:
-            query (str): User query
-            context (str): Formatted document context
-
-        Returns:
-            str: Generated response
-        """
         if not self.gemini_configured:
             logger.warning("Gemini API key not configured for response generation")
-            return "Error:
+            return "Error: Gemini API key not configured. Please enter an API key."
 
-        prompt = f"""
-
-
-
-
-        - Cite specific sections from the context by referring to document numbers (e.g., "According to Document 1...").
-        - Maintain a **friendly, professional, and supportive** tone that encourages user engagement.
-        - Aim for **clarity and depth**, breaking down complex ideas into easy-to-understand explanations.
-        - Organize your response with headings and sections if appropriate.
-        - Do not make up information or use knowledge outside of the provided context.
-        - If information in different documents conflicts, explain the different perspectives.
-
-        **Context:**
-        {context}
 
-        **User's Question:**
-        {query}
-
-        **Your Response:**
-        """
-
+        prompt = (
+            "You are an insightful assistant who provides detailed, well-organized answers based solely on the provided context. "
+            "Answer the question below by clearly citing document numbers where applicable. "
+            "If there is insufficient context, indicate what further details would be needed.\n\n"
+            f"Context:\n{context}\n\nQuestion: {query}\n\nAnswer:"
+        )
         try:
-            logger.info(f"Generating response with Gemini
-
+            logger.info(f"Generating response with Gemini using model {self.config.gemini_model}")
             start_time = datetime.now()
             model = genai.GenerativeModel(self.config.gemini_model)
-
             generation_config = {
                 "temperature": self.config.temperature,
                 "max_output_tokens": self.config.max_tokens,
                 "top_p": 0.9,
                 "top_k": 40
             }
-
-            response = model.generate_content(
-                prompt,
-                generation_config=generation_config
-            )
-
+            response = model.generate_content(prompt, generation_config=generation_config)
             generation_time = (datetime.now() - start_time).total_seconds()
             response_text = response.text
-
             logger.info(f"Generated response with Gemini in {generation_time:.2f} seconds")
             return response_text
         except Exception as e:
@@ -674,50 +404,29 @@ class RAGSystem:
             logger.error(error_msg)
             return f"Error: {error_msg}"
 
-    def query_and_generate(self,
-                           query: str,
-                           n_results: int = 5,
-                           model: str = "openai") -> Tuple[str, str]:
-        """
-        Retrieve relevant documents and generate a response using the specified model.
-
-        Args:
-            query (str): User query
-            n_results (int): Number of documents to retrieve
-            model (str): Model provider to use ('openai' or 'gemini')
-
-        Returns:
-            Tuple[str, str]: (Generated response, Search results)
-        """
+    def query_and_generate(self, query: str, n_results: int = 5, model: str = "openai") -> Tuple[str, str]:
         if not query.strip():
             logger.warning("Empty query received")
             return "Please enter a question to get a response.", "No search performed."
 
-        logger.info(f"Processing query: '{query[:50]}...'
-
-        # Query vector store
+        logger.info(f"Processing query: '{query[:50]}...' using {model} model")
         documents = self.vector_store.query(query, n_results=n_results)
 
-        # Format
+        # Format retrieval details (hidden by default in the UI)
         formatted_results = []
         for i, res in enumerate(documents):
            metadata = res['metadata']
             title = metadata.get('title', metadata.get('filename', 'Unknown'))
-            preview = res['document'][:
-            formatted_results.append(f"**
-                f"**Source:** {title}\n"
-                f"**Preview:**\n{preview}\n\n---\n")
-
+            preview = res['document'][:300] + '...' if len(res['document']) > 300 else res['document']
+            formatted_results.append(f"**Document {i+1}**\nSource: {title}\nPreview:\n{preview}\n")
         search_output_text = "\n".join(formatted_results) if formatted_results else "No results found."
 
         if not documents:
             logger.warning("No relevant documents found")
             return "No relevant documents found to answer your question.", search_output_text
 
-        # Format context
         context = self.format_context(documents)
 
-        # Generate response with the appropriate model
         if model == "openai":
             response = self.generate_response_openai(query, context)
         elif model == "gemini":
@@ -729,114 +438,145 @@ class RAGSystem:
 
         return response, search_output_text
 
+# ----------------- Utility Function -----------------
 def get_db_stats(vector_store: VectorStoreManager) -> str:
-    """
-    Function to get vector store statistics.
-
-    Args:
-        vector_store (VectorStoreManager): Vector store manager
-
-    Returns:
-        str: Formatted statistics string
-    """
     try:
         stats = vector_store.get_statistics()
         total_docs = stats.get('total_documents', 0)
         unique_files = stats.get('unique_files', 'Unknown')
         model = stats.get('embedding_model', 'Unknown')
         device = stats.get('device', 'Unknown')
-
-        stats_text = [
-            f"
-            f"
-            f"Embedding model: {model}",
+        stats_text = (
+            f"Total documents: {total_docs}\n"
+            f"Unique files: {unique_files}\n"
+            f"Embedding model: {model}\n"
             f"Device: {device}"
-        ]
-
-        return "\n".join(stats_text)
+        )
+        return stats_text
     except Exception as e:
         logger.error(f"Error getting statistics: {e}")
         return "Error getting database statistics"
 
+# ----------------- Main Application -----------------
 def main():
-
-    print(f"Starting {CONFIG_FILE_PATH}Document RAG System v{VERSION}")
-    print(f"Log file: {log_file}")
-
-    # Path for configuration file
+    # Define configuration file path before usage
     CONFIG_FILE_PATH = "rag_config.json"
+    print(f"Starting Document RAG System v1.0.0")
+    print(f"Log file: {log_file}")
 
-    #
+    # Load configuration from file or use defaults
     if os.path.exists(CONFIG_FILE_PATH):
         config = Config.from_file(CONFIG_FILE_PATH)
     else:
-        config = Config(
-            local_dir="./chroma_db", # Store Chroma files in dedicated directory
-            collection_name="markdown_docs"
-        )
-        # Save default configuration
+        config = Config(local_dir="./chroma_db", collection_name="markdown_docs")
         config.save_to_file(CONFIG_FILE_PATH)
 
     try:
-        # Initialize vector store manager with existing collection
         vector_store = VectorStoreManager(config)
-
-        # Initialize RAG system without API keys initially
         rag_system = RAGSystem(vector_store, config)
+    except Exception as e:
+        print(f"Error initializing system: {e}")
+        sys.exit(1)
+
+    # ----------------- Gradio Callback Functions -----------------
+    def save_api_key(model_choice: str, api_key: str):
+        if model_choice == "openai":
+            success = rag_system.setup_openai(api_key)
+            return "OpenAI API key saved and configured successfully." if success else "Error configuring OpenAI API key."
+        elif model_choice == "gemini":
+            success = rag_system.setup_gemini(api_key)
+            return "Gemini API key saved and configured successfully." if success else "Error configuring Gemini API key."
+        else:
+            return "Unknown model choice."
+
+    def process_query(query: str, model_choice: str, n_results: int, temperature: float, max_tokens: int):
+        # Update configuration parameters based on slider values
+        config.temperature = temperature
+        config.max_tokens = max_tokens
+        response_text, search_details = rag_system.query_and_generate(query, n_results=n_results, model=model_choice)
+        return response_text, search_details
+
+    # ----------------- Gradio Interface -----------------
+    with gr.Blocks(title=config.system_name) as app:
+        gr.Markdown(f"# {config.system_name} v1.0.0")
+        gr.Markdown("Retrieve answers from your documents with AI-powered retrieval and generation.")
+
+        with gr.Row():
+            with gr.Column(scale=1):
+                with gr.Box():
+                    gr.Markdown("### LLM Configuration")
+                    model_choice = gr.Radio(
+                        choices=["openai", "gemini"],
+                        value="openai",
+                        label="Select LLM Provider",
+                        info="Choose between OpenAI and Gemini models."
+                    )
+                    api_key_input = gr.Textbox(
+                        label="API Key",
+                        placeholder="Enter your API key here...",
+                        type="password",
+                        info="Your API key is not stored between sessions."
+                    )
+                    save_key_button = gr.Button("Save API Key", variant="primary")
+                    api_status = gr.Markdown("")
 
+                with gr.Box():
+                    gr.Markdown("### Search Settings")
+                    n_results_slider = gr.Slider(
+                        minimum=1,
+                        maximum=20,
+                        value=config.default_top_k,
+                        step=1,
+                        label="Documents to Retrieve",
+                        info="Number of documents for context."
+                    )
+                    temperature_slider = gr.Slider(
+                        minimum=0.0,
+                        maximum=1.0,
+                        value=config.temperature,
+                        step=0.05,
+                        label="Response Temperature",
+                        info="Lower values yield more factual responses."
+                    )
+                    max_tokens_slider = gr.Slider(
+                        minimum=100,
+                        maximum=4000,
+                        value=config.max_tokens,
+                        step=100,
+                        label="Max Output Tokens",
+                        info="Maximum tokens in generated response."
+                    )
+
+            with gr.Column(scale=2):
+                with gr.Box():
+                    gr.Markdown("### Ask a Question")
+                    query_input = gr.Textbox(
+                        label="Your Question",
+                        placeholder="Enter your question here..."
+                    )
+                    submit_button = gr.Button("Submit")
+                with gr.Box():
+                    answer_output = gr.Markdown(label="Answer")
+                with gr.Accordion("View Document Retrieval Details (hidden)", open=False):
+                    retrieval_output = gr.Markdown(label="Retrieval Details")
+
+        # Set up callbacks
+        save_key_button.click(
+            save_api_key,
+            inputs=[model_choice, api_key_input],
+            outputs=api_status
+        )
+
+        submit_button.click(
+            process_query,
+            inputs=[query_input, model_choice, n_results_slider, temperature_slider, max_tokens_slider],
+            outputs=[answer_output, retrieval_output]
+        )
+
+        with gr.Accordion("View Database Statistics", open=False):
+            db_stats = gr.Markdown(get_db_stats(vector_store))
+
+    app.launch()
+
+if __name__ == "__main__":
+    main()
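One compatibility note on the new interface code: gr.Box was deprecated in later Gradio 3.x releases and removed in Gradio 4.x, so the layout above assumes the Space is pinned to a Gradio version that still ships it. A sketch of the usual drop-in replacement, should the runtime ever move to a newer Gradio:

import gradio as gr

# Sketch only: gr.Group is the customary stand-in for the removed gr.Box.
with gr.Blocks(title="Document RAG System") as demo:
    with gr.Group():  # replaces gr.Box() under Gradio 4.x
        gr.Markdown("### LLM Configuration")
        api_key_input = gr.Textbox(label="API Key", type="password")

# demo.launch()  # launch as in main()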