import os
from sentence_transformers import SentenceTransformer

from database import init_db, check_if_indexed, delete_database_and_index, get_db_connection
from create_index import create_initial_index as build_secure_index
from search import search as secure_search
from ingest_document import ingest_pdf

# Name of the model handed to SentenceTransformer in KnowledgeBase.__init__.
# A CLIP model is used because it can handle both text and images.
MODEL_NAME = 'clip-ViT-B-32'

class KnowledgeBase:
    """Thin facade over the project's index-building, ingestion and search helpers.

    Constructing an instance loads the embedding model, calls ``init_db()``
    and, when ``check_if_indexed()`` reports that nothing is indexed yet,
    seeds the index from the bundled ``knowledge_base_data`` text files.
    """

    def __init__(self):
        """Load the model, initialise the database and seed the index if needed."""
        self.model = SentenceTransformer(MODEL_NAME)
        init_db()

        # Nothing more to do when an index is already present.
        if check_if_indexed():
            return

        print("Local knowledge base not found. Building initial knowledge base...")
        self._build_initial_knowledge_base()

    def _build_initial_knowledge_base(self):
        """Read the default seed documents and pass them to ``build_secure_index``.

        The files are looked up in the ``knowledge_base_data`` directory next
        to this module. A missing file is reported with a warning and skipped;
        if none of the files can be read, no index is built and a message is
        printed instead.
        """
        data_dir = os.path.join(os.path.dirname(__file__), "knowledge_base_data")
        seed_files = (
            "healthy_maize_remedy.txt",
            "maize_phosphorus_deficiency_remedy.txt",
            "comic_relief.txt",
        )

        # Maps file name -> full text, in the order the files are listed above.
        loaded = {}
        for name in seed_files:
            path = os.path.join(data_dir, name)
            try:
                with open(path, 'r', encoding='utf-8') as handle:
                    text = handle.read()
            except FileNotFoundError:
                print(f"Warning: Knowledge base file not found, skipping: {path}")
                continue
            loaded[name] = text

        if not loaded:
            print("No initial knowledge base documents found to index.")
            return

        build_secure_index(loaded)

    def create_initial_index(self, documents_dict):
        """Forward ``documents_dict`` to the external ``build_secure_index``.

        The helper's result is discarded; this method returns ``None``.
        """
        build_secure_index(documents_dict)

    def rebuild_from_default_files(self):
        """Re-run the seeding step using the default ``knowledge_base_data`` files."""
        self._build_initial_knowledge_base()

    def ingest_pdf(self, file_path, file_name):
        """Forward both arguments to the module-level ``ingest_pdf`` helper.

        The helper's result is discarded; this method returns ``None``.
        """
        # Inside a method body the bare name resolves to the imported
        # module-level function, not to this method.
        ingest_pdf(file_path, file_name)

    def search(self, query, k=1):
        """Return the result of the external ``secure_search(query, k)``.

        ``k`` defaults to 1 (presumably the number of hits requested --
        confirm against ``search.search``).
        """
        hits = secure_search(query, k)
        return hits

def get_retriever():
    """Create a ``KnowledgeBase`` and return a retriever object wrapping it.

    The returned object exposes ``get_relevant_documents(query)``, which
    searches the knowledge base and returns the text hits as LangChain
    ``Document`` objects.
    """

    class Retriever:
        """Adapter exposing a knowledge base through ``get_relevant_documents``."""

        def __init__(self, kb):
            self.kb = kb

        def get_relevant_documents(self, query):
            """Search for ``query`` and wrap every text hit in a ``Document``.

            Hits whose ``'type'`` is not ``'text'`` (e.g. image results) are
            left out, because only text content is passed to ``Document``.
            """
            hits = self.kb.search(query)
            # Imported lazily, after the search has run.
            from langchain.schema import Document

            text_documents = []
            for hit in hits:
                if hit['type'] != 'text':
                    continue
                text_documents.append(Document(page_content=hit['content']))
            return text_documents

    return Retriever(KnowledgeBase())