# aura-mind-glow / vector_store.py
import os

import chromadb
from sentence_transformers import SentenceTransformer

# --- Constants ---
MODEL_NAME = "all-MiniLM-L6-v2"
COLLECTION_NAME = "aura_mind_knowledge"
KNOWLEDGE_BASE_DIR = "knowledge_base_data"

# --- Initialize ChromaDB and Model ---
client = chromadb.PersistentClient(path="chroma_db")
model = SentenceTransformer(MODEL_NAME)
collection = client.get_or_create_collection(name=COLLECTION_NAME)

def embed_and_store_documents():
    """
    Reads documents from the knowledge base directory, generates embeddings,
    and stores them in ChromaDB.
    """
    if collection.count() > 0:
        print("✅ Knowledge base is already loaded into ChromaDB.")
        return

    print("Embedding and storing documents in ChromaDB...")
    documents = []
    ids = []
    for filename in os.listdir(KNOWLEDGE_BASE_DIR):
        if filename.endswith(".txt"):
            with open(os.path.join(KNOWLEDGE_BASE_DIR, filename), "r", encoding="utf-8") as f:
                documents.append(f.read())
                ids.append(filename)  # the filename doubles as a stable document ID

    if documents:
        # Encode all documents in one batch and store them alongside the raw text.
        embeddings = model.encode(documents).tolist()
        collection.add(
            embeddings=embeddings,
            documents=documents,
            ids=ids,
        )
        print(f"✅ Successfully stored {len(documents)} documents in ChromaDB.")

def search_documents(query: str, n_results: int = 1) -> list:
    """
    Searches for relevant documents in ChromaDB based on a query.

    Args:
        query: The search query.
        n_results: The number of results to return.

    Returns:
        A list of relevant documents.
    """
    if not query:
        return []

    # Embed the query with the same model used at indexing time.
    query_embedding = model.encode([query]).tolist()
    results = collection.query(
        query_embeddings=query_embedding,
        n_results=n_results,
    )
    return results["documents"][0] if results["documents"] else []
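

# --- Usage sketch (not part of the original module) ---
# A minimal smoke test, assuming the module is run directly and that
# KNOWLEDGE_BASE_DIR contains at least one .txt file. The query string
# below is a placeholder, not a real example from the knowledge base.
if __name__ == "__main__":
    embed_and_store_documents()
    matches = search_documents("example query", n_results=2)
    for doc in matches:
        print(doc[:200])  # preview the first 200 characters of each match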