# test_retrieval.py
import os

from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import SupabaseVectorStore
from supabase.client import Client, create_client

# Inputs (compared case-insensitively) that end the interactive loop.
_EXIT_COMMANDS = ("exit", "quit")


def _print_results(similar_docs) -> None:
    """Print each (document, score) pair returned by the similarity search."""
    if not similar_docs:
        print("\n--- No similar documents found in the database. ---")
        print("This might mean your database is empty. Please run the data upload cell in test.ipynb.\n")
        return

    print(f"\n--- Found {len(similar_docs)} similar document(s) ---")
    for i, (doc, score) in enumerate(similar_docs, start=1):
        print(f"\n--- Result {i} (Similarity Score: {score:.4f}) ---")
        print(doc.page_content)
    print("\n-------------------------------------\n")


def test_retrieval() -> None:
    """
    A simple script to test similarity search on your Supabase vector store.

    Reads SUPABASE_URL and SUPABASE_SERVICE_KEY from the environment (after
    loading a .env file), builds a SupabaseVectorStore over the "documents"
    table, then repeatedly prompts for a question and prints the top 3
    matches with their relevance scores.

    The loop ends when the user types 'exit' or 'quit', when stdin reaches
    end-of-file, or when the user presses Ctrl-C at the prompt. Errors raised
    while searching or printing are reported and the loop continues.
    """
    # Load environment variables from .env file
    load_dotenv()

    # --- 1. Connect to the Database ---
    print("Connecting to Supabase...")
    supabase_url = os.environ.get("SUPABASE_URL")
    supabase_key = os.environ.get("SUPABASE_SERVICE_KEY")

    if not supabase_url or not supabase_key:
        print("Error: SUPABASE_URL and SUPABASE_SERVICE_KEY must be set in your .env file.")
        return

    try:
        supabase: Client = create_client(supabase_url, supabase_key)
        print("Successfully connected to Supabase.")
    except Exception as e:
        print(f"Error connecting to Supabase: {e}")
        return

    # --- 2. Initialize Embeddings and Vector Store ---
    print("Initializing embeddings model...")
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

    vector_store = SupabaseVectorStore(
        client=supabase,
        embedding=embeddings,
        table_name="documents",
        query_name="match_documents",
    )
    print("Vector store initialized.")

    # --- 3. Start the Interactive Test Loop ---
    print("\nEnter a question to test the similarity search.")
    print("Type 'exit' or 'quit' to stop the script.\n")

    while True:
        # Read the prompt in its own try block: EOFError is a subclass of
        # Exception, so letting the generic handler below catch it would
        # re-prompt forever once stdin is closed or exhausted.
        try:
            query = input("Question: ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nExiting...")
            break

        if query.lower() in _EXIT_COMMANDS:
            print("Exiting...")
            break

        # Skip empty or whitespace-only input rather than searching for it.
        if not query:
            continue

        try:
            # --- 4. Perform the Similarity Search ---
            print("\nSearching for similar documents...")
            # We ask for the top 3 matches (k=3) to get more context
            similar_docs = vector_store.similarity_search_with_relevance_scores(query, k=3)

            # --- 5. Display the Results ---
            _print_results(similar_docs)
        except Exception as e:
            # Best-effort: report the failure and keep the session alive.
            print(f"An error occurred: {e}")


if __name__ == "__main__":
    test_retrieval()