import numpy as np from utils.openai_api import get_embedding def vector_similarity(x: list[float], y: list[float]) -> float: """ Returns the similarity between two vectors. Because OpenAI Embeddings are normalized to length 1, the cosine similarity is the same as the dot product. """ return np.dot(np.array(x), np.array(y)) def select_document_section_by_query_similarity(query: str, contexts: dict[(str, str), np.array]) -> list[(float, (str, str))]: """ Find the query embedding for the supplied query, and compare it against all of the pre-calculated document embeddings to find the most relevant sections. Return the list of document sections, sorted by relevance in descending order. """ query_embedding = get_embedding(query) document_similarities = sorted([ (vector_similarity(query_embedding, doc_embedding), doc_index) for doc_index, doc_embedding in contexts.items() ], reverse=True) return document_similarities[0]