Spaces:

HMC-CIS
/

Trial-OpenAI

Sleeping

Trial-OpenAI / utils /document_retrieval.py

Update utils/document_retrieval.py

fe836b8 verified 18 days ago

1.01 kB

	import numpy as np
	from utils.openai_api import get_embedding

	def vector_similarity(x: list[float], y: list[float]) -> float:
	    """
	    Return the similarity between two embedding vectors.

	    The score is the dot product of ``x`` and ``y``. Because OpenAI embeddings
	    are normalized to length 1, the dot product is the same as the cosine
	    similarity; for vectors that are not unit length the result is simply the
	    raw dot product.

	    Args:
	        x: First vector.
	        y: Second vector, with the same length as ``x``.

	    Returns:
	        The dot product as a built-in ``float``.

	    Raises:
	        ValueError: Raised by NumPy when the two vectors differ in length.
	    """
	    # asarray avoids copying inputs that are already arrays; float() turns the
	    # NumPy scalar into the plain float promised by the return annotation.
	    return float(np.dot(np.asarray(x), np.asarray(y)))

	def select_document_section_by_query_similarity(query: str, contexts: dict[tuple[str, str], np.ndarray]) -> tuple[float, tuple[str, str]]:
	    """
	    Find the document section most relevant to the supplied query.

	    The query is embedded with ``get_embedding`` and compared against every
	    pre-calculated document embedding in ``contexts`` using
	    ``vector_similarity``.

	    Args:
	        query: The text to embed and match against the document sections.
	        contexts: Mapping from a document section key to that section's
	            pre-calculated embedding.

	    Returns:
	        A ``(similarity, section_key)`` tuple for the single best-matching
	        section. If several sections share the highest similarity, the one
	        whose key compares greatest is returned.

	    Raises:
	        IndexError: If ``contexts`` is empty.
	    """
	    if not contexts:
	        # Fail before calling get_embedding (imported from utils.openai_api,
	        # presumably a remote API call). IndexError is kept because that is
	        # what indexing the empty sorted list raised previously.
	        raise IndexError("contexts is empty: no document sections to rank")

	    query_embedding = get_embedding(query)

	    # max() over (similarity, key) tuples selects the same element that a
	    # descending sort would place first, without sorting every section.
	    return max(
	        (vector_similarity(query_embedding, doc_embedding), doc_index)
	        for doc_index, doc_embedding in contexts.items()
	    )