"""Pinecone Vector store index. | |
An index that that is built on top of an existing vector store. | |
""" | |
from typing import Any, Dict, List, Optional, cast | |
from gpt_index.data_structs.data_structs import Node | |
from gpt_index.vector_stores.types import ( | |
NodeEmbeddingResult, | |
VectorStore, | |
VectorStoreQueryResult, | |
) | |


class PineconeVectorStore(VectorStore):
    """Pinecone Vector Store.

    In this vector store, embeddings and docs are stored within a
    Pinecone index.

    During query time, the index uses Pinecone to query for the top
    k most similar nodes.

    Args:
        pinecone_index (Optional[pinecone.Index]): Pinecone index instance
        pinecone_kwargs (Optional[Dict]): kwargs to pass to Pinecone index

    """

    stores_text: bool = True

    def __init__(
        self,
        pinecone_index: Optional[Any] = None,
        pinecone_kwargs: Optional[Dict] = None,
    ) -> None:
        """Initialize params."""
        import_err_msg = (
            "`pinecone` package not found, please run `pip install pinecone-client`"
        )
        try:
            import pinecone  # noqa: F401
        except ImportError:
            raise ImportError(import_err_msg)
        self._pinecone_index = cast(pinecone.Index, pinecone_index)
        self._pinecone_kwargs = pinecone_kwargs or {}

    def config_dict(self) -> dict:
        """Return config dict."""
        return self._pinecone_kwargs

    def add(
        self,
        embedding_results: List[NodeEmbeddingResult],
    ) -> List[str]:
        """Add embedding results to index.

        Args:
            embedding_results: List[NodeEmbeddingResult]: list of embedding results

        """
        ids = []
        for result in embedding_results:
            new_id = result.id
            node = result.node
            text_embedding = result.embedding
            # Store the node text and source doc id as metadata alongside the vector.
            metadata = {
                "text": node.get_text(),
                "doc_id": result.doc_id,
            }
            self._pinecone_index.upsert(
                [(new_id, text_embedding, metadata)], **self._pinecone_kwargs
            )
            ids.append(new_id)
        return ids

    def delete(self, doc_id: str, **delete_kwargs: Any) -> None:
        """Delete a document.

        Args:
            doc_id (str): document id

        """
        # delete by filtering on the doc_id metadata
        self._pinecone_index.delete(
            filter={"doc_id": {"$eq": doc_id}}, **self._pinecone_kwargs
        )

    def client(self) -> Any:
        """Return Pinecone client."""
        return self._pinecone_index

    def query(
        self,
        query_embedding: List[float],
        similarity_top_k: int,
        doc_ids: Optional[List[str]] = None,
    ) -> VectorStoreQueryResult:
        """Query index for top k most similar nodes.

        Args:
            query_embedding (List[float]): query embedding
            similarity_top_k (int): top k most similar nodes
            doc_ids (Optional[List[str]]): doc ids to filter by (not currently
                applied to the Pinecone query)

        """
        response = self._pinecone_index.query(
            query_embedding,
            top_k=similarity_top_k,
            include_values=True,
            include_metadata=True,
            **self._pinecone_kwargs,
        )

        top_k_nodes = []
        top_k_ids = []
        top_k_scores = []
        for match in response.matches:
            # Reconstruct a Node from the text stored in the match metadata.
            text = match.metadata["text"]
            node = Node(text=text, extra_info=match.metadata)
            top_k_ids.append(match.id)
            top_k_nodes.append(node)
            top_k_scores.append(match.score)

        return VectorStoreQueryResult(
            nodes=top_k_nodes, similarities=top_k_scores, ids=top_k_ids
        )