"""Pinecone Vector store index.
An index that that is built on top of an existing vector store.
"""
from typing import Any, Dict, List, Optional, cast

from gpt_index.data_structs.data_structs import Node
from gpt_index.vector_stores.types import (
    NodeEmbeddingResult,
    VectorStore,
    VectorStoreQueryResult,
)


class PineconeVectorStore(VectorStore):
    """Pinecone Vector Store.

    In this vector store, embeddings and docs are stored within a
    Pinecone index.

    During query time, the index uses Pinecone to query for the top
    k most similar nodes.

    Args:
        pinecone_index (Optional[pinecone.Index]): Pinecone index instance
        pinecone_kwargs (Optional[Dict]): kwargs to pass to Pinecone index

    """

    stores_text: bool = True

    def __init__(
        self,
        pinecone_index: Optional[Any] = None,
        pinecone_kwargs: Optional[Dict] = None,
    ) -> None:
        """Initialize params."""
        import_err_msg = (
            "`pinecone` package not found, please run `pip install pinecone-client`"
        )
        try:
            import pinecone  # noqa: F401
        except ImportError:
            raise ImportError(import_err_msg)
        self._pinecone_index = cast(pinecone.Index, pinecone_index)
        self._pinecone_kwargs = pinecone_kwargs or {}

    @property
    def config_dict(self) -> dict:
        """Return config dict."""
        return self._pinecone_kwargs

    def add(
        self,
        embedding_results: List[NodeEmbeddingResult],
    ) -> List[str]:
        """Add embedding results to index.

        Args:
            embedding_results (List[NodeEmbeddingResult]): list of embedding results

        """
        ids = []
        for result in embedding_results:
            new_id = result.id
            node = result.node
            text_embedding = result.embedding

            metadata = {
                "text": node.get_text(),
                "doc_id": result.doc_id,
            }

            self._pinecone_index.upsert(
                [(new_id, text_embedding, metadata)], **self._pinecone_kwargs
            )

            ids.append(new_id)
        return ids

    def delete(self, doc_id: str, **delete_kwargs: Any) -> None:
        """Delete a document.

        Args:
            doc_id (str): document id

        """
        # delete by filtering on the doc_id metadata
        self._pinecone_index.delete(
            filter={"doc_id": {"$eq": doc_id}}, **self._pinecone_kwargs
        )

    @property
    def client(self) -> Any:
        """Return Pinecone client."""
        return self._pinecone_index

    def query(
        self,
        query_embedding: List[float],
        similarity_top_k: int,
        doc_ids: Optional[List[str]] = None,
    ) -> VectorStoreQueryResult:
        """Query index for top k most similar nodes.

        Args:
            query_embedding (List[float]): query embedding
            similarity_top_k (int): top k most similar nodes

        """
        # NOTE: doc_ids is accepted for interface compatibility but is not
        # used by this implementation.
        response = self._pinecone_index.query(
            query_embedding,
            top_k=similarity_top_k,
            include_values=True,
            include_metadata=True,
            **self._pinecone_kwargs,
        )

        top_k_nodes = []
        top_k_ids = []
        top_k_scores = []
        for match in response.matches:
            text = match.metadata["text"]
            node = Node(text=text, extra_info=match.metadata)
            top_k_ids.append(match.id)
            top_k_nodes.append(node)
            top_k_scores.append(match.score)

        return VectorStoreQueryResult(
            nodes=top_k_nodes, similarities=top_k_scores, ids=top_k_ids
        )
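

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of how this store might be wired up. It assumes a
# Pinecone index already exists and that `NodeEmbeddingResult` accepts the
# fields this module reads (id, node, embedding, doc_id) as keyword
# arguments; the index name, API key, environment, and embedding values
# below are placeholders.
if __name__ == "__main__":
    import pinecone

    # Connect to an existing Pinecone index (pinecone-client v2-style setup).
    pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp")
    index = pinecone.Index("example-index")

    store = PineconeVectorStore(pinecone_index=index)

    # Upsert one node with a (placeholder) embedding.
    node = Node(text="hello world")
    result = NodeEmbeddingResult(
        id="node-1", node=node, embedding=[0.1, 0.2, 0.3], doc_id="doc-1"
    )
    store.add([result])

    # Query for the single most similar node and inspect the result.
    query_result = store.query(query_embedding=[0.1, 0.2, 0.3], similarity_top_k=1)
    print(query_result.ids, query_result.similarities)

    # Delete everything associated with the source document.
    store.delete("doc-1")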