Spaces:
Runtime error
Runtime error
File size: 3,882 Bytes
8a58cf3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
"""Faiss Vector store index.
An index that is built on top of an existing vector store.
"""
from typing import Any, List, Optional, cast
import numpy as np
from gpt_index.vector_stores.types import (
NodeEmbeddingResult,
VectorStore,
VectorStoreQueryResult,
)
class FaissVectorStore(VectorStore):
    """Faiss Vector Store.

    Embeddings are stored within a Faiss index.

    During query time, the index uses Faiss to query for the top
    k embeddings, and returns the corresponding indices.

    Node ids handed out by this store are the stringified positions of the
    vectors inside the Faiss index (``"0"``, ``"1"``, ...), taken from
    ``faiss_index.ntotal`` at insertion time.

    Args:
        faiss_index (faiss.Index): Faiss index instance

    """

    # Only embeddings live in Faiss; node text must be kept elsewhere.
    stores_text: bool = False

    def __init__(
        self,
        faiss_index: Any,
    ) -> None:
        """Initialize params.

        Args:
            faiss_index (faiss.Index): Faiss index instance to wrap.

        Raises:
            ImportError: if the `faiss` package is not installed.

        """
        import_err_msg = (
            "\n"
            "`faiss` package not found. For instructions on\n"
            "how to install `faiss` please visit\n"
            "https://github.com/facebookresearch/faiss/wiki/Installing-Faiss\n"
        )
        try:
            import faiss  # noqa: F401
        except ImportError as err:
            # Keep the original failure attached for easier debugging.
            raise ImportError(import_err_msg) from err

        self._faiss_index = cast(faiss.Index, faiss_index)

    @property
    def config_dict(self) -> dict:
        """Return config dict."""
        return {}

    def add(
        self,
        embedding_results: List[NodeEmbeddingResult],
    ) -> List[str]:
        """Add embedding results to index.

        NOTE: in the Faiss vector store, we do not store text in Faiss.

        Args:
            embedding_results: List[NodeEmbeddingResult]: list of embedding results

        Returns:
            List[str]: one id per added embedding, in input order. Each id is
                the value of ``ntotal`` on the Faiss index just before that
                embedding was added, converted to a string.

        """
        new_ids = []
        for result in embedding_results:
            text_embedding = result.embedding
            # Faiss expects a 2-D float32 array, so make a single-row matrix.
            text_embedding_np = np.array(text_embedding, dtype="float32")[np.newaxis, :]
            # Read ntotal *before* adding: it is the position the vector gets.
            new_id = str(self._faiss_index.ntotal)
            self._faiss_index.add(text_embedding_np)
            new_ids.append(new_id)
        return new_ids

    @property
    def client(self) -> Any:
        """Return the faiss index."""
        return self._faiss_index

    @classmethod
    def load(cls, save_path: str) -> "FaissVectorStore":
        """Load vector store from disk.

        Args:
            save_path (str): The save_path of the file.

        Returns:
            FaissVectorStore: The loaded vector store.

        """
        import faiss

        faiss_index = faiss.read_index(save_path)
        return cls(faiss_index)

    def save(
        self,
        save_path: str,
    ) -> None:
        """Save to file.

        This method saves the vector store to disk.

        Args:
            save_path (str): The save_path of the file.

        """
        import faiss

        faiss.write_index(self._faiss_index, save_path)

    def delete(self, doc_id: str, **delete_kwargs: Any) -> None:
        """Delete a document.

        Args:
            doc_id (str): document id

        Raises:
            NotImplementedError: always; deletion is not supported here.

        """
        raise NotImplementedError("Delete not yet implemented for Faiss index.")

    def query(
        self,
        query_embedding: List[float],
        similarity_top_k: int,
        doc_ids: Optional[List[str]] = None,
    ) -> VectorStoreQueryResult:
        """Query index for top k most similar nodes.

        Args:
            query_embedding (List[float]): query embedding
            similarity_top_k (int): top k most similar nodes
            doc_ids (Optional[List[str]]): accepted for interface
                compatibility; this implementation does not use it.

        Returns:
            VectorStoreQueryResult: ``ids`` holds the stringified Faiss
                positions of the matches and ``similarities`` holds the score
                Faiss reported for each of them, in the same order. Both
                lists always have the same length, which may be smaller than
                ``similarity_top_k`` when the index has fewer matches.

        """
        # Faiss expects a 2-D float32 array, so make a single-row matrix.
        query_embedding_np = np.array(query_embedding, dtype="float32")[np.newaxis, :]
        dists, indices = self._faiss_index.search(query_embedding_np, similarity_top_k)

        # if empty, then return an empty response
        if len(indices) == 0:
            return VectorStoreQueryResult(similarities=[], ids=[])

        # returned dimension is 1 x k: row 0 belongs to our single query.
        similarities = []
        node_idxs = []
        for dist, idx in zip(dists[0], indices[0]):
            # Faiss pads missing neighbours with label -1; those are not nodes.
            if idx < 0:
                continue
            similarities.append(float(dist))
            node_idxs.append(str(idx))

        return VectorStoreQueryResult(similarities=similarities, ids=node_idxs)
|