Spaces:
Runtime error
Runtime error
File size: 8,043 Bytes
35b22df |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 |
"""Vector-store specific query classes."""
from typing import Any, Dict, Optional
from gpt_index.data_structs.data_structs import IndexDict
from gpt_index.indices.query.vector_store.base import GPTVectorStoreIndexQuery
from gpt_index.vector_stores import (
ChromaVectorStore,
FaissVectorStore,
OpensearchVectorStore,
PineconeVectorStore,
QdrantVectorStore,
SimpleVectorStore,
WeaviateVectorStore,
)
from gpt_index.vector_stores.opensearch import OpensearchVectorClient
class GPTSimpleVectorIndexQuery(GPTVectorStoreIndexQuery):
    """GPT simple vector index query.

    Args:
        embed_model (Optional[BaseEmbedding]): embedding model
        similarity_top_k (int): number of top k results to return
        simple_vector_store_data_dict: (Optional[dict]): simple vector store data dict,

    """

    def __init__(
        self,
        index_struct: IndexDict,
        simple_vector_store_data_dict: Optional[Dict] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize params."""
        # TODO: temporary hack so that composable indices work with simple
        # vector stores. The composability framework currently persists only
        # index_struct (not vector_store), so when no data dict is supplied
        # we "infer" one from the embeddings stored on the index struct.
        # NOTE: a future refactor should let users pass the vector store at
        # query time; that is blocked today because query configs are keyed
        # on index type rather than index id, so users cannot provide
        # distinct vector stores for different subindices.
        # NOTE: composability on top of other vector stores
        # (pinecone/weaviate) was already broken in this form.
        if simple_vector_store_data_dict is None:
            if not index_struct.embeddings_dict:
                raise ValueError("Vector store is required for vector store query.")
            simple_vector_store_data_dict = {
                "embedding_dict": index_struct.embeddings_dict,
            }
        store = SimpleVectorStore(
            simple_vector_store_data_dict=simple_vector_store_data_dict
        )
        super().__init__(index_struct=index_struct, vector_store=store, **kwargs)
class GPTFaissIndexQuery(GPTVectorStoreIndexQuery):
    """GPT faiss vector index query.

    Args:
        embed_model (Optional[BaseEmbedding]): embedding model
        similarity_top_k (int): number of top k results to return
        faiss_index (faiss.Index): A Faiss Index object (required). Note: the index
            will be reset during index construction.

    """

    def __init__(
        self,
        index_struct: IndexDict,
        faiss_index: Optional[Any] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize params."""
        # A Faiss index must be supplied explicitly; there is no fallback.
        if faiss_index is None:
            raise ValueError("faiss_index is required.")
        super().__init__(
            index_struct=index_struct,
            vector_store=FaissVectorStore(faiss_index),
            **kwargs,
        )
class GPTPineconeIndexQuery(GPTVectorStoreIndexQuery):
    """GPT pinecone vector index query.

    Args:
        embed_model (Optional[BaseEmbedding]): embedding model
        similarity_top_k (int): number of top k results to return
        pinecone_index (Optional[pinecone.Index]): Pinecone index instance
        pinecone_kwargs (Optional[dict]): Pinecone index kwargs

    """

    def __init__(
        self,
        index_struct: IndexDict,
        pinecone_index: Optional[Any] = None,
        pinecone_kwargs: Optional[Dict] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize params."""
        # At least one of the two Pinecone configuration inputs must be
        # provided (identity check on purpose: an empty kwargs dict counts
        # as provided).
        if pinecone_index is None and pinecone_kwargs is None:
            raise ValueError("pinecone_index or pinecone_kwargs is required.")
        store = PineconeVectorStore(
            pinecone_index=pinecone_index,
            pinecone_kwargs=pinecone_kwargs,
        )
        super().__init__(index_struct=index_struct, vector_store=store, **kwargs)
class GPTWeaviateIndexQuery(GPTVectorStoreIndexQuery):
    """GPT Weaviate vector index query.

    Args:
        embed_model (Optional[BaseEmbedding]): embedding model
        similarity_top_k (int): number of top k results to return
        weaviate_client (Optional[Any]): Weaviate client instance
        class_prefix (Optional[str]): Weaviate class prefix

    """

    def __init__(
        self,
        index_struct: IndexDict,
        weaviate_client: Optional[Any] = None,
        class_prefix: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize params."""
        # A live Weaviate client is mandatory; class_prefix may stay None
        # (the store picks its own default in that case — verify in store).
        if weaviate_client is None:
            raise ValueError("weaviate_client is required.")
        super().__init__(
            index_struct=index_struct,
            vector_store=WeaviateVectorStore(
                weaviate_client=weaviate_client, class_prefix=class_prefix
            ),
            **kwargs,
        )
class GPTQdrantIndexQuery(GPTVectorStoreIndexQuery):
    """GPT Qdrant vector index query.

    Args:
        embed_model (Optional[BaseEmbedding]): embedding model
        similarity_top_k (int): number of top k results to return
        client (Optional[Any]): QdrantClient instance from `qdrant-client` package
        collection_name: (Optional[str]): name of the Qdrant collection

    """

    def __init__(
        self,
        index_struct: IndexDict,
        client: Optional[Any] = None,
        collection_name: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize params."""
        # Both inputs are required; check client first so its error message
        # takes precedence when both are missing.
        if client is None:
            raise ValueError("client is required.")
        if collection_name is None:
            raise ValueError("collection_name is required.")
        store = QdrantVectorStore(client=client, collection_name=collection_name)
        super().__init__(index_struct=index_struct, vector_store=store, **kwargs)
class GPTChromaIndexQuery(GPTVectorStoreIndexQuery):
    """GPT Chroma vector index query.

    Args:
        text_qa_template (Optional[QuestionAnswerPrompt]): A Question-Answer Prompt
            (see :ref:`Prompt-Templates`).
        embed_model (Optional[BaseEmbedding]): Embedding model to use for
            embedding similarity.
        chroma_collection (Optional[Any]): Collection instance from `chromadb` package.

    """

    def __init__(
        self,
        index_struct: IndexDict,
        chroma_collection: Optional[Any] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize params."""
        # The Chroma collection is the backing store and cannot be inferred.
        if chroma_collection is None:
            raise ValueError("chroma_collection is required.")
        super().__init__(
            index_struct=index_struct,
            vector_store=ChromaVectorStore(chroma_collection=chroma_collection),
            **kwargs,
        )
class GPTOpensearchIndexQuery(GPTVectorStoreIndexQuery):
    """GPT Opensearch vector index query.

    Args:
        text_qa_template (Optional[QuestionAnswerPrompt]): A Question-Answer Prompt
            (see :ref:`Prompt-Templates`).
        embed_model (Optional[BaseEmbedding]): Embedding model to use for
            embedding similarity.
        client (Optional[OpensearchVectorClient]): Opensearch vector client.

    """

    def __init__(
        self,
        index_struct: IndexDict,
        client: Optional[OpensearchVectorClient] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize params."""
        # A configured OpensearchVectorClient is required to reach the cluster.
        if client is None:
            raise ValueError("OpensearchVectorClient client is required.")
        super().__init__(
            index_struct=index_struct,
            vector_store=OpensearchVectorStore(client=client),
            **kwargs,
        )
|