# Spaces:
# Runtime error
# Runtime error
"""List index. | |
A simple data structure where LlamaIndex iterates through document chunks | |
in sequence in order to answer a given query. | |
""" | |
import logging
from typing import Any, Optional, Sequence

from gpt_index.data_structs.data_structs_v2 import IndexList
from gpt_index.data_structs.node_v2 import Node
from gpt_index.indices.base import BaseGPTIndex, QueryMap
from gpt_index.indices.list.embedding_query import GPTListIndexEmbeddingQuery
from gpt_index.indices.list.query import GPTListIndexQuery
from gpt_index.indices.query.schema import QueryMode
from gpt_index.indices.service_context import ServiceContext
from gpt_index.prompts.default_prompts import DEFAULT_TEXT_QA_PROMPT
from gpt_index.prompts.prompts import QuestionAnswerPrompt
# Query text used to summarize the contents of the index.
# NOTE(review): not referenced anywhere in the visible part of this module;
# presumably consumed by callers elsewhere — confirm before removing.
GENERATE_TEXT_QUERY = "What is a concise summary of this document?"
class GPTListIndex(BaseGPTIndex[IndexList]):
    """GPT List Index.

    The list index is a simple data structure where nodes are stored in
    a sequence. During index construction, the document texts are
    chunked up, converted to nodes, and stored in a list.

    During query time, the list index iterates through the nodes
    with some optional filter parameters, and synthesizes an
    answer from all the nodes.

    Args:
        text_qa_template (Optional[QuestionAnswerPrompt]): A Question-Answer Prompt
            (see :ref:`Prompt-Templates`).
            NOTE: this is a deprecated field.

    """

    index_struct_cls = IndexList

    def __init__(
        self,
        nodes: Optional[Sequence[Node]] = None,
        index_struct: Optional[IndexList] = None,
        service_context: Optional[ServiceContext] = None,
        text_qa_template: Optional[QuestionAnswerPrompt] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize params.

        Args:
            nodes (Optional[Sequence[Node]]): Nodes forwarded to the base
                index constructor.
            index_struct (Optional[IndexList]): An existing list index
                struct, forwarded to the base index constructor.
            service_context (Optional[ServiceContext]): Forwarded to the
                base index constructor.
            text_qa_template (Optional[QuestionAnswerPrompt]): Deprecated.
                Falls back to ``DEFAULT_TEXT_QA_PROMPT`` when not given
                (or falsy).
            **kwargs (Any): Extra keyword arguments forwarded unchanged to
                the base index constructor.

        """
        # Assigned before delegating to the base constructor; keep this
        # order (presumably the base class may run index construction
        # inside __init__ — TODO confirm).
        self.text_qa_template = text_qa_template or DEFAULT_TEXT_QA_PROMPT
        super().__init__(
            nodes=nodes,
            index_struct=index_struct,
            service_context=service_context,
            **kwargs,
        )

    def get_query_map(self) -> QueryMap:
        """Get query map.

        Returns:
            QueryMap: Mapping from each supported query mode to the query
                class that handles it (default and embedding-based).

        """
        return {
            QueryMode.DEFAULT: GPTListIndexQuery,
            QueryMode.EMBEDDING: GPTListIndexEmbeddingQuery,
        }

    def _build_index_from_nodes(self, nodes: Sequence[Node]) -> IndexList:
        """Build the index from nodes.

        Args:
            nodes (Sequence[Node]): Nodes to add to a fresh index struct,
                in the order given.

        Returns:
            IndexList: The created list index.

        """
        index_struct = IndexList()
        for n in nodes:
            index_struct.add_node(n)
        return index_struct

    def _insert(self, nodes: Sequence[Node], **insert_kwargs: Any) -> None:
        """Insert nodes into the existing index struct.

        Args:
            nodes (Sequence[Node]): Nodes to append, in the order given.
            **insert_kwargs (Any): Accepted for interface compatibility;
                not used here.

        """
        # Library code should not write to stdout; emit the per-node trace
        # through logging at DEBUG level so applications can opt in.
        logger = logging.getLogger(__name__)
        for n in nodes:
            logger.debug("inserting node to index struct: %s", n.get_doc_id())
            self._index_struct.add_node(n)

    def _delete(self, doc_id: str, **delete_kwargs: Any) -> None:
        """Delete a document.

        Drops from the index struct every node whose ``ref_doc_id`` equals
        ``doc_id``. Only the index struct's node list is rewritten here;
        this method does not remove anything from the docstore.

        Args:
            doc_id (str): Id of the source document whose nodes should be
                removed from the index.
            **delete_kwargs (Any): Accepted for interface compatibility;
                not used here.

        """
        # The index struct holds the ids handed to the docstore lookup;
        # resolve them to nodes so each node's ref_doc_id can be checked.
        cur_nodes = self._docstore.get_nodes(self._index_struct.nodes)
        self._index_struct.nodes = [
            n.get_doc_id() for n in cur_nodes if n.ref_doc_id != doc_id
        ]

    def _preprocess_query(self, mode: QueryMode, query_kwargs: Any) -> None:
        """Preprocess query.

        Runs the base-class preprocessing, then injects this index's
        ``text_qa_template`` into ``query_kwargs`` (mutated in place)
        unless the caller already supplied one.

        Args:
            mode (QueryMode): The query mode being used.
            query_kwargs (Any): Keyword arguments for the query; expected
                to support ``in`` and item assignment (e.g. a dict).

        """
        super()._preprocess_query(mode, query_kwargs)
        if "text_qa_template" not in query_kwargs:
            query_kwargs["text_qa_template"] = self.text_qa_template