"""Simple node parser.""" from typing import List, Optional, Sequence from gpt_index.data_structs.node_v2 import Node from gpt_index.langchain_helpers.text_splitter import TextSplitter, TokenTextSplitter from gpt_index.node_parser.node_utils import get_nodes_from_document from gpt_index.readers.schema.base import Document from gpt_index.node_parser.interface import NodeParser class SimpleNodeParser(NodeParser): """Simple node parser. Splits a document into Nodes using a TextSplitter. Args: text_splitter (Optional[TextSplitter]): text splitter include_extra_info (bool): whether to include extra info in nodes include_prev_next_rel (bool): whether to include prev/next relationships """ def __init__( self, text_splitter: Optional[TextSplitter] = None, include_extra_info: bool = True, include_prev_next_rel: bool = True, ) -> None: """Init params.""" self._text_splitter = text_splitter or TokenTextSplitter() self._include_extra_info = include_extra_info self._include_prev_next_rel = include_prev_next_rel def get_nodes_from_documents( self, documents: Sequence[Document], include_extra_info: bool = True, ) -> List[Node]: """Parse document into nodes. Args: documents (Sequence[Document]): documents to parse include_extra_info (bool): whether to include extra info in nodes """ all_nodes: List[Node] = [] for document in documents: nodes = get_nodes_from_document( document, self._text_splitter, include_extra_info, include_prev_next_rel=self._include_prev_next_rel, ) all_nodes.extend(nodes) return all_nodes