binhnase04854's picture
first deploy
b699122
"""Simple node parser."""
from typing import List, Optional, Sequence
from gpt_index.data_structs.node_v2 import Node
from gpt_index.langchain_helpers.text_splitter import TextSplitter, TokenTextSplitter
from gpt_index.node_parser.node_utils import get_nodes_from_document
from gpt_index.readers.schema.base import Document
from gpt_index.node_parser.interface import NodeParser
class SimpleNodeParser(NodeParser):
"""Simple node parser.
Splits a document into Nodes using a TextSplitter.
Args:
text_splitter (Optional[TextSplitter]): text splitter
include_extra_info (bool): whether to include extra info in nodes
include_prev_next_rel (bool): whether to include prev/next relationships
"""
def __init__(
self,
text_splitter: Optional[TextSplitter] = None,
include_extra_info: bool = True,
include_prev_next_rel: bool = True,
) -> None:
"""Init params."""
self._text_splitter = text_splitter or TokenTextSplitter()
self._include_extra_info = include_extra_info
self._include_prev_next_rel = include_prev_next_rel
def get_nodes_from_documents(
self,
documents: Sequence[Document],
include_extra_info: bool = True,
) -> List[Node]:
"""Parse document into nodes.
Args:
documents (Sequence[Document]): documents to parse
include_extra_info (bool): whether to include extra info in nodes
"""
all_nodes: List[Node] = []
for document in documents:
nodes = get_nodes_from_document(
document,
self._text_splitter,
include_extra_info,
include_prev_next_rel=self._include_prev_next_rel,
)
all_nodes.extend(nodes)
return all_nodes