File size: 1,846 Bytes
b699122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
"""Simple node parser."""
from typing import List, Optional, Sequence

from gpt_index.data_structs.node_v2 import Node
from gpt_index.langchain_helpers.text_splitter import TextSplitter, TokenTextSplitter
from gpt_index.node_parser.node_utils import get_nodes_from_document
from gpt_index.readers.schema.base import Document
from gpt_index.node_parser.interface import NodeParser


class SimpleNodeParser(NodeParser):
    """Simple node parser.

    Splits a document into Nodes using a TextSplitter.

    Args:
        text_splitter (Optional[TextSplitter]): text splitter; a default
            TokenTextSplitter is created when none is given
        include_extra_info (bool): whether to include extra info in nodes
        include_prev_next_rel (bool): whether to include prev/next relationships

    """

    def __init__(
        self,
        text_splitter: Optional[TextSplitter] = None,
        include_extra_info: bool = True,
        include_prev_next_rel: bool = True,
    ) -> None:
        """Init params."""
        self._text_splitter = text_splitter or TokenTextSplitter()
        self._include_extra_info = include_extra_info
        self._include_prev_next_rel = include_prev_next_rel

    def get_nodes_from_documents(
        self,
        documents: Sequence[Document],
        include_extra_info: Optional[bool] = None,
    ) -> List[Node]:
        """Parse documents into nodes.

        Args:
            documents (Sequence[Document]): documents to parse
            include_extra_info (Optional[bool]): per-call override for whether
                to include extra info in nodes. When None (the default), the
                value passed to the constructor is used.

        Returns:
            List[Node]: nodes of every document, concatenated in input order

        """
        # Previously this argument defaulted to True and the constructor
        # setting was never consulted, so a parser built with
        # include_extra_info=False still attached extra info. Fall back to
        # the instance-level setting unless the caller overrides it explicitly.
        if include_extra_info is None:
            include_extra_info = self._include_extra_info

        all_nodes: List[Node] = []
        for document in documents:
            all_nodes.extend(
                get_nodes_from_document(
                    document,
                    self._text_splitter,
                    include_extra_info,
                    include_prev_next_rel=self._include_prev_next_rel,
                )
            )
        return all_nodes