File size: 4,505 Bytes
35b22df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
"""Query Configuration Schema.

This schema is used under the hood for all queries, but is primarily
exposed for recursive queries over composable indices.

"""

from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional

from dataclasses_json import DataClassJsonMixin


class QueryMode(str, Enum):
    """Query mode enum.

    Can be passed as the enum struct, or as the underlying string.

    Attributes:
        DEFAULT ("default"): Default query mode.
        RETRIEVE ("retrieve"): Retrieve mode. A special query for the tree
            index that retrieves the top nodes.
        EMBEDDING ("embedding"): Embedding mode. Embedding-based query.
        SUMMARIZE ("summarize"): Summarize mode. Used for hierarchical
            summarization in the tree index.
        SIMPLE ("simple"): Simple mode. Used for keyword extraction.
        RAKE ("rake"): RAKE mode. Used for keyword extraction.
        RECURSIVE ("recursive"): Recursive mode. Used to recursively query
            over composed indices. Deprecated.
        SQL ("sql"): SQL mode. Used for SQL queries.

    """

    DEFAULT = "default"
    # a special "retrieve" query for the tree index that retrieves the top nodes
    RETRIEVE = "retrieve"
    # embedding-based query
    EMBEDDING = "embedding"

    # hierarchical summarization using the tree index
    SUMMARIZE = "summarize"

    # keyword-extractor modes
    SIMPLE = "simple"
    RAKE = "rake"

    # recursive queries (composable queries)
    # NOTE: deprecated
    RECURSIVE = "recursive"

    # for sql queries
    SQL = "sql"


@dataclass
class QueryConfig(DataClassJsonMixin):
    """Query config.

    Used under the hood for all queries.
    The user must explicitly pass a list of query config objects during
    a query call to define configurations for each individual subindex within an
    overall composed index.

    The user may choose to specify either the query config objects directly,
    or as a list of JSON dictionaries. For instance, the following are equivalent:

    .. code-block:: python

        # using JSON dictionaries
        query_configs = [
            {
                # index_struct_id is optional
                "index_struct_id": "<index_struct_id>",
                "index_struct_type": "tree",
                "query_mode": "default",
                "query_kwargs": {
                    "child_branch_factor": 2
                }
            },
            ...
        ]
        response = index.query(
            "<query_str>", mode="recursive", query_configs=query_configs
        )

    .. code-block:: python

        # using QueryConfig objects
        query_configs = [
            QueryConfig(
                index_struct_id="<index_struct_id>",
                index_struct_type=IndexStructType.TREE,
                query_mode=QueryMode.DEFAULT,
                query_kwargs={
                    "child_branch_factor": 2
                }
            ),
            ...
        ]
        response = index.query(
            "<query_str>", mode="recursive", query_configs=query_configs
        )


    Args:
        index_struct_type (str): The type of index struct, e.g. "tree"
            (the example above also passes an IndexStructType member).
        query_mode (QueryMode): The query mode.
        query_kwargs (Dict[str, Any], optional): The query kwargs. Defaults to {}.
        index_struct_id (Optional[str]): The index struct id. This can be obtained
            by calling "get_doc_id" on the original index class, and set by
            calling "set_doc_id" on the original index class.
            Defaults to None.

    """

    # NOTE(review): annotated as str rather than IndexStructType; presumably
    # the enum's string value is what gets stored here -- confirm with callers.
    index_struct_type: str
    query_mode: QueryMode
    # default_factory gives every instance its own fresh dict
    query_kwargs: Dict[str, Any] = field(default_factory=dict)
    # NOTE: type as Optional because old query configs may not
    # have this field
    index_struct_id: Optional[str] = None


@dataclass
class QueryBundle(DataClassJsonMixin):
    """Query bundle.

    Holds the original query string together with the optional strings
    that should be embedded in its place.

    Args:
        query_str (str): the original user-specified query string.
            This is currently used by all non embedding-based queries.
        custom_embedding_strs (Optional[List[str]]): strings to embed
            instead of the query string. Defaults to None, in which case
            the query string itself is embedded.

    The strings actually used for embedding are exposed through the
    ``embedding_strs`` property, which is currently used by all
    embedding-based queries.
    """

    query_str: str
    custom_embedding_strs: Optional[List[str]] = None

    @property
    def embedding_strs(self) -> List[str]:
        """Return the custom embedding strs, or ``[query_str]`` if unset."""
        custom = self.custom_embedding_strs
        # Only None counts as "unset"; an explicitly supplied list
        # (even an empty one) is returned unchanged.
        return [self.query_str] if custom is None else custom