Spaces:
Runtime error
Runtime error
"""Utilities for GPT indices.""" | |
import logging | |
import re | |
from typing import Dict, List, Optional, Set | |
from gpt_index.data_structs.data_structs import Node | |
from gpt_index.utils import globals_helper, truncate_text | |
from gpt_index.vector_stores.types import VectorStoreQueryResult | |
_logger = logging.getLogger(__name__) | |
def get_sorted_node_list(node_dict: Dict[int, Node]) -> List[Node]:
    """Return the nodes of ``node_dict`` ordered by ascending key.

    Used by tree-structured indices.
    """
    ordered_nodes = []
    for key in sorted(node_dict):
        ordered_nodes.append(node_dict[key])
    return ordered_nodes
def extract_numbers_given_response(response: str, n: int = 1) -> Optional[List[int]]:
    """Extract up to ``n`` numbers from a GPT-generated response.

    Used by tree-structured indices.

    Args:
        response: Raw text returned by the model.
        n: Maximum number of numbers to return, in order of appearance.

    Returns:
        The first ``n`` runs of digits found in ``response``, converted to
        ``int``, or ``None`` when ``response`` contains no digits at all.
    """
    digit_runs = re.findall(r"\d+", response)
    if not digit_runs:
        return None
    # re.findall yields strings; convert them so the result matches the
    # declared List[int] return type. Callers that wrap an item in int(...)
    # keep working unchanged.
    return [int(run) for run in digit_runs[:n]]
def expand_tokens_with_subtokens(tokens: Set[str]) -> Set[str]:
    """Expand tokens with their word-level sub-tokens, filtering for stopwords.

    Every input token is kept as-is. A token that splits into more than one
    word additionally contributes each of its words that is not a stopword.
    """
    expanded = set()
    for tok in tokens:
        expanded.add(tok)
        words = re.findall(r"\w+", tok)
        if len(words) <= 1:
            # A single-word (or wordless) token has nothing further to add.
            continue
        for word in words:
            if word not in globals_helper.stopwords:
                expanded.add(word)
    return expanded
def log_vector_store_query_result(
    result: VectorStoreQueryResult, logger: Optional[logging.Logger] = None
) -> None:
    """Log vector store query result.

    Emits a single DEBUG record listing, for each returned node, its id, its
    similarity score and its text passed through ``truncate_text(..., 100)``.

    Args:
        result: Query result to log. ``result.ids`` and ``result.nodes`` must
            not be ``None``. When ``result.similarities`` is missing or empty,
            a score of 1.0 is reported for every node.
        logger: Logger to write to; falls back to this module's logger.
    """
    logger = logger or _logger
    assert result.ids is not None
    assert result.nodes is not None

    # Skip the per-node formatting entirely when nobody will see the record.
    if not logger.isEnabledFor(logging.DEBUG):
        return

    similarities = result.similarities or [1.0 for _ in result.ids]
    # Adjacent f-string literals are used instead of a backslash continuation
    # inside the literal, which would embed the continuation line's
    # indentation in the logged message.
    fmt_txts = [
        f"> [Node {node_idx}] [Similarity score: {float(node_similarity):.6}] "
        f"{truncate_text(node.get_text(), 100)}"
        for node_idx, node_similarity, node in zip(
            result.ids, similarities, result.nodes
        )
    ]
    top_k_node_text = "\n".join(fmt_txts)
    logger.debug("> Top %d nodes:\n%s", len(result.nodes), top_k_node_text)