Spaces:
Runtime error
Runtime error
File size: 2,004 Bytes
35b22df |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
"""Utilities for GPT indices."""
import logging
import re
from typing import Dict, List, Optional, Set
from gpt_index.data_structs.data_structs import Node
from gpt_index.utils import globals_helper, truncate_text
from gpt_index.vector_stores.types import VectorStoreQueryResult
_logger = logging.getLogger(__name__)
def get_sorted_node_list(node_dict: Dict[int, Node]) -> List[Node]:
    """Return the values of ``node_dict`` ordered by ascending key.

    Used by tree-structured indices.
    """
    ordered_pairs = sorted(node_dict.items(), key=lambda pair: pair[0])
    return [node for _, node in ordered_pairs]
def extract_numbers_given_response(response: str, n: int = 1) -> Optional[List[int]]:
    """Extract the leading numbers from a GPT-generated response.

    Used by tree-structured indices.

    Args:
        response: Text to scan for runs of decimal digits.
        n: Maximum number of matches to keep, taken in order of appearance.

    Returns:
        ``None`` when ``response`` contains no digits; otherwise a list with
        at most ``n`` integers, one per digit run found.
    """
    digit_runs = re.findall(r"\d+", response)
    if not digit_runs:
        return None
    # re.findall yields strings; convert so the result matches the declared
    # List[int] return type.
    return [int(run) for run in digit_runs[:n]]
def expand_tokens_with_subtokens(tokens: Set[str]) -> Set[str]:
    """Expand tokens with the words they are made of.

    Every input token is kept as-is. A token that splits into more than one
    ``\\w+`` word additionally contributes each of those words, except the
    ones listed in ``globals_helper.stopwords``.
    """
    expanded = set()
    for token in tokens:
        expanded.add(token)
        words = re.findall(r"\w+", token)
        if len(words) <= 1:
            # A single-word token adds nothing beyond itself.
            continue
        for word in words:
            if word not in globals_helper.stopwords:
                expanded.add(word)
    return expanded
def log_vector_store_query_result(
    result: VectorStoreQueryResult, logger: Optional[logging.Logger] = None
) -> None:
    """Log a vector store query result at DEBUG level.

    Emits a single message: a "Top N nodes" header followed by one line per
    node showing its id, its similarity score and its text passed through
    ``truncate_text(..., 100)``.

    Args:
        result: Query result to describe. ``result.ids`` and ``result.nodes``
            must not be ``None``. When ``result.similarities`` is ``None`` or
            empty, every node is reported with a score of 1.0.
        logger: Logger to write to; the module-level logger is used when
            omitted.

    Raises:
        AssertionError: If ``result.ids`` or ``result.nodes`` is ``None``
            (only while assertions are enabled).
    """
    logger = logger or _logger
    assert result.ids is not None
    assert result.nodes is not None

    # Building the message reads and truncates every node's text; skip that
    # work entirely when the message would be discarded anyway.
    if not logger.isEnabledFor(logging.DEBUG):
        return

    similarities = result.similarities or [1.0 for _ in result.ids]
    # Adjacent f-string literals are used instead of a backslash continuation
    # so that source indentation can never end up inside the log message.
    fmt_txts = [
        f"> [Node {node_idx}] [Similarity score: "
        f"{float(node_similarity):.6}] {truncate_text(node.get_text(), 100)}"
        for node_idx, node_similarity, node in zip(
            result.ids, similarities, result.nodes
        )
    ]
    top_k_node_text = "\n".join(fmt_txts)
    logger.debug(f"> Top {len(result.nodes)} nodes:\n{top_k_node_text}")
|