Spaces:
Runtime error
Runtime error
File size: 4,743 Bytes
b699122 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
"""Default query for GPTPandasIndex."""
import logging
from typing import Any, Callable, Optional
import pandas as pd
from langchain.input import print_text
from gpt_index.data_structs.table_v2 import PandasStructTable
from gpt_index.indices.query.base import BaseGPTIndexQuery
from gpt_index.indices.query.schema import QueryBundle
from gpt_index.prompts.default_prompts import DEFAULT_PANDAS_PROMPT
from gpt_index.prompts.prompts import PandasPrompt
from gpt_index.response.schema import Response
logger = logging.getLogger(__name__)

# Default instruction text; GPTNLPandasIndexQuery passes it to the LLM as
# ``instruction_str`` when the caller does not supply one.  It asks for code
# whose last line is an expression, which is what default_output_processor
# evaluates.
DEFAULT_INSTRUCTION_STR = "\n".join(
    (
        "We wish to convert this query to executable Python code using Pandas.",
        "The final line of code should be a Python expression that can be called "
        "with the `eval()` function. This expression should represent a solution "
        "to the query.",
    )
)
def default_output_processor(
    output: str, df: pd.DataFrame, **output_kwargs: Any
) -> str:
    """Run LLM-generated pandas code and return its result as a string.

    Every statement except the last is executed with ``exec``; the last
    statement is evaluated with ``eval`` and its value is converted with
    ``str``.  The dataframe is visible to the code under the name ``df``.

    Args:
        output: Python source code to run.
        df: Dataframe exposed to the code as ``df``.
        **output_kwargs: Accepted for interface compatibility; not used by
            this processor.

    Returns:
        The stringified value of the final expression.  On Python < 3.9 the
        raw ``output`` is returned unchanged (``ast.unparse`` does not exist
        there).  If parsing or execution fails, an error message string is
        returned instead of raising.
    """
    import ast
    import sys
    import traceback

    if sys.version_info < (3, 9):
        # ``Logger.warn`` is a deprecated alias; use ``warning``.
        logger.warning(
            "Python version must be >= 3.9 in order to use "
            "the default output processor, which executes "
            "the Python query. Instead, we will return the "
            "raw Python instructions as a string."
        )
        return output

    # SECURITY NOTE: ``output`` is executed verbatim via exec()/eval().  It
    # comes from an LLM and must be treated as untrusted code; only use this
    # processor in an environment where arbitrary code execution is acceptable.
    #
    # A single dict serves as both globals and locals.  With separate dicts,
    # names such as ``df`` or variables assigned by earlier statements live
    # only in the locals mapping, so lambdas, nested functions and (before
    # Python 3.12) comprehensions in the generated code cannot resolve them
    # and fail with NameError.
    namespace: dict = {"df": df}

    # NOTE: inspired from langchain's tool
    # see langchain.tools.python.tool (PythonAstREPLTool)
    try:
        tree = ast.parse(output)
        # Run everything but the final statement for its side effects.
        setup_module = ast.Module(tree.body[:-1], type_ignores=[])
        exec(ast.unparse(setup_module), namespace)  # type: ignore
        # The final statement is expected to be an expression; its value is
        # the answer.
        final_module = ast.Module(tree.body[-1:], type_ignores=[])
        final_source = ast.unparse(final_module)  # type: ignore
        return str(eval(final_source, namespace))
    except Exception as e:
        # Best-effort contract: report the failure as text rather than raise,
        # while still dumping the traceback for debugging.
        err_string = (
            "There was an error running the output as Python code. "
            f"Error message: {e}"
        )
        traceback.print_exc()
        return err_string
class GPTNLPandasIndexQuery(BaseGPTIndexQuery[PandasStructTable]):
    """Natural-language query over a pandas DataFrame.

    The query string is sent to the LLM together with a preview of the
    dataframe and an instruction string; the LLM's reply (pandas Python code)
    is then passed to an output processor, whose return value becomes the
    response text.

    .. code-block:: python

        response = index.query("<query_str>", mode="default")

    Args:
        index_struct (PandasStructTable): Index struct forwarded to the base
            query class.
        df (pd.DataFrame): Pandas dataframe to use. Required despite the
            ``None`` default; a ``ValueError`` is raised when it is missing.
        instruction_str (Optional[str]): Instruction string to use. Falls
            back to ``DEFAULT_INSTRUCTION_STR``.
        output_processor (Optional[Callable]): Output processor, called as
            ``output_processor(llm_output, df, **output_kwargs)``; its return
            value is used as the response. Falls back to
            ``default_output_processor``.
        pandas_prompt (Optional[PandasPrompt]): Pandas prompt to use. Falls
            back to ``DEFAULT_PANDAS_PROMPT``.
        output_kwargs (Optional[dict]): Extra keyword arguments forwarded to
            the output processor. Falls back to an empty dict.
        head (int): Number of leading rows shown in the table context.

    """

    def __init__(
        self,
        index_struct: PandasStructTable,
        df: Optional[pd.DataFrame] = None,
        instruction_str: Optional[str] = None,
        output_processor: Optional[Callable] = None,
        pandas_prompt: Optional[PandasPrompt] = None,
        output_kwargs: Optional[dict] = None,
        head: int = 5,
        **kwargs: Any,
    ) -> None:
        """Store the dataframe and resolve defaults for unset options."""
        # Base-class initialisation runs before the dataframe check.
        super().__init__(index_struct=index_struct, **kwargs)
        if df is None:
            raise ValueError("df must be provided.")

        self.df = df
        self._head = head

        # Any falsy argument is replaced by the module-level default.
        self._pandas_prompt = (
            pandas_prompt if pandas_prompt else DEFAULT_PANDAS_PROMPT
        )
        self._instruction_str = (
            instruction_str if instruction_str else DEFAULT_INSTRUCTION_STR
        )
        self._output_processor = (
            output_processor if output_processor else default_output_processor
        )
        self._output_kwargs = output_kwargs if output_kwargs else {}

    def _get_table_context(self) -> str:
        """Return the first ``head`` rows of the dataframe rendered as text."""
        preview = self.df.head(self._head)
        return str(preview)

    def _query(self, query_bundle: QueryBundle) -> Response:
        """Answer a query by generating pandas code and processing it."""
        table_context = self._get_table_context()

        # predict() returns a pair; only the first element is used here.
        predictor = self._service_context.llm_predictor
        instructions, _ = predictor.predict(
            self._pandas_prompt,
            df_str=table_context,
            query_str=query_bundle.query_str,
            instruction_str=self._instruction_str,
        )
        if self._verbose:
            print_text(f"> Pandas Instructions:\n```\n{instructions}\n```\n")

        result = self._output_processor(
            instructions, self.df, **self._output_kwargs
        )
        if self._verbose:
            print_text(f"> Pandas Output: {result}\n")

        # The generated code travels alongside the answer so callers can
        # inspect it.
        return Response(
            response=result,
            extra_info={"pandas_instruction_str": instructions},
        )
|