Spaces:
Runtime error
Runtime error
"""Default query for GPTPandasIndex.""" | |
import logging | |
from typing import Any, Callable, Optional | |
import pandas as pd | |
from langchain.input import print_text | |
from gpt_index.data_structs.table_v2 import PandasStructTable | |
from gpt_index.indices.query.base import BaseGPTIndexQuery | |
from gpt_index.indices.query.schema import QueryBundle | |
from gpt_index.prompts.default_prompts import DEFAULT_PANDAS_PROMPT | |
from gpt_index.prompts.prompts import PandasPrompt | |
from gpt_index.response.schema import Response | |
logger = logging.getLogger(__name__) | |
DEFAULT_INSTRUCTION_STR = ( | |
"We wish to convert this query to executable Python code using Pandas.\n" | |
"The final line of code should be a Python expression that can be called " | |
"with the `eval()` function. This expression should represent a solution " | |
"to the query." | |
) | |
def default_output_processor( | |
output: str, df: pd.DataFrame, **output_kwargs: Any | |
) -> str: | |
"""Process outputs in a default manner.""" | |
import ast | |
import sys | |
import traceback | |
if sys.version_info < (3, 9): | |
logger.warn( | |
"Python version must be >= 3.9 in order to use " | |
"the default output processor, which executes " | |
"the Python query. Instead, we will return the " | |
"raw Python instructions as a string." | |
) | |
return output | |
local_vars = {"df": df} | |
# NOTE: inspired from langchain's tool | |
# see langchain.tools.python.tool (PythonAstREPLTool) | |
try: | |
tree = ast.parse(output) | |
module = ast.Module(tree.body[:-1], type_ignores=[]) | |
exec(ast.unparse(module), {}, local_vars) # type: ignore | |
module_end = ast.Module(tree.body[-1:], type_ignores=[]) | |
module_end_str = ast.unparse(module_end) # type: ignore | |
try: | |
return str(eval(module_end_str, {}, local_vars)) | |
except Exception as e: | |
raise e | |
except Exception as e: | |
err_string = ( | |
"There was an error running the output as Python code. " | |
f"Error message: {e}" | |
) | |
traceback.print_exc() | |
return err_string | |
class GPTNLPandasIndexQuery(BaseGPTIndexQuery[PandasStructTable]): | |
"""GPT Pandas query. | |
Convert natural language to Pandas python code. | |
.. code-block:: python | |
response = index.query("<query_str>", mode="default") | |
Args: | |
df (pd.DataFrame): Pandas dataframe to use. | |
instruction_str (Optional[str]): Instruction string to use. | |
output_processor (Optional[Callable[[str], str]]): Output processor. | |
A callable that takes in the output string, pandas DataFrame, | |
and any output kwargs and returns a string. | |
pandas_prompt (Optional[PandasPrompt]): Pandas prompt to use. | |
head (int): Number of rows to show in the table context. | |
""" | |
def __init__( | |
self, | |
index_struct: PandasStructTable, | |
df: Optional[pd.DataFrame] = None, | |
instruction_str: Optional[str] = None, | |
output_processor: Optional[Callable] = None, | |
pandas_prompt: Optional[PandasPrompt] = None, | |
output_kwargs: Optional[dict] = None, | |
head: int = 5, | |
**kwargs: Any, | |
) -> None: | |
"""Initialize params.""" | |
super().__init__(index_struct=index_struct, **kwargs) | |
if df is None: | |
raise ValueError("df must be provided.") | |
self.df = df | |
self._head = head | |
self._pandas_prompt = pandas_prompt or DEFAULT_PANDAS_PROMPT | |
self._instruction_str = instruction_str or DEFAULT_INSTRUCTION_STR | |
self._output_processor = output_processor or default_output_processor | |
self._output_kwargs = output_kwargs or {} | |
def _get_table_context(self) -> str: | |
"""Get table context.""" | |
return str(self.df.head(self._head)) | |
def _query(self, query_bundle: QueryBundle) -> Response: | |
"""Answer a query.""" | |
context = self._get_table_context() | |
pandas_response_str, _ = self._service_context.llm_predictor.predict( | |
self._pandas_prompt, | |
df_str=context, | |
query_str=query_bundle.query_str, | |
instruction_str=self._instruction_str, | |
) | |
if self._verbose: | |
print_text(f"> Pandas Instructions:\n" f"```\n{pandas_response_str}\n```\n") | |
pandas_output = self._output_processor( | |
pandas_response_str, | |
self.df, | |
**self._output_kwargs, | |
) | |
if self._verbose: | |
print_text(f"> Pandas Output: {pandas_output}\n") | |
response_extra_info = { | |
"pandas_instruction_str": pandas_response_str, | |
} | |
return Response(response=pandas_output, extra_info=response_extra_info) | |