"""Default query for GPTPandasIndex."""

import logging
from typing import Any, Callable, Optional

import pandas as pd
from langchain.input import print_text

from gpt_index.data_structs.table_v2 import PandasStructTable
from gpt_index.indices.query.base import BaseGPTIndexQuery
from gpt_index.indices.query.schema import QueryBundle
from gpt_index.prompts.default_prompts import DEFAULT_PANDAS_PROMPT
from gpt_index.prompts.prompts import PandasPrompt
from gpt_index.response.schema import Response

logger = logging.getLogger(__name__)


DEFAULT_INSTRUCTION_STR = (
    "We wish to convert this query to executable Python code using Pandas.\n"
    "The final line of code should be a Python expression that can be called "
    "with the `eval()` function. This expression should represent a solution "
    "to the query."
)


def default_output_processor(
    output: str, df: pd.DataFrame, **output_kwargs: Any
) -> str:
    """Process outputs in a default manner."""
    import ast
    import sys
    import traceback

    if sys.version_info < (3, 9):
        logger.warn(
            "Python version must be >= 3.9 in order to use "
            "the default output processor, which executes "
            "the Python query. Instead, we will return the "
            "raw Python instructions as a string."
        )
        return output

    local_vars = {"df": df}

    # NOTE: inspired from langchain's tool
    # see langchain.tools.python.tool (PythonAstREPLTool)
    try:
        tree = ast.parse(output)
        module = ast.Module(tree.body[:-1], type_ignores=[])
        exec(ast.unparse(module), {}, local_vars)  # type: ignore
        module_end = ast.Module(tree.body[-1:], type_ignores=[])
        module_end_str = ast.unparse(module_end)  # type: ignore
        try:
            return str(eval(module_end_str, {}, local_vars))
        except Exception as e:
            raise e
    except Exception as e:
        err_string = (
            "There was an error running the output as Python code. "
            f"Error message: {e}"
        )
        traceback.print_exc()
        return err_string


class GPTNLPandasIndexQuery(BaseGPTIndexQuery[PandasStructTable]):
    """GPT Pandas query.

    Convert natural language to Pandas python code.

    .. code-block:: python

        response = index.query("<query_str>", mode="default")

    Args:
        df (pd.DataFrame): Pandas dataframe to use.
        instruction_str (Optional[str]): Instruction string to use.
        output_processor (Optional[Callable[[str], str]]): Output processor.
            A callable that takes in the output string, pandas DataFrame,
            and any output kwargs and returns a string.
        pandas_prompt (Optional[PandasPrompt]): Pandas prompt to use.
        head (int): Number of rows to show in the table context.

    """

    def __init__(
        self,
        index_struct: PandasStructTable,
        df: Optional[pd.DataFrame] = None,
        instruction_str: Optional[str] = None,
        output_processor: Optional[Callable] = None,
        pandas_prompt: Optional[PandasPrompt] = None,
        output_kwargs: Optional[dict] = None,
        head: int = 5,
        **kwargs: Any,
    ) -> None:
        """Initialize params."""
        super().__init__(index_struct=index_struct, **kwargs)
        if df is None:
            raise ValueError("df must be provided.")
        self.df = df
        self._head = head
        self._pandas_prompt = pandas_prompt or DEFAULT_PANDAS_PROMPT
        self._instruction_str = instruction_str or DEFAULT_INSTRUCTION_STR
        self._output_processor = output_processor or default_output_processor
        self._output_kwargs = output_kwargs or {}

    def _get_table_context(self) -> str:
        """Get table context."""
        return str(self.df.head(self._head))

    def _query(self, query_bundle: QueryBundle) -> Response:
        """Answer a query."""
        context = self._get_table_context()

        pandas_response_str, _ = self._service_context.llm_predictor.predict(
            self._pandas_prompt,
            df_str=context,
            query_str=query_bundle.query_str,
            instruction_str=self._instruction_str,
        )
        if self._verbose:
            print_text(f"> Pandas Instructions:\n" f"```\n{pandas_response_str}\n```\n")
        pandas_output = self._output_processor(
            pandas_response_str,
            self.df,
            **self._output_kwargs,
        )
        if self._verbose:
            print_text(f"> Pandas Output: {pandas_output}\n")

        response_extra_info = {
            "pandas_instruction_str": pandas_response_str,
        }

        return Response(response=pandas_output, extra_info=response_extra_info)