"""Stanford 3D indoor dataset."""

from __future__ import annotations

import copy
import glob
import os
from collections.abc import Sequence
from io import BytesIO

import numpy as np
import pandas as pd
import torch

from vis4d.common.typing import ArgsType, DictStrAny
from vis4d.data.const import CommonKeys as K
from vis4d.data.typing import DictData

from .base import Dataset
from .util import CacheMappingMixin


class S3DIS(CacheMappingMixin, Dataset):
    """S3DIS dataset class.

    Each dataset item corresponds to one room directory. A room is stored on
    disk as a set of per-instance annotation files
    (``<data_root>/Area_<k>/<room>/Annotations/<class>_<id>.txt``) that are
    concatenated into a single point cloud when the item is requested.
    """

    DESCRIPTION = """S3DIS is a large-scale indoor pointcloud dataset."""
    HOMEPAGE = "https://buildingparser.stanford.edu/dataset.html"
    PAPER = (
        "https://openaccess.thecvf.com/content_cvpr_2016/papers/"
        "Armeni_3D_Semantic_Parsing_CVPR_2016_paper.pdf"
    )
    LICENSE = "CC BY-NC-SA 4.0"

    KEYS = [
        K.points3d,
        K.colors3d,
        K.semantics3d,
        K.instances3d,
    ]

    # Semantic class name (the prefix of an annotation file name) -> class id.
    CLASS_NAME_TO_IDX = {
        "ceiling": 0,
        "floor": 1,
        "wall": 2,
        "beam": 3,
        "column": 4,
        "window": 5,
        "door": 6,
        "chair": 7,
        "table": 8,
        "bookcase": 9,
        "sofa": 10,
        "board": 11,
        "clutter": 12,
    }

    # One count per class, in class-id order.
    CLASS_COUNTS = torch.Tensor(
        [
            3370714,
            2856755,
            4919229,
            318158,
            375640,
            478001,
            974733,
            650464,
            791496,
            88727,
            1284130,
            229758,
            2272837,
        ]
    )

    AVAILABLE_KEYS: Sequence[str] = (
        K.points3d,
        K.colors3d,
        K.semantics3d,
        K.instances3d,
    )

    # RGB colors used to visualize classes.
    # NOTE(review): this table has 14 rows while there are 13 classes; the
    # extra row is presumably meant for unlabeled points -- confirm.
    COLOR_MAPPING = torch.tensor(
        [
            [152, 223, 138],
            [31, 119, 180],
            [188, 189, 34],
            [140, 86, 75],
            [255, 152, 150],
            [214, 39, 40],
            [197, 176, 213],
            [23, 190, 207],
            [178, 76, 76],
            [247, 182, 210],
            [66, 188, 102],
            [219, 219, 141],
            [140, 57, 197],
            [202, 185, 52],
        ]
    )

    def __init__(
        self,
        data_root: str,
        split: str = "trainNoArea5",
        keys_to_load: Sequence[str] = (
            K.points3d,
            K.colors3d,
            K.semantics3d,
            K.instances3d,
        ),
        cache_points: bool = True,
        cache_as_binary: bool = False,
        cached_file_path: str | None = None,
        **kwargs: ArgsType,
    ) -> None:
        """Creates a new S3DIS dataset.

        Args:
            data_root (str): Path to S3DIS folder
            split (str): which split to load. Must either be trainNoArea[1-6]
                or testArea[1-6].  e.g. trainNoArea5 will load all areas except
                area 5 and testArea5 will only load area 5.
            keys_to_load (list[str]): What kind of data should be loaded
                (e.g. colors, xyz, semantics, ...)
            cache_points (bool): If true caches loaded points instead of
                reading them from the disk every time.
            cache_as_binary (bool): Whether to cache the dataset as binary.
                Default: False.
            cached_file_path (str | None): Path to a cached file. If cached
                file exist then it will load it instead of generating the data
                mapping. Default: None.
            **kwargs (ArgsType): Forwarded unchanged to the parent
                constructor.

        Raises:
            ValueError: If requested split is malformed or names an area
                outside of 1-6.
        """
        super().__init__(**kwargs)

        self.data_root = data_root
        self.split = split
        self.areas: list[str] = self._select_areas(split)

        self.data, _ = self._load_mapping(
            self._generate_data_mapping,
            cache_as_binary=cache_as_binary,
            cached_file_path=cached_file_path,
        )
        self.keys_to_load = keys_to_load

        # Cache
        self.cache_points = cache_points
        self._cache: dict[int, DictData] = {}

    @staticmethod
    def _select_areas(split: str) -> list[str]:
        """Translate a split name into the list of area folders to load.

        Args:
            split (str): Split name, e.g. ``trainNoArea5`` or ``testArea5``.

        Returns:
            list[str]: Area folder names (``Area_<k>``) belonging to the split.

        Raises:
            ValueError: If the split is neither a ``trainNoArea`` nor a
                ``testArea`` split, or if its area number is not in 1-6.
        """
        all_areas = [f"Area_{number}" for number in range(1, 7)]

        try:
            area_number = int(split.split("Area")[-1])
        except ValueError as err:
            raise ValueError(f"Unknown split: {split}") from err

        # Reject out-of-range numbers explicitly: 0 or a negative number would
        # otherwise silently pick a wrong area through negative indexing.
        if not 1 <= area_number <= len(all_areas):
            raise ValueError(
                f"Unknown split: {split}. "
                f"Area number must be in 1-{len(all_areas)}."
            )

        selected = all_areas[area_number - 1]
        if "trainNoArea" in split:
            return [area for area in all_areas if area != selected]
        if "testArea" in split:
            return [selected]
        raise ValueError(f"Unknown split: {split}")

    @property
    def num_classes(self) -> int:
        """The number of classes in the dataset."""
        return len(S3DIS.CLASS_NAME_TO_IDX)

    def __repr__(self) -> str:
        """Concise representation of the dataset."""
        return f"S3DIS(root={self.data_root}, split={self.split})"

    def _generate_data_mapping(self) -> list[DictStrAny]:
        """Generate S3DIS dataset mapping.

        Returns:
            list[DictStrAny]: One entry per room directory. Each entry maps an
                instance name (annotation file name without extension, e.g.
                ``chair_1``) to a dict holding its ``class_label`` and the
                ``path`` of the annotation file.
        """
        fallback_label = S3DIS.CLASS_NAME_TO_IDX["clutter"]
        data: list[DictStrAny] = []
        for area in self.areas:
            # glob returns entries in arbitrary order; sort so that a dataset
            # index always refers to the same room.
            room_paths = sorted(
                glob.glob(os.path.join(self.data_root, area, "*"))
            )
            for room_path in room_paths:
                if not os.path.isdir(room_path):
                    continue

                room_data: DictStrAny = {}
                annotation_paths = sorted(
                    glob.glob(os.path.join(room_path, "Annotations", "*.txt"))
                )
                for anns in annotation_paths:
                    instance_id = os.path.splitext(os.path.basename(anns))[0]
                    sem_name = instance_id.split("_")[0]
                    room_data[instance_id] = {
                        # Unknown class names are mapped to "clutter".
                        "class_label": S3DIS.CLASS_NAME_TO_IDX.get(
                            sem_name, fallback_label
                        ),
                        "path": anns,
                    }
                data.append(room_data)

        return data

    def __len__(self) -> int:
        """Length of the dataset."""
        return len(self.data)

    def __getitem__(self, idx: int) -> DictData:
        """Transform s3dis sample to vis4d input format.

        Args:
            idx (int): Index of the room to load.

        Returns:
            DictData: Contains one entry per key in ``keys_to_load``:
                coordinates: 3D point coordinates, shape (n x 3)
                colors: 3D point colors divided by 255, shape (n x 3)
                semantic classes: 3D point class ids, shape (n,)
                instance ids: 3D point instance ids, shape (n,)

        Raises:
            ValueError: If a requested key does not exist in this dataset.
        """
        # Cache data
        if self.cache_points and idx in self._cache:
            return copy.deepcopy(self._cache[idx])

        # Reject unsupported keys before doing any file I/O.
        for key in self.keys_to_load:
            if key not in S3DIS.AVAILABLE_KEYS:
                raise ValueError(f"Can not load data for key: {key}")

        load_points = K.points3d in self.keys_to_load
        load_colors = K.colors3d in self.keys_to_load
        load_semantics = K.semantics3d in self.keys_to_load
        load_instances = K.instances3d in self.keys_to_load

        # Per-instance chunks are collected and concatenated once at the end
        # instead of re-stacking the growing arrays for every instance. The
        # empty seed arrays fix shape and dtype for rooms without annotations.
        coord_chunks = [np.zeros((0, 3), dtype=np.float32)]
        color_chunks = [np.zeros((0, 3), dtype=np.float32)]
        semantic_chunks = [np.zeros((0,), dtype=int)]
        instance_chunks = [np.zeros((0,), dtype=int)]

        for values in self.data[idx].values():
            data_path = values["path"]
            # File names look like <class>_<id>.txt; the numeric suffix is
            # used as the instance id.
            instance_id = int(data_path.split("_")[-1].replace(".txt", ""))
            np_data = pd.read_csv(
                BytesIO(self.data_backend.get(data_path)),
                header=None,
                delimiter=" ",
            ).values.astype(np.float32)
            num_points = np_data.shape[0]

            if load_points:
                coord_chunks.append(np_data[:, :3])
            if load_colors:
                color_chunks.append(np_data[:, 3:])
            if load_semantics:
                semantic_chunks.append(
                    np.full(num_points, values["class_label"], dtype=int)
                )
            if load_instances:
                instance_chunks.append(
                    np.full(num_points, instance_id, dtype=int)
                )

        sample: DictData = {}
        for key in self.keys_to_load:
            if key == K.points3d:
                sample[key] = np.concatenate(coord_chunks)
            elif key == K.colors3d:
                sample[key] = np.concatenate(color_chunks) / 255.0
            elif key == K.semantics3d:
                sample[key] = np.concatenate(semantic_chunks)
            else:
                sample[key] = np.concatenate(instance_chunks)

        if self.cache_points:
            # Store a private copy so later mutation of the returned arrays
            # by the caller cannot corrupt the cache.
            self._cache[idx] = copy.deepcopy(sample)
        return sample