import os
import pickle

import joblib
import numpy as np
import torch
from transformers import PreTrainedModel

from .configuration_sm_subgroup_classifier import SmSubgroupClassifierConfig


class SmSubgroupClassifier(PreTrainedModel):
    """Hugging Face wrapper around per-(language, model) scikit-learn
    classifiers that predict subgroups from pre-computed embeddings."""

    config_class = SmSubgroupClassifierConfig

    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self._loaded_classifiers = {}  # lazy cache: model_key -> classifier info
        self.model_dir = None

    def _get_available_models(self):
        """Discover available models by checking which subdirectories exist."""
        if not self.model_dir:
            return []

        available = []
        if os.path.exists(self.model_dir):
            for item in os.listdir(self.model_dir):
                item_path = os.path.join(self.model_dir, item)
                if os.path.isdir(item_path) and "_" in item:
                    # Only count directories that contain all required files.
                    required_files = ["model.pkl", "scaler.pkl", "metadata.pkl"]
                    if all(
                        os.path.exists(os.path.join(item_path, f))
                        for f in required_files
                    ):
                        available.append(item)
        return available

    def _load_classifier(self, model_key):
        """Load a specific classifier by model key (e.g., 'en_OP-ob')."""
        if model_key in self._loaded_classifiers:
            return self._loaded_classifiers[model_key]

        available_models = self._get_available_models()
        if model_key not in available_models:
            raise ValueError(
                f"Model '{model_key}' not available. Available: {available_models}"
            )

        # Path to this classifier's directory
        classifier_path = os.path.join(self.model_dir, model_key)

        # Load the three components: estimator, feature scaler, and metadata
        classifier = joblib.load(os.path.join(classifier_path, "model.pkl"))
        scaler = joblib.load(os.path.join(classifier_path, "scaler.pkl"))
        with open(os.path.join(classifier_path, "metadata.pkl"), "rb") as f:
            metadata = pickle.load(f)

        classifier_info = {
            "classifier": classifier,
            "scaler": scaler,
            "class_names": metadata["class_names"],
        }
        self._loaded_classifiers[model_key] = classifier_info
        return classifier_info

    def forward(self, language, model_name, embeddings):
        """
        Args:
            language: Language code (en, fi, sv)
            model_name: Model name (OP-ob, NA, etc.)
            embeddings: Pre-computed embeddings
        """
        # Build the key used to locate the classifier directory
        model_key = f"{language}_{model_name}"

        # Convert embeddings to a 2D numpy array if needed
        if torch.is_tensor(embeddings):
            embeddings = embeddings.detach().cpu().numpy()
        else:
            embeddings = np.asarray(embeddings)
        if embeddings.ndim == 1:
            embeddings = embeddings.reshape(1, -1)

        # Load (or fetch from cache) the requested classifier
        classifier_info = self._load_classifier(model_key)

        # Scale features, then predict labels and per-class probabilities
        embeddings_scaled = classifier_info["scaler"].transform(embeddings)
        predictions = classifier_info["classifier"].predict(embeddings_scaled)
        probabilities = classifier_info["classifier"].predict_proba(embeddings_scaled)

        # Format results using class names and per-class probabilities
        results = []
        for pred, probs in zip(predictions, probabilities):
            predicted_class_name = classifier_info["class_names"][pred]
            # Map every class name to its probability
            all_probs = {
                classifier_info["class_names"][i]: float(prob)
                for i, prob in enumerate(probs)
            }
            results.append(
                {
                    "predicted_class": predicted_class_name,
                    "confidence": float(max(probs)),
                    "all_probabilities": all_probs,
                }
            )

        return {
            "language": language,
            "model_name": model_name,
            "model_key": model_key,
            "predictions": results[0] if len(results) == 1 else results,
        }

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        model = super().from_pretrained(pretrained_model_name_or_path, **kwargs)
        # Remember where the checkpoint lives so the per-key classifier
        # directories can be discovered and loaded lazily.
        model.model_dir = pretrained_model_name_or_path
        return model
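

# Minimal usage sketch (not part of the module's API): assumes a local
# checkout directory containing per-key subdirectories such as
# "en_OP-ob/{model.pkl,scaler.pkl,metadata.pkl}". The path
# "./sm_subgroup_classifier" and the 768-dim embedding are illustrative
# assumptions; substitute your real checkpoint path and embedding size.
if __name__ == "__main__":
    model = SmSubgroupClassifier.from_pretrained("./sm_subgroup_classifier")
    dummy_embedding = torch.randn(768)  # stand-in for a real precomputed embedding
    output = model(language="en", model_name="OP-ob", embeddings=dummy_embedding)
    # For a single input, "predictions" is one result dict, not a list
    print(output["predictions"]["predicted_class"],
          output["predictions"]["confidence"])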