#!/usr/bin/env python
"""
Inference script for ResNet50 trained on ImageNet-1K.
"""
# Core Imports (standard library, NumPy, PyTorch)
import numpy as np
import torch
from collections import OrderedDict

# Third Party Imports
import spaces
from torchvision import transforms
from torch.nn import functional as F
from torchvision.models import resnet50
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget


@spaces.GPU
def inference(image, alpha, top_k, target_layer, model=None, classes=None):
    """
    Run inference with GradCAM visualization.

    Args:
        image: Input image. It must support ``.copy()`` and division by a
            scalar, and be accepted by ``transforms.ToTensor`` (presumably an
            HxWx3 uint8 RGB numpy array, since the overlay divides it by
            255 -- confirm against the caller).
        alpha: Weight of the original image in the overlay; forwarded to
            ``show_cam_on_image`` as ``image_weight``.
        top_k: Number of highest-confidence classes to return (used as a
            slice bound on the sorted predictions).
        target_layer: Layer selector, clamped to the range 1-6.
            1 -> ``conv1``, 2 -> ``layer1[-1]``, 3 -> ``layer2[-1]``,
            4 -> ``layer3[-1]``, 5 and 6 -> ``layer4[-1]``.
        model: Model exposing ``conv1`` and ``layer1`` .. ``layer4``
            attributes. Required despite the ``None`` default.
        classes: Sequence of class names, ordered by model output index.
            Required despite the ``None`` default.

    Returns:
        A ``(show_confidences, visualization)`` tuple: an ``OrderedDict``
        mapping class name to confidence for the ``top_k`` most probable
        classes in descending order, and the image returned by
        ``show_cam_on_image``.

    Raises:
        ValueError: If ``model`` or ``classes`` is not supplied.
    """
    if model is None or classes is None:
        raise ValueError("inference() requires both `model` and `classes`")

    try:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Ensure model is on correct device and in eval mode
        model = model.to(device)
        model.eval()

        # Save a copy of input img for the overlay
        org_img = image.copy()

        # Convert img to tensor and normalize it
        _transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            )
        ])

        # Preprocess the input image, move to device and add a batch dim
        input_tensor = _transform(image).to(device)
        input_tensor = input_tensor.unsqueeze(0)
        input_tensor.requires_grad = True

        # Get model predictions. No autograd graph is needed for this pass;
        # GradCAM runs its own forward/backward pass further down.
        with torch.no_grad():
            outputs = model(input_tensor)
            probabilities = torch.softmax(outputs, dim=1)[0]

        # Take the winning index straight from the tensor so it stays correct
        # even when two entries of `classes` share the same name.
        class_idx = int(probabilities.argmax())

        # One device-to-host transfer instead of one per class; pairing with
        # `classes` via zip avoids hard-coding the number of classes.
        scores = probabilities.cpu().tolist()
        confidences = dict(zip(classes, scores))

        # Select the top classes based on user input
        sorted_confidences = sorted(
            confidences.items(), key=lambda val: val[1], reverse=True
        )
        show_confidences = OrderedDict(sorted_confidences[:top_k])

        # Map layer numbers to meaningful parts of the ResNet architecture
        # (5 and 6 intentionally resolve to the same final block).
        _layers = {
            1: model.conv1,
            2: model.layer1[-1],
            3: model.layer2[-1],
            4: model.layer3[-1],
            5: model.layer4[-1],
            6: model.layer4[-1]
        }

        # Ensure valid layer selection; int() keeps non-integral values
        # (e.g. from a UI slider) from missing the lookup table.
        target_layer = int(min(max(target_layer, 1), 6))
        target_layers = [_layers[target_layer]]

        # Generate GradCAM for the top predicted class. The context manager
        # releases the hooks GradCAM attaches to the model as soon as the
        # block exits, instead of relying on garbage collection.
        with GradCAM(model=model, target_layers=target_layers) as cam:
            grayscale_cam = cam(
                input_tensor=input_tensor,
                targets=[ClassifierOutputTarget(class_idx)],
                aug_smooth=False,  # Disable augmentation for memory efficiency
                eigen_smooth=False  # Disable eigen smoothing for memory efficiency
            )
        grayscale_cam = grayscale_cam[0, :]

        # Overlay input image with class activations
        visualization = show_cam_on_image(
            org_img/255., grayscale_cam, use_rgb=True, image_weight=alpha
        )

        return show_confidences, visualization

    finally:
        # Clear CUDA cache on both the success and the failure path
        if torch.cuda.is_available():
            torch.cuda.empty_cache()