File size: 4,225 Bytes
077fb0c
 
 
 
 
 
 
 
 
 
fdcadea
077fb0c
 
 
 
 
 
 
 
fdcadea
bb98138
077fb0c
f8ecba6
077fb0c
f8ecba6
 
 
 
077fb0c
f8ecba6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env python
"""
Inference script for ResNet50 trained on ImageNet-1K.
"""
# Core Imports (NumPy and PyTorch are third-party; collections is standard library)
import numpy as np
import torch
from collections import OrderedDict

# Third Party Imports
import spaces
from torchvision import transforms
from torch.nn import functional as F
from torchvision.models import resnet50
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget


@spaces.GPU
def inference(image, alpha, top_k, target_layer, model=None, classes=None):
    """
    Run inference with GradCAM visualization.

    Args:
        image: RGB image as an ``H x W x 3`` array-like with values in
            0-255 (it is indexed per channel and divided by 255).
        alpha: Weight of the original image in the overlay; forwarded to
            ``show_cam_on_image`` as ``image_weight``.
        top_k: Number of highest-confidence classes to return.
        target_layer: Layer selector; clamped to the range 1-6 and mapped
            to ``conv1`` (1), the last block of ``layer1``..``layer4``
            (2-5), or again the last block of ``layer4`` (6).
        model: Network exposing ``conv1`` and ``layer1``..``layer4``.
            Required despite the ``None`` default.
        classes: Sequence of class names, indexed by model output position.
            Required despite the ``None`` default.

    Returns:
        A tuple ``(show_confidences, visualization)``: an ``OrderedDict``
        mapping the ``top_k`` most probable class names to their softmax
        probability (highest first), and the image returned by
        ``show_cam_on_image`` with the GradCAM heat-map overlaid.

    Raises:
        ValueError: If ``model`` or ``classes`` is not supplied.
    """
    if model is None or classes is None:
        raise ValueError("inference() requires both `model` and `classes`")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Ensure model is on correct device and in eval mode
    model = model.to(device)
    model.eval()

    # Everything below indexes the image as an H x W x C array.
    image = np.asarray(image)

    # Per-channel mean / standard deviation of the 0-1 scaled image.
    # NOTE(review): this normalises with the image's own statistics rather
    # than fixed dataset statistics -- confirm it matches training.
    scaled = image[:, :, :3] / 255.
    channel_mean = scaled.mean(axis=(0, 1))
    # Floor the std so a constant-colour channel does not make Normalize
    # raise on a zero divisor.
    channel_std = np.maximum(scaled.std(axis=(0, 1)), 1e-6)

    # Convert img to tensor and normalize it
    _transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_mean.tolist(), channel_std.tolist()),
    ])

    # Mini-batch of one, placed on the same device as the model.
    input_tensor = _transform(image).unsqueeze(0).to(device)

    # Classification pass: no gradients needed; mixed precision only on
    # CUDA (the context is a no-op when disabled).
    use_amp = device.type == 'cuda'
    with torch.autocast(device_type=device.type, enabled=use_amp), torch.no_grad():
        probabilities = torch.softmax(model(input_tensor), dim=1)[0]

    # Index of the most probable class, taken straight from the tensor so
    # repeated label names cannot map it back to the wrong position.
    class_idx = int(probabilities.argmax())

    # One device-to-host transfer instead of one per class.
    confidences = dict(zip(classes, probabilities.float().tolist()))

    # Select the top classes based on user input
    sorted_confidences = sorted(confidences.items(), key=lambda val: val[1], reverse=True)
    show_confidences = OrderedDict(sorted_confidences[:int(top_k)])

    # Map layer numbers to meaningful parts of the ResNet architecture
    _layers = {
        1: model.conv1,          # Initial convolution layer
        2: model.layer1[-1],     # Last bottleneck of first residual block
        3: model.layer2[-1],     # Last bottleneck of second residual block
        4: model.layer3[-1],     # Last bottleneck of third residual block
        5: model.layer4[-1],     # Last bottleneck of fourth residual block
        6: model.layer4[-1],     # Same as 5: last conv block instead of fc
    }

    # Ensure valid layer selection
    target_layer = min(max(int(target_layer), 1), 6)
    target_layers = [_layers[target_layer]]

    # GradCAM back-propagates the class score, so gradients must be enabled
    # here (explicitly, in case the caller is inside a no_grad context).
    # The context manager releases the hooks GradCAM registers on the model.
    with torch.enable_grad(), GradCAM(model=model, target_layers=target_layers) as cam:
        grayscale_cam = cam(input_tensor=input_tensor,
                            targets=[ClassifierOutputTarget(class_idx)],
                            aug_smooth=True,
                            eigen_smooth=True)
    grayscale_cam = grayscale_cam[0, :]

    # Overlay input image with Class activations
    visualization = show_cam_on_image(image / 255., grayscale_cam, use_rgb=True, image_weight=alpha)
    return show_confidences, visualization