Spaces:

Shilpaj
/

ImageNet

Sleeping

App Files Files Community

ImageNet / inference.py

Shilpaj

Refactor: Modifications for inference on GPU

f8ecba6 3 months ago

raw

history blame

4.23 kB

	#!/usr/bin/env python
	"""
	Inference script for ResNet50 trained on ImageNet-1K.
	"""
	# Standard Library and Core Numeric Imports (numpy and torch are third-party)
	import numpy as np
	import torch
	from collections import OrderedDict

	# Third Party Imports
	import spaces
	from torchvision import transforms
	from torch.nn import functional as F
	from torchvision.models import resnet50
	from pytorch_grad_cam import GradCAM
	from pytorch_grad_cam.utils.image import show_cam_on_image
	from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget


	@spaces.GPU
	def inference(image, alpha, top_k, target_layer, model=None, classes=None):
	    """
	    Run inference with GradCAM visualization

	    :param image: Input image. It is indexed as (H, W, C) and divided by 255,
	                  so an RGB array with values in [0, 255] is presumably
	                  expected -- TODO confirm against the caller. Tensors and
	                  other array-likes are converted to a NumPy array first.
	    :param alpha: Weight of the original image in the overlay (forwarded to
	                  `show_cam_on_image` as `image_weight`)
	    :param top_k: Number of highest-confidence classes to return
	    :param target_layer: Layer selector; clamped to the range [1, 6]
	    :param model: Model exposing `conv1` and `layer1` .. `layer4` (required)
	    :param classes: Class names, indexed by model output position (required)
	    :return: Tuple of (OrderedDict mapping the `top_k` class names to their
	             confidence, image overlaid with the class activation map)
	    :raises ValueError: If `model` or `classes` is not provided
	    """
	    # The defaults only exist to keep the signature keyword-friendly; fail
	    # loudly instead of with an AttributeError on None further down.
	    if model is None or classes is None:
	        raise ValueError("`model` and `classes` must both be provided")

	    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

	    # Ensure model is on correct device and in eval mode
	    model = model.to(device)
	    model.eval()

	    # All preprocessing below works on a NumPy array
	    if isinstance(image, torch.Tensor):
	        image = image.detach().cpu().numpy()
	    else:
	        image = np.asarray(image)

	    # Per-image mean and standard deviation of each of the three channels
	    scaled = image[:, :, :3] / 255.
	    channel_mean = scaled.mean(axis=(0, 1))
	    # Floor the std so a flat-colour channel does not make Normalize fail
	    channel_std = np.maximum(scaled.std(axis=(0, 1)), 1e-6)

	    # Convert img to tensor and normalize it
	    _transform = transforms.Compose([
	        transforms.ToTensor(),
	        transforms.Normalize(tuple(channel_mean.tolist()), tuple(channel_std.tolist())),
	    ])

	    # Create a mini-batch as expected by the model, on the model's device
	    input_tensor = _transform(image).unsqueeze(0).to(device)

	    # Get Model Predictions (mixed precision only when running on CUDA)
	    with torch.no_grad(), torch.autocast(device_type=device.type, enabled=device.type == 'cuda'):
	        probabilities = torch.softmax(model(input_tensor).float(), dim=1)[0]

	    # Index of the most probable class, taken straight from the tensor so
	    # that duplicate class names cannot select the wrong output
	    class_idx = int(torch.argmax(probabilities))

	    # Single device-to-host transfer instead of one per class
	    confidences = dict(zip(classes, probabilities.cpu().tolist()))

	    # Select the top classes based on user input
	    sorted_confidences = sorted(confidences.items(), key=lambda val: val[1], reverse=True)
	    show_confidences = OrderedDict(sorted_confidences[:int(top_k)])

	    # Map layer numbers to meaningful parts of the ResNet architecture
	    _layers = {
	        1: model.conv1,       # Initial convolution layer
	        2: model.layer1[-1],  # Last bottleneck of first residual block
	        3: model.layer2[-1],  # Last bottleneck of second residual block
	        4: model.layer3[-1],  # Last bottleneck of third residual block
	        5: model.layer4[-1],  # Last bottleneck of fourth residual block
	        6: model.layer4[-1],  # Same as 5: last conv block is used instead of fc
	    }

	    # Ensure valid layer selection
	    target_layer = min(max(int(target_layer), 1), 6)
	    target_layers = [_layers[target_layer]]

	    # GradCAM back-propagates through the model, so it must run outside
	    # torch.no_grad(); the context manager releases the hooks it registers.
	    with GradCAM(model=model, target_layers=target_layers) as cam:
	        # Generate GradCAM for the top predicted class
	        grayscale_cam = cam(input_tensor=input_tensor,
	                            targets=[ClassifierOutputTarget(class_idx)],
	                            aug_smooth=True,
	                            eigen_smooth=True)
	    grayscale_cam = grayscale_cam[0, :]

	    # Overlay input image with Class activations
	    visualization = show_cam_on_image(image / 255., grayscale_cam, use_rgb=True, image_weight=alpha)
	    return show_confidences, visualization