Spaces:
Sleeping
Sleeping
#!/usr/bin/env python | |
""" | |
Inference script for ResNet50 trained on ImageNet-1K. | |
""" | |
# Standard Library Imports | |
import numpy as np | |
import torch | |
from collections import OrderedDict | |
# Third Party Imports | |
import spaces | |
from torchvision import transforms | |
from torch.nn import functional as F | |
from torchvision.models import resnet50 | |
from pytorch_grad_cam import GradCAM | |
from pytorch_grad_cam.utils.image import show_cam_on_image | |
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget | |
def inference(image, alpha, top_k, target_layer, model=None, classes=None):
    """
    Run inference with GradCAM visualization.

    Args:
        image: Input image. It is copied with ``.copy()``, fed through
            ``transforms.ToTensor()`` and divided by 255 for the overlay, so it
            is presumably an HxWx3 uint8 RGB numpy array -- TODO confirm
            against the caller.
        alpha: Forwarded to ``show_cam_on_image`` as ``image_weight``.
        top_k: Number of highest-confidence entries to return.
        target_layer: Layer selector; clamped to the range 1..6 and mapped to
            ``conv1`` / the last block of ``layer1``..``layer4`` of ``model``.
        model: Network exposing ``conv1`` and ``layer1``..``layer4``
            attributes. Required despite the ``None`` default.
        classes: Sequence of class names indexed by output position.
            Required despite the ``None`` default.

    Returns:
        A tuple ``(show_confidences, visualization)``: an ``OrderedDict``
        mapping class name to probability for the ``top_k`` most confident
        classes (highest first), and the image returned by
        ``show_cam_on_image``.

    Raises:
        ValueError: If ``model`` or ``classes`` is ``None``.
        Exception: Any error raised while running the model or GradCAM is
            printed and re-raised unchanged.
    """
    # The None defaults only exist so callers can pass these by keyword;
    # fail early with a clear message instead of an opaque AttributeError.
    if model is None:
        raise ValueError("inference() requires a model")
    if classes is None:
        raise ValueError("inference() requires the list of class names")

    use_cuda = torch.cuda.is_available()
    try:
        if use_cuda:
            torch.cuda.empty_cache()
        device = torch.device('cuda' if use_cuda else 'cpu')

        # Debug: Print model mode (before it is switched to eval below)
        print(f"Model mode: {model.training}")

        # Ensure model is on correct device and in eval mode
        model = model.to(device)
        model.eval()

        # Same CUDA autocast context as torch.cuda.amp.autocast(), but
        # explicitly disabled on CPU-only hosts so it does not emit a warning.
        with torch.autocast(device_type="cuda", enabled=use_cuda):
            org_img = image.copy()

            # Convert img to tensor and normalize it
            _transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]
                )
            ])

            # Debug: Print image tensor stats
            input_tensor = _transform(image).to(device)
            print(f"Input tensor shape: {input_tensor.shape}")
            print(f"Input tensor range: [{input_tensor.min():.2f}, {input_tensor.max():.2f}]")

            input_tensor = input_tensor.unsqueeze(0)
            input_tensor.requires_grad = True

            # Get Model Predictions
            outputs = model(input_tensor)
            print(f"Raw output shape: {outputs.shape}")
            print(f"Raw output range: [{outputs.min():.2f}, {outputs.max():.2f}]")

            probabilities = torch.softmax(outputs, dim=1)[0]
            print(f"Probabilities sum: {probabilities.sum():.2f}")  # Should be close to 1.0

            # Get top 5 predictions for debugging (fewer if the model has
            # fewer than 5 outputs, where a fixed k=5 would raise).
            debug_k = min(5, probabilities.numel())
            top_probs, top_indices = torch.topk(probabilities, debug_k)
            print("\nTop 5 predictions:")
            for idx, (prob, class_idx) in enumerate(zip(top_probs, top_indices)):
                class_name = classes[class_idx]
                print(f"{idx+1}. {class_name}: {prob:.4f}")

            # Create confidence dictionary. One bulk transfer instead of one
            # tensor read (and device sync) per class.
            prob_values = probabilities.detach().tolist()
            confidences = {classes[i]: prob_values[i] for i in range(len(classes))}
            sorted_confidences = sorted(confidences.items(), key=lambda x: x[1], reverse=True)
            show_confidences = OrderedDict(sorted_confidences[:top_k])

            # Map layer numbers to meaningful parts of the ResNet architecture
            # (5 and 6 both select the last block of layer4).
            _layers = {
                1: model.conv1,
                2: model.layer1[-1],
                3: model.layer2[-1],
                4: model.layer3[-1],
                5: model.layer4[-1],
                6: model.layer4[-1]
            }
            target_layer = min(max(target_layer, 1), 6)
            target_layers = [_layers[target_layer]]

            # Debug: Print selected layer
            print(f"\nUsing target layer: {target_layers[0]}")

            cam = GradCAM(model=model, target_layers=target_layers)

            # Get the most probable class index straight from the model
            # output. Looking the index up by name (classes.index) picks the
            # wrong class when the label list contains duplicate names.
            class_idx = int(top_indices[0])
            top_class = classes[class_idx]
            print(f"\nSelected class for GradCAM: {top_class} (index: {class_idx})")

            grayscale_cam = cam(
                input_tensor=input_tensor,
                targets=[ClassifierOutputTarget(class_idx)],
                aug_smooth=False,
                eigen_smooth=False
            )
            grayscale_cam = grayscale_cam[0, :]

            visualization = show_cam_on_image(org_img/255., grayscale_cam, use_rgb=True, image_weight=alpha)

        return show_confidences, visualization
    except Exception as e:
        print(f"Error in inference: {str(e)}")
        raise  # bare raise keeps the original traceback intact
    finally:
        # Release cached GPU memory on both the success and the error path.
        if use_cuda:
            torch.cuda.empty_cache()