"""Gradio demo: YOLOv3 (PASCAL VOC) inference with bounding-box and GradCAM outputs."""

import itertools

import cv2
import gradio as gr
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
from PIL import Image
from torch.utils.data import DataLoader
from torchvision import transforms

import config as config
import utils
from loss import YoloLoss
from model import YOLOv3
from utils import get_loaders

import grad_cam_func as gcf
from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

# --- Model setup ------------------------------------------------------------
# The checkpoint was saved from a wrapper whose submodule was named "model.",
# so every key carries a "model." prefix that must be stripped before loading.
state_dict = torch.load('Yolov3_Padmanabh.pth', map_location=torch.device('cpu'))
new_state_dict = {key.replace('model.', ''): value for key, value in state_dict.items()}

model = YOLOv3(in_channels=3, num_classes=config.NUM_CLASSES)
model.load_state_dict(new_state_dict, strict=True)
model.eval()

# PASCAL VOC class names (kept for reference; plotting reads config.PASCAL_CLASSES)
classes = ("aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
           "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
           "pottedplant", "sheep", "sofa", "train", "tvmonitor")


def _decode_predictions(outputs, iou_threshold, conf_threshold):
    """Decode multi-scale YOLO outputs into NMS-filtered boxes for the first image.

    Args:
        outputs: list of 3 per-scale prediction tensors of shape (B, A, S, S, ...).
        iou_threshold: IOU above which overlapping boxes are suppressed.
        conf_threshold: minimum objectness confidence to keep a box.

    Returns:
        List of boxes [class_pred, confidence, x, y, w, h] in midpoint format.
    """
    bboxes = [[] for _ in range(outputs[0].shape[0])]  # one list per batch image
    for scale_idx in range(3):
        _, _, S, _, _ = outputs[scale_idx].shape
        anchor = np.array(config.SCALED_ANCHORS[scale_idx])
        boxes_scale = utils.cells_to_bboxes(
            outputs[scale_idx], anchor, S=S, is_preds=True)
        for img_idx, box in enumerate(boxes_scale):
            bboxes[img_idx] += box
    # Only a single image is ever passed through this app, hence bboxes[0].
    return utils.non_max_suppression(
        bboxes[0],
        iou_threshold=iou_threshold,
        threshold=conf_threshold,
        box_format="midpoint",
    )


def _plot_boxes(image, boxes):
    """Plot predicted bounding boxes on the image and return an RGB numpy array.

    Args:
        image: HxWx3 array-like image to draw on.
        boxes: list of [class_pred, confidence, x, y, w, h]; coordinates are
            normalized midpoint-format fractions of the image size.

    Returns:
        np.uint8 array (H', W', 3) of the rendered matplotlib figure.
    """
    cmap = plt.get_cmap("tab20b")
    class_labels = config.PASCAL_CLASSES
    colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
    im = np.array(image)
    height, width, _ = im.shape
    fig, ax = plt.subplots(1)
    ax.imshow(im)
    for box in boxes:
        assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
        class_pred = box[0]
        x_mid, y_mid, box_w, box_h = box[2:]
        # Convert midpoint format to the upper-left corner matplotlib expects.
        upper_left_x = x_mid - box_w / 2
        upper_left_y = y_mid - box_h / 2
        rect = patches.Rectangle(
            (upper_left_x * width, upper_left_y * height),
            box_w * width,
            box_h * height,
            linewidth=2,
            edgecolor=colors[int(class_pred)],
            facecolor="none",
        )
        ax.add_patch(rect)
        plt.text(
            upper_left_x * width,
            upper_left_y * height,
            s=class_labels[int(class_pred)],
            color="white",
            verticalalignment="top",
            bbox={"color": colors[int(class_pred)], "pad": 0},
        )
    plt.axis('off')
    # Rasterize the figure into a numpy image so Gradio can display it.
    # NOTE(review): tostring_rgb was removed in matplotlib 3.10; if upgrading,
    # switch to np.asarray(fig.canvas.buffer_rgba())[..., :3].
    fig.canvas.draw()
    fig_img = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
    fig_img = fig_img.reshape(fig.canvas.get_width_height()[::-1] + (3,))
    plt.close(fig)
    return fig_img


def _gradcam_overlay(input_img, transform_img, iou_threshold, conf_threshold, gc_trans):
    """Blend a GradCAM heatmap for the detected objects over the input image.

    Args:
        input_img: original HxWx3 RGB numpy image from Gradio.
        transform_img: preprocessed (1, 3, 416, 416) model input tensor.
        iou_threshold / conf_threshold: NMS parameters used to pick the
            detection targets that GradCAM explains.
        gc_trans: image_weight passed to show_cam_on_image (overlay blending).

    Returns:
        np.uint8 RGB image with the CAM heatmap overlaid.
    """
    target_layer = [model.layers[-2]]
    cam = gcf.BaseCAM(model, target_layer)
    ang = ActivationsAndGradients(model, target_layer, None)
    outputs = ang(transform_img)  # forward pass with gradient hooks attached
    # Fix: previously this re-ran NMS with hard-coded thresholds (0.5 / 0.4),
    # ignoring the user's slider values; now both outputs use the same settings.
    nms_boxes = _decode_predictions(outputs, iou_threshold, conf_threshold)
    # box[0] is the predicted class index; GradCAM targets those classes.
    targets = [ClassifierOutputTarget(box[0]) for box in nms_boxes]
    cam_per_layer = cam.compute_cam_per_layer(transform_img, targets, False)
    output_gc = cam.aggregate_multi_layers(cam_per_layer)[0, :, :]
    img = np.float32(cv2.resize(input_img, (416, 416))) / 255
    return show_cam_on_image(img, output_gc, use_rgb=True, image_weight=gc_trans)


def inference(input_img=None, iou_threshold=0.6, conf_threshold=0.5, gc_trans=0.3):
    """Run YOLOv3 on an image and return (bbox visualization, GradCAM overlay).

    Args:
        input_img: HxWx3 RGB numpy image supplied by Gradio, or None.
        iou_threshold: IOU threshold for non-max suppression.
        conf_threshold: minimum objectness confidence to keep a detection.
        gc_trans: transparency (image weight) of the GradCAM overlay.

    Returns:
        Tuple (prediction image, GradCAM image); (None, None) when no input.
    """
    if input_img is None:
        return None, None

    transform_img = config.infer_transforms(image=input_img)['image'].unsqueeze(0)
    transform_img_visual = config.infer_transforms_visualization(image=input_img)['image']

    with torch.no_grad():
        outputs = model(transform_img)
    nms_boxes = _decode_predictions(outputs, iou_threshold, conf_threshold)
    outputs_inference_bb = _plot_boxes(transform_img_visual.permute(1, 2, 0), nms_boxes)

    # GradCAM needs gradients, so it runs outside the no_grad block.
    outputs_inference_gc = _gradcam_overlay(
        input_img, transform_img, iou_threshold, conf_threshold, gc_trans)
    return outputs_inference_bb, outputs_inference_gc


title = "PASCAL VOC trained on Yolov3"
description = "A simple Gradio interface to infer on Yolov3 model, and get GradCAM results"
examples = [['examples/test_' + str(i) + '.jpg', 0.6, 0.5, 0.3] for i in range(10)]

# Slider defaults now match the inference() defaults and the example rows
# (previously Threshold defaulted to 0.4 and GradCAM Transparency to 0.5,
# contradicting both the function signature and every example).
demo = gr.Interface(
    inference,
    inputs=[
        gr.Image(label="Input image"),
        gr.Slider(0, 1, value=0.6, label="IOU Threshold"),
        gr.Slider(0, 1, value=0.5, label="Threshold"),
        gr.Slider(0, 1, value=0.3, label="GradCAM Transparency"),
    ],
    outputs=[
        gr.Image(label="Yolov3 Prediction"),
        gr.Image(label="GradCAM Output"),
    ],
    title=title,
    description=description,
    examples=examples,
)
demo.launch()