"""Gradio example for ``xaitk-saliency``.

This app makes use of the saliency generation example found in the base
``xaitk-saliency`` repo
[here](https://github.com/XAITK/xaitk-saliency/blob/master/examples/OcclusionSaliency.ipynb),
and explores integrating ``xaitk-saliency`` with ``Gradio`` to create an
interactive interface for computing saliency maps.
"""

import functools
import os
import PIL.Image
import matplotlib.pyplot as plt  # type: ignore
import urllib
# ``import urllib`` alone does not guarantee the ``request`` submodule is
# loaded; import it explicitly because ``urllib.request.urlretrieve`` is used.
import urllib.request
import numpy as np
import gradio as gr
from gradio import (  # type: ignore
    AnnotatedImage, Button, Column, Image, Label,  # type: ignore
    Number, Plot, Row, TabItem, Tab, Tabs,  # type: ignore
    Checkbox, Dropdown, Slider, Textbox  # type: ignore
)

# State variables for Image Classification
from gr_component_state import (  # type: ignore
    img_cls_model_name, img_cls_saliency_algo_name, window_size_state,
    stride_state, debiased_state,
)

# State functions for Image Classification
from gr_component_state import (  # type: ignore
    select_img_cls_model, select_img_cls_saliency_algo, enter_window_size,
    enter_stride, check_debiased
)

# State variables for Object Detection
from gr_component_state import (  # type: ignore
    obj_det_model_name, obj_det_saliency_algo_name, occlusion_grid_state
)

# State functions for Object Detection
from gr_component_state import (  # type: ignore
    select_obj_det_model, select_obj_det_saliency_algo,
    enter_occlusion_grid_size
)

# Common state variables
from gr_component_state import (  # type: ignore
    threads_state, num_masks_state, spatial_res_state, p1_state, seed_state
)

# Common state functions
from gr_component_state import (  # type: ignore
    select_threads, enter_num_masks, enter_spatial_res, select_p1, enter_seed
)

import torch
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn.functional

from smqtk_detection.impls.detect_image_objects.resnet_frcnn import ResNetFRCNN
from xaitk_saliency.impls.gen_image_classifier_blackbox_sal.slidingwindow import SlidingWindowStack
from xaitk_saliency.impls.gen_image_classifier_blackbox_sal.rise import RISEStack
from xaitk_saliency.impls.gen_object_detector_blackbox_sal.drise import RandomGridStack, DRISEStack
from xaitk_saliency.interfaces.gen_object_detector_blackbox_sal import GenerateObjectDetectorBlackboxSaliency
from smqtk_detection.interfaces.detect_image_objects import DetectImageObjects
from smqtk_classifier.interfaces.classify_image import ClassifyImage

# Download the sample image and render it with matplotlib.
os.makedirs('data', exist_ok=True)
test_image_filename = 'data/catdog.jpg'
urllib.request.urlretrieve('https://farm1.staticflickr.com/74/202734059_fcce636dcd_z.jpg', test_image_filename)
plt.figure(figsize=(12, 8))
plt.axis('off')
_ = plt.imshow(PIL.Image.open(test_image_filename))

CUDA_AVAILABLE = torch.cuda.is_available()

# Pre-processing pipeline applied to every image before it reaches the model.
model_input_size = (224, 224)
model_mean = [0.485, 0.456, 0.406]
model_loader = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(model_input_size),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=model_mean,
        std=[0.229, 0.224, 0.225]
    ),
])


def _load_labels(classes_file, url, custom_categories_list=None):
    """Read category names from ``classes_file``, downloading it if absent.

    :param classes_file: Path of the text file holding one category per line.
    :param url: Location the file is fetched from when it does not exist.
    :param custom_categories_list: Optional subset of categories to focus on.
    :return: Tuple ``(labels, idxs)`` where ``idxs[i]`` is the position of
        ``labels[i]`` inside the category file.
    :raises ValueError: If a custom category is not present in the file.
    """
    if not os.path.isfile(classes_file):
        urllib.request.urlretrieve(url, classes_file)

    # Context manager so the handle is closed even if reading fails.
    with open(classes_file, "r") as f:
        categories = [line.strip() for line in f]

    labels = categories if custom_categories_list is None else custom_categories_list
    idxs = [categories.index(lbl) for lbl in labels]
    return labels, idxs


def get_sal_labels(classes_file, custom_categories_list=None):
    """Return the classification labels and their indices.

    The ImageNet class list is downloaded to ``classes_file`` when missing.
    """
    url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
    return _load_labels(classes_file, url, custom_categories_list)


def get_det_sal_labels(classes_file, custom_categories_list=None):
    """Return the object detection labels and their indices.

    The COCO class list is downloaded to ``classes_file`` when missing.
    """
    url = "https://raw.githubusercontent.com/matlab-deep-learning/Object-Detection-Using-Pretrained-YOLO-v2/main/%2Bhelper/coco-classes.txt"
    return _load_labels(classes_file, url, custom_categories_list)


@functools.lru_cache(maxsize=2)
def get_model(model_choice):
    """Return the pretrained classifier for ``model_choice`` in eval mode.

    ``"ResNet-18"`` selects ResNet-18; any other value selects ResNet-50.
    Results are memoized so the weights are not reloaded on every
    prediction or saliency request.
    """
    if model_choice == "ResNet-18":
        model = models.resnet18(pretrained=True)
    else:
        model = models.resnet50(pretrained=True)

    model = model.eval()
    if CUDA_AVAILABLE:
        model = model.cuda()
    return model


def get_detection_model(model_choice):
    """Return the object detector for ``model_choice``.

    :raises Exception: If ``model_choice`` is not ``"Faster-RCNN"``.
    """
    if model_choice == "Faster-RCNN":
        blackbox_detector = ResNetFRCNN(
            box_thresh=0.05,
            img_batch_size=1,
            # Follow the detected hardware instead of always requesting CUDA.
            use_cuda=CUDA_AVAILABLE
        )
    else:
        raise Exception("Unknown Input")
    return blackbox_detector


def get_saliency_algo(sal_choice):
    """Build the classification saliency generator named by ``sal_choice``.

    Parameters are taken from the last entry of the shared state lists.

    :raises Exception: If ``sal_choice`` is neither ``"RISE"`` nor
        ``"SlidingWindowStack"``.
    """
    if sal_choice == "RISE":
        gen_sal = RISEStack(
            n=num_masks_state[-1],
            s=spatial_res_state[-1],
            p1=p1_state[-1],
            seed=seed_state[-1],
            threads=threads_state[-1],
            debiased=debiased_state[-1]
        )
    elif sal_choice == "SlidingWindowStack":
        # SECURITY: these state values appear to come from UI text boxes and
        # are passed to ``eval``; ``ast.literal_eval`` would parse tuple
        # literals without executing arbitrary code. TODO(review): replace.
        gen_sal = SlidingWindowStack(
            window_size=eval(window_size_state[-1]),
            stride=eval(stride_state[-1]),
            threads=threads_state[-1]
        )
    else:
        raise Exception("Unknown Input")
    return gen_sal


def get_detection_saliency_algo(sal_choice):
    """Build the detection saliency generator named by ``sal_choice``.

    Parameters are taken from the last entry of the shared state lists.

    :raises Exception: If ``sal_choice`` is neither ``"RandomGridStack"``
        nor ``"DRISE"``.
    """
    if sal_choice == "RandomGridStack":
        # SECURITY: the grid size appears to come from a UI text box and is
        # passed to ``eval``; ``ast.literal_eval`` would be the safe parser.
        # TODO(review): replace.
        gen_sal = RandomGridStack(
            n=num_masks_state[-1],
            s=eval(occlusion_grid_state[-1]),
            p1=p1_state[-1],
            threads=threads_state[-1],
            seed=seed_state[-1],
        )
    elif sal_choice == "DRISE":
        gen_sal = DRISEStack(
            n=num_masks_state[-1],
            s=spatial_res_state[-1],
            p1=p1_state[-1],
            seed=seed_state[-1],
            threads=threads_state[-1]
        )
    else:
        raise Exception("Unknown Input")
    return gen_sal


data_path = "./data"
os.makedirs(data_path, exist_ok=True)

# Setup imagenet classes and ClassifyImage for generating classification saliency
classes_file = os.path.join(data_path, "imagenet_classes.txt")
sal_class_labels, sal_class_idxs = get_sal_labels(classes_file)


class TorchResnet (ClassifyImage):
    """``ClassifyImage`` wrapper around the currently selected ResNet model."""

    # Labels reported by ``get_labels``; replaced per instance by ``set_labels``.
    modified_class_labels = []

    def get_labels(self):
        """Return the labels most recently stored with ``set_labels``."""
        return self.modified_class_labels

    def set_labels(self, class_labels):
        """Store a list copy of ``class_labels``."""
        self.modified_class_labels = list(class_labels)

    @torch.no_grad()
    def classify_images(self, image_iter):
        """Yield one ``{label: confidence}`` dict per image in ``image_iter``."""
        # Input may either be an NDaray, or some arbitrary iterable of NDarray images.
        model = get_model(img_cls_model_name[-1])

        for img in image_iter:
            image_tensor = model_loader(img).unsqueeze(0)
            if CUDA_AVAILABLE:
                image_tensor = image_tensor.cuda()

            feature_vec = model(image_tensor)
            # Converting feature extractor output to probabilities.
            class_conf = torch.nn.functional.softmax(feature_vec, dim=1).cpu().detach().numpy().squeeze()
            # Only return the confidences for the focus classes
            yield dict(zip(sal_class_labels, class_conf[sal_class_idxs]))

    def get_config(self):
        """Return an empty configuration."""
        # Required by a parent class.
        return {}


blackbox_classifier = TorchResnet()
# Occlusion fill colour: the normalization mean scaled to 0-255 integers.
blackbox_fill = np.uint8(np.asarray(model_mean) * 255).tolist()

# Setup COCO object classes for generating detection saliency
obj_classes_file = os.path.join(data_path, "coco_classes.txt")
sal_obj_labels, sal_obj_idxs = get_det_sal_labels(obj_classes_file)


# Modify textbox parameters based on chosen saliency algorithm
def show_textbox_parameters(choice):
    """Return visibility updates for the parameter inputs of ``choice``.

    The classification algorithms return five updates and the detection
    algorithms four, matching the output lists wired to each dropdown.

    :raises Exception: If ``choice`` is not a known algorithm name.
    """
    if choice == 'RISE':
        return Textbox(visible=False), Textbox(visible=False), Textbox(visible=True), Textbox(visible=True), Textbox(visible=True)
    elif choice == 'SlidingWindowStack':
        return Textbox(visible=True), Textbox(visible=True), Textbox(visible=False), Textbox(visible=False), Textbox(visible=False)
    elif choice == "RandomGridStack":
        return Textbox(visible=True), Textbox(visible=False), Textbox(visible=True), Textbox(visible=True)
    elif choice == "DRISE":
        return Textbox(visible=True), Textbox(visible=True), Textbox(visible=True), Textbox(visible=False)
    else:
        raise Exception("Unknown Input")


# Modify slider parameters based on chosen saliency algorithm
def show_slider_parameters(choice):
    """Return visibility updates for the two sliders wired to ``choice``.

    :raises Exception: If ``choice`` is not a known algorithm name.
    """
    if choice == 'RISE' or choice == 'RandomGridStack' or choice == 'DRISE':
        return Slider(visible=True), Slider(visible=True)
    elif choice == 'SlidingWindowStack':
        return Slider(visible=True), Slider(visible=False)
    else:
        raise Exception("Unknown Input")


# Modify checkbox parameters based on chosen saliency algorithm
def show_debiased_checkbox(choice):
    """Return a checkbox update that is visible only for ``'RISE'``.

    :raises Exception: If ``choice`` is not a known algorithm name.
    """
    if choice == 'RISE':
        return Checkbox(visible=True)
    elif choice == 'SlidingWindowStack' or choice == 'RandomGridStack' or choice == 'DRISE':
        return Checkbox(visible=False)
    else:
        raise Exception("Unknown Input")


# Function that is called after clicking the "Classify" button in the demo
def predict(x, top_n_classes):
    """Classify image ``x`` and return its top ``top_n_classes`` labels.

    :param x: Image accepted by ``model_loader``.
    :param top_n_classes: Number of highest-confidence classes to keep.
    :return: Tuple of the ``{label: confidence}`` dict and a ``Dropdown``
        whose choices are those labels.
    """
    image_tensor = model_loader(x).unsqueeze(0)
    if CUDA_AVAILABLE:
        image_tensor = image_tensor.cuda()
    model = get_model(img_cls_model_name[-1])
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        feature_vec = model(image_tensor)
    class_conf = torch.nn.functional.softmax(feature_vec, dim=1).cpu().detach().numpy().squeeze()
    labels = list(zip(sal_class_labels, class_conf[sal_class_idxs].tolist()))
    # ``int`` guards against a slider delivering a float, which cannot slice.
    top_n = int(top_n_classes)
    final_labels = dict(sorted(labels, key=lambda t: t[1], reverse=True)[:top_n])
    return final_labels, Dropdown(choices=list(final_labels))


# Interpretation function for image classification that implements the selected saliency algorithm and generates the class-wise saliency map visualizations
def interpretation_function(image: np.ndarray,
                            labels: dict,
                            nth_class: str,
                            img_alpha,
                            sal_alpha,
                            sal_range_min,
                            sal_range_max):
    """Generate and plot the saliency map of ``nth_class`` for ``image``.

    :param image: Image to explain.
    :param labels: Mapping whose keys are the candidate class labels.
    :param nth_class: Label, among the keys of ``labels``, to visualize.
    :param img_alpha: Opacity of the image layer.
    :param sal_alpha: Opacity of the saliency layer.
    :param sal_range_min: Lower clipping bound for saliency values.
    :param sal_range_max: Upper clipping bound for saliency values.
    :return: The matplotlib figure holding the overlay.
    """
    sal_generator = get_saliency_algo(img_cls_saliency_algo_name[-1])
    sal_generator.fill = blackbox_fill
    labels_list = labels.keys()
    blackbox_classifier.set_labels(labels_list)
    sal_maps = sal_generator(image, blackbox_classifier)
    nth_class_index = blackbox_classifier.get_labels().index(nth_class)
    fig = visualize_saliency_plot(image,
                                  sal_maps[nth_class_index, :, :],
                                  img_alpha,
                                  sal_alpha,
                                  sal_range_min,
                                  sal_range_max)
    return fig


def visualize_saliency_plot(image: np.ndarray,
                            class_sal_map: np.ndarray,
                            img_alpha,
                            sal_alpha,
                            sal_range_min,
                            sal_range_max):
    """Overlay ``class_sal_map`` on ``image`` and return the closed figure.

    Saliency values are clipped to ``[sal_range_min, sal_range_max]`` and the
    colour limits are set to the same range.
    """
    colorbar_kwargs = {
        # Scale the colorbar with the image's height/width ratio.
        "fraction": 0.046*(image.shape[0]/image.shape[1]),
        "pad": 0.04,
    }
    fig = plt.figure()
    plt.imshow(image, alpha=img_alpha)
    plt.imshow(
        np.clip(class_sal_map, sal_range_min, sal_range_max),
        cmap='jet', alpha=sal_alpha
    )
    plt.clim(sal_range_min, sal_range_max)
    plt.colorbar(**colorbar_kwargs)
    plt.title("Saliency Map")
    plt.axis('off')
    plt.close(fig)
    return fig


# Generate top-n object detect predictions on the input image
def run_detect(input_img: np.ndarray, num_detections: int):
    """Run the selected detector and return the first ``num_detections``.

    Each detection is labelled ``"<index> : <best class> - <confidence>"``.

    :param input_img: Image to run detection on.
    :param num_detections: Maximum number of detections to return.
    :return: Tuple of ``(input_img, [(bbox, label), ...])`` and a
        ``Dropdown`` whose choices are the returned labels. Both are empty
        when the detector yields no predictions.
    """
    detect_model = get_detection_model(obj_det_model_name[-1])
    preds = list(list(detect_model([input_img]))[0])
    top_n = int(num_detections)

    bboxes_list = []
    final_label = []
    for i, (bbox, score_dict) in enumerate(preds):
        # Round-trip through float32 before truncating to integer pixels.
        corners = np.asarray((*bbox.min_vertex, *bbox.max_vertex), dtype=np.float32)
        bboxes_list.append(corners.astype(int).tolist())

        # First highest-scoring class for this detection.
        label_name, best_score = max(score_dict.items(), key=lambda kv: kv[1])
        conf_score = str(round(best_score, 4))
        final_label.append(f"{i} : {label_name} - {conf_score}")

    annotations = list(zip(bboxes_list, final_label))[:top_n]
    return (input_img, annotations), Dropdown(choices=final_label[:top_n])


# Run saliency algorithm on the object detect predictions and generate corresponding visualizations
def run_detect_saliency(input_img: np.ndarray,
                        num_predictions,
                        obj_label,
                        img_alpha,
                        sal_alpha,
                        sal_range_min,
                        sal_range_max):
    """Compute detection saliency and plot the map chosen by ``obj_label``.

    :param input_img: Image to explain.
    :param num_predictions: Number of leading detections used as references.
    :param obj_label: Label whose text before ``' : '`` is the detection index.
    :param img_alpha: Opacity of the image layer.
    :param sal_alpha: Opacity of the saliency layer.
    :param sal_range_min: Lower clipping bound for saliency values.
    :param sal_range_max: Upper clipping bound for saliency values.
    :return: The matplotlib figure holding the overlay.
    """
    detect_model = get_detection_model(obj_det_model_name[-1])
    img_preds = list(list(detect_model([input_img]))[0])
    ref_preds = img_preds[:int(num_predictions)]

    ref_bboxes = []
    ref_scores = []
    for det in ref_preds:
        bbox = det[0]
        ref_bboxes.append([
            *bbox.min_vertex,
            *bbox.max_vertex,
        ])

        score_dict = det[1]
        ref_scores.append(list(score_dict.values()))

    ref_bboxes = np.array(ref_bboxes)
    ref_scores = np.array(ref_scores)

    print(f"Ref bboxes: {ref_bboxes.shape}")
    print(f"Ref scores: {ref_scores.shape}")

    sal_generator = get_detection_saliency_algo(obj_det_saliency_algo_name[-1])
    sal_generator.fill = blackbox_fill

    sal_maps = gen_det_saliency(input_img, detect_model, sal_generator, ref_bboxes, ref_scores)
    print(f"Saliency maps: {sal_maps.shape}")

    nth_class_index = int(obj_label.split(' : ')[0])
    fig = visualize_saliency_plot(input_img,
                                  sal_maps[nth_class_index, :, :],
                                  img_alpha,
                                  sal_alpha,
                                  sal_range_min,
                                  sal_range_max)
    return fig


def gen_det_saliency(input_img: np.ndarray,
                     blackbox_detector: DetectImageObjects,
                     sal_map_generator: GenerateObjectDetectorBlackboxSaliency,
                     ref_bboxes: np.ndarray,
                     ref_scores: np.ndarray
                     ):
    """Return the result of ``sal_map_generator.generate`` for the inputs."""
    sal_maps = sal_map_generator.generate(
        input_img,
        ref_bboxes,
        ref_scores,
        blackbox_detector,
    )
    return sal_maps


# NOTE(review): the nesting of the layout below was reconstructed from a
# whitespace-collapsed source - confirm it matches the intended arrangement.
with gr.Blocks() as demo:
    with Tab("Image Classification"):
        with Row():
            with Column(scale=0.5):
                drop_list = Dropdown(
                    value=img_cls_model_name[-1],
                    choices=["ResNet-18", "ResNet-50"],
                    label="Choose Model",
                    interactive=True,
                )
            with Column(scale=0.5):
                drop_list_sal = Dropdown(
                    value=img_cls_saliency_algo_name[-1],
                    choices=["SlidingWindowStack", "RISE"],
                    label="Choose Saliency Algorithm",
                    interactive=True,
                )
        with Row():
            with Column(scale=0.33):
                window_size = Textbox(
                    value=window_size_state[-1],
                    label="Tuple of window size values (Press Enter to submit the input)",
                    interactive=True,
                    visible=False,
                )
                masks = Number(
                    value=num_masks_state[-1],
                    label="Number of Random Masks (Press Enter to submit the input)",
                    interactive=True,
                    visible=True,
                    precision=0,
                )
            with Column(scale=0.33):
                stride = Textbox(
                    value=stride_state[-1],
                    label="Tuple of stride values (Press Enter to submit the input)",
                    interactive=True,
                    visible=False,
                )
                spatial_res = Number(
                    value=spatial_res_state[-1],
                    label="Spatial Resolution of Masking Grid (Press Enter to submit the input)",
                    interactive=True,
                    visible=True,
                    precision=0,
                )
            with Column(scale=0.33):
                threads = Slider(value=threads_state[-1], label="Threads", interactive=True, visible=True)
        with Row():
            with Column(scale=0.33):
                seed = Number(
                    value=seed_state[-1],
                    label="Seed (Press Enter to submit the input)",
                    interactive=True,
                    visible=True,
                    precision=0,
                )
            with Column(scale=0.33):
                p1 = Slider(
                    value=p1_state[-1],
                    label="P1",
                    interactive=True,
                    visible=True,
                    minimum=0,
                    maximum=1,
                    step=0.1,
                )
            with Column(scale=0.33):
                debiased = Checkbox(value=debiased_state[-1], label="Debiased", interactive=True, visible=True)
        with Row():
            with Column():
                input_img = Image(label="Saliency Map Generation", width=640, height=480)
                num_classes = Slider(value=2, label="Top-N class labels", interactive=True, visible=True)
                classify = Button("Classify")
            with Column():
                class_label = Label(label="Predicted Class")
            with Column():
                with Row():
                    class_name = Dropdown(label="Class to compute saliency", interactive=True, visible=True)
                with Row():
                    img_alpha = Slider(
                        value=0.7, label="Image Opacity", interactive=True,
                        visible=True, minimum=0, maximum=1, step=0.1,
                    )
                    sal_alpha = Slider(
                        value=0.3, label="Saliency Map Opacity", interactive=True,
                        visible=True, minimum=0, maximum=1, step=0.1,
                    )
                with Row():
                    min_sal_range = Slider(
                        value=0, label="Minimum Saliency Value", interactive=True,
                        visible=True, minimum=-1, maximum=1, step=0.05,
                    )
                    max_sal_range = Slider(
                        value=1, label="Maximum Saliency Value", interactive=True,
                        visible=True, minimum=-1, maximum=1, step=0.05,
                    )
                with Row():
                    generate_saliency = Button("Generate Saliency")
            with Column():
                with Tabs():
                    with TabItem("Display interpretation with plot"):
                        interpretation_plot = Plot()

    with Tab("Object Detection"):
        with Row():
            with Column(scale=0.5):
                drop_list_detect_model = Dropdown(
                    value=obj_det_model_name[-1],
                    choices=["Faster-RCNN"],
                    label="Choose Model",
                    interactive=True,
                )
            with Column(scale=0.5):
                drop_list_detect_sal = Dropdown(
                    value=obj_det_saliency_algo_name[-1],
                    choices=["RandomGridStack", "DRISE"],
                    label="Choose Saliency Algorithm",
                    interactive=True,
                )
        with Row():
            with Column(scale=0.33):
                masks_detect = Number(
                    value=num_masks_state[-1],
                    label="Number of Random Masks (Press Enter to submit the input)",
                    interactive=True,
                    visible=True,
                    precision=0,
                )
                occlusion_grid_size = Textbox(
                    value=occlusion_grid_state[-1],
                    label="Tuple of occlusion grid size values (Press Enter to submit the input)",
                    interactive=True,
                    visible=False,
                )
                spatial_res_detect = Number(
                    value=spatial_res_state[-1],
                    label="Spatial Resolution of Masking Grid (Press Enter to submit the input)",
                    interactive=True,
                    visible=True,
                    precision=0,
                )
            with Column(scale=0.33):
                seed_detect = Number(
                    value=seed_state[-1],
                    label="Seed (Press Enter to submit the input)",
                    interactive=True,
                    visible=True,
                    precision=0,
                )
                p1_detect = Slider(
                    value=p1_state[-1],
                    label="P1",
                    interactive=True,
                    visible=True,
                    minimum=0,
                    maximum=1,
                    step=0.1,
                )
            with Column(scale=0.33):
                threads_detect = Slider(value=threads_state[-1], label="Threads", interactive=True, visible=True)
        with Row():
            with Column():
                input_img_detect = Image(label="Saliency Map Generation", width=640, height=480)
                num_detections = Slider(value=2, label="Top-N detections", interactive=True, visible=True)
                detection = Button("Run Detection Algorithm")
            with Column():
                detect_label = AnnotatedImage(label="Detections")
            with Column():
                with Row():
                    class_name_det = Dropdown(label="Detection to compute saliency", interactive=True, visible=True)
                with Row():
                    img_alpha_det = Slider(
                        value=0.7, label="Image Opacity", interactive=True,
                        visible=True, minimum=0, maximum=1, step=0.1,
                    )
                    sal_alpha_det = Slider(
                        value=0.3, label="Saliency Map Opacity", interactive=True,
                        visible=True, minimum=0, maximum=1, step=0.1,
                    )
                with Row():
                    min_sal_range_det = Slider(
                        value=0.95, label="Minimum Saliency Value", interactive=True,
                        visible=True, minimum=0.80, maximum=1, step=0.05,
                    )
                    max_sal_range_det = Slider(
                        value=1, label="Maximum Saliency Value", interactive=True,
                        visible=True, minimum=0.80, maximum=1, step=0.05,
                    )
                with Row():
                    generate_det_saliency = Button("Generate Saliency")
            with Column():
                with Tabs():
                    with TabItem("Display saliency map plot"):
                        det_saliency_plot = Plot()

    # Image Classification dropdown list event listeners
    drop_list.select(select_img_cls_model, drop_list, drop_list)
    drop_list_sal.select(select_img_cls_saliency_algo, drop_list_sal, drop_list_sal)
    drop_list_sal.change(show_textbox_parameters, drop_list_sal, [window_size, stride, masks, spatial_res, seed])
    drop_list_sal.change(show_slider_parameters, drop_list_sal, [threads, p1])
    drop_list_sal.change(show_debiased_checkbox, drop_list_sal, debiased)

    # Image Classification textbox, slider and checkbox event listeners
    window_size.submit(enter_window_size, window_size, window_size)
    masks.submit(enter_num_masks, masks, masks)
    stride.submit(enter_stride, stride, stride)
    spatial_res.submit(enter_spatial_res, spatial_res, spatial_res)
    seed.submit(enter_seed, seed, seed)
    threads.change(select_threads, threads, threads)
    p1.change(select_p1, p1, p1)
    debiased.change(check_debiased, debiased, debiased)

    # Image Classification prediction and saliency generation event listeners
    classify.click(predict, [input_img, num_classes], [class_label, class_name])
    generate_saliency.click(
        interpretation_function,
        [input_img, class_label, class_name, img_alpha, sal_alpha, min_sal_range, max_sal_range],
        [interpretation_plot],
    )

    # Object Detection dropdown list event listeners
    drop_list_detect_model.select(select_obj_det_model, drop_list_detect_model, drop_list_detect_model)
    drop_list_detect_sal.select(select_obj_det_saliency_algo, drop_list_detect_sal, drop_list_detect_sal)
    drop_list_detect_sal.change(show_slider_parameters, drop_list_detect_sal, [threads_detect, p1_detect])
    drop_list_detect_sal.change(
        show_textbox_parameters,
        drop_list_detect_sal,
        [masks_detect, spatial_res_detect, seed_detect, occlusion_grid_size],
    )

    # Object detection textbox and slider event listeners
    masks_detect.submit(enter_num_masks, masks_detect, masks_detect)
    occlusion_grid_size.submit(enter_occlusion_grid_size, occlusion_grid_size, occlusion_grid_size)
    spatial_res_detect.submit(enter_spatial_res, spatial_res_detect, spatial_res_detect)
    seed_detect.submit(enter_seed, seed_detect, seed_detect)
    threads_detect.change(select_threads, threads_detect, threads_detect)
    p1_detect.change(select_p1, p1_detect, p1_detect)

    # Object detection prediction, class selection and saliency generation event listeners
    detection.click(run_detect, [input_img_detect, num_detections], [detect_label, class_name_det])
    generate_det_saliency.click(
        run_detect_saliency,
        [input_img_detect, num_detections, class_name_det, img_alpha_det, sal_alpha_det,
         min_sal_range_det, max_sal_range_det],
        det_saliency_plot,
    )

demo.launch()