## Gradio Example

# This app makes use of the saliency generation example found in the base ``xaitk-saliency`` repo [here](https://github.com/XAITK/xaitk-saliency/blob/master/examples/OcclusionSaliency.ipynb), and explores integrating ``xaitk-saliency`` with ``Gradio`` to create an interactive interface for computing saliency maps.

import ast
import os
import urllib
import urllib.request

import matplotlib.pyplot as plt  # type: ignore
import numpy as np
import PIL.Image

import gradio as gr
from gradio import (  # type: ignore
    AnnotatedImage, Button, Column, Image, Label,  # type: ignore
    Number, Plot, Row, TabItem, Tab, Tabs,  # type: ignore
    Checkbox, Dropdown, Slider, Textbox  # type: ignore
)

# State variables for Image Classification
from gr_component_state import ( # type: ignore
    img_cls_model_name, img_cls_saliency_algo_name, window_size_state, stride_state, debiased_state,
)

# State functions for Image Classification
from gr_component_state import ( # type: ignore
    select_img_cls_model, select_img_cls_saliency_algo, enter_window_size, enter_stride, check_debiased
)

# State variables for Object Detection
from gr_component_state import ( # type: ignore
    obj_det_model_name, obj_det_saliency_algo_name, occlusion_grid_state
)

# State functions for Object Detection
from gr_component_state import ( # type: ignore
    select_obj_det_model, select_obj_det_saliency_algo, enter_occlusion_grid_size
)

# Common state variables
from gr_component_state import ( # type: ignore
    threads_state, num_masks_state, spatial_res_state, p1_state, seed_state
)

# Common state functions
from gr_component_state import ( # type: ignore
    select_threads, enter_num_masks, enter_spatial_res, select_p1, enter_seed
)

import torch
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn.functional

from smqtk_detection.impls.detect_image_objects.resnet_frcnn import ResNetFRCNN
from xaitk_saliency.impls.gen_image_classifier_blackbox_sal.slidingwindow import SlidingWindowStack
from xaitk_saliency.impls.gen_image_classifier_blackbox_sal.rise import RISEStack
from xaitk_saliency.impls.gen_object_detector_blackbox_sal.drise import RandomGridStack, DRISEStack
from xaitk_saliency.interfaces.gen_object_detector_blackbox_sal import GenerateObjectDetectorBlackboxSaliency
from smqtk_detection.interfaces.detect_image_objects import DetectImageObjects
from smqtk_classifier.interfaces.classify_image import ClassifyImage


# Fetch the demo image into ./data. The download is skipped when the file is
# already present so that restarting the app does not hit the network again.
os.makedirs('data', exist_ok=True)
test_image_filename = 'data/catdog.jpg'
if not os.path.isfile(test_image_filename):
    urllib.request.urlretrieve('https://farm1.staticflickr.com/74/202734059_fcce636dcd_z.jpg', test_image_filename)

# Draw the demo image on a pyplot figure with the axes hidden.
plt.figure(figsize=(12, 8))
plt.axis('off')
_ = plt.imshow(PIL.Image.open(test_image_filename))

# True when torch reports a usable CUDA device.
CUDA_AVAILABLE = torch.cuda.is_available()

# Classifier input resolution and per-channel normalisation mean. The mean is
# reused further down in this file to derive the saliency fill value.
model_input_size = (224, 224)
model_mean = [0.485, 0.456, 0.406]

# Preprocessing applied to each image before it is handed to the classifier:
# convert to a PIL image, resize, convert to a tensor, normalise per channel.
_preprocessing_steps = [
    transforms.ToPILImage(),
    transforms.Resize(model_input_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=model_mean, std=[0.229, 0.224, 0.225]),
]
model_loader = transforms.Compose(_preprocessing_steps)

def get_sal_labels(classes_file, custom_categories_list=None):
    """Load the classifier category names and resolve the labels of interest.

    The categories are read from ``classes_file``, one name per line with
    surrounding whitespace stripped. When the file does not exist it is
    downloaded first.

    Args:
        classes_file: Path of the local text file holding the category names.
        custom_categories_list: Optional list of category names to restrict
            the result to. When ``None`` every category in the file is used.

    Returns:
        A ``(labels, indices)`` tuple in which ``indices[i]`` is the position
        of the first occurrence of ``labels[i]`` in the category file.

    Raises:
        ValueError: If a requested label is not present in the category file.
    """
    if not os.path.isfile(classes_file):
        url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
        urllib.request.urlretrieve(url, classes_file)

    # The context manager releases the file handle even if reading fails.
    with open(classes_file, "r") as f:
        categories = [line.strip() for line in f]

    if custom_categories_list is not None:
        sal_class_labels = custom_categories_list
    else:
        sal_class_labels = categories

    sal_class_idxs = [categories.index(lbl) for lbl in sal_class_labels]

    return sal_class_labels, sal_class_idxs

def get_det_sal_labels(classes_file, custom_categories_list=None):
    """Load the detector category names and resolve the labels of interest.

    The categories are read from ``classes_file``, one name per line with
    surrounding whitespace stripped. When the file does not exist it is
    downloaded first.

    Args:
        classes_file: Path of the local text file holding the category names.
        custom_categories_list: Optional list of category names to restrict
            the result to. When ``None`` every category in the file is used.

    Returns:
        A ``(labels, indices)`` tuple in which ``indices[i]`` is the position
        of the first occurrence of ``labels[i]`` in the category file.

    Raises:
        ValueError: If a requested label is not present in the category file.
    """
    if not os.path.isfile(classes_file):
        url = "https://raw.githubusercontent.com/matlab-deep-learning/Object-Detection-Using-Pretrained-YOLO-v2/main/%2Bhelper/coco-classes.txt"
        urllib.request.urlretrieve(url, classes_file)

    # The context manager releases the file handle even if reading fails.
    with open(classes_file, "r") as f:
        categories = [line.strip() for line in f]

    if custom_categories_list is not None:
        sal_obj_labels = custom_categories_list
    else:
        sal_obj_labels = categories

    sal_obj_idxs = [categories.index(lbl) for lbl in sal_obj_labels]

    return sal_obj_labels, sal_obj_idxs

def get_model(model_choice):
    """Instantiate the torchvision ResNet classifier named by ``model_choice``.

    ``"ResNet-18"`` selects ``resnet18``; every other value selects
    ``resnet50``. The network is requested with ``pretrained=True``, switched
    to eval mode and moved to the GPU when ``CUDA_AVAILABLE`` is set.

    Args:
        model_choice: Model name as offered by the UI dropdown.

    Returns:
        The model in eval mode.
    """
    builder = models.resnet18 if model_choice == "ResNet-18" else models.resnet50
    network = builder(pretrained=True).eval()
    return network.cuda() if CUDA_AVAILABLE else network

def get_detection_model(model_choice):
    """Instantiate the object detector named by ``model_choice``.

    Args:
        model_choice: Detector name as offered by the UI dropdown. Only
            ``"Faster-RCNN"`` is supported.

    Returns:
        A ``ResNetFRCNN`` detector with a 0.05 box threshold and batch size 1.

    Raises:
        Exception: If ``model_choice`` is not a supported detector name.
    """
    if model_choice != "Faster-RCNN":
        raise Exception("Unknown Input")

    return ResNetFRCNN(
        box_thresh=0.05,
        img_batch_size=1,
        # Request CUDA only when a device is actually present, mirroring
        # get_model; a hard-coded True asks for a GPU on CPU-only hosts too.
        use_cuda=CUDA_AVAILABLE,
    )

def get_saliency_algo(sal_choice):
    """Build the image-classification saliency generator named by ``sal_choice``.

    Hyper-parameters are taken from the last entry of the shared ``*_state``
    lists imported from ``gr_component_state``.

    Args:
        sal_choice: ``"RISE"`` or ``"SlidingWindowStack"``.

    Returns:
        A configured ``RISEStack`` or ``SlidingWindowStack`` instance.

    Raises:
        Exception: If ``sal_choice`` is not one of the supported names.
        ValueError, SyntaxError: If the window-size or stride text is not a
            valid Python literal.
    """
    if sal_choice == "RISE":
        return RISEStack(
            n=num_masks_state[-1],
            s=spatial_res_state[-1],
            p1=p1_state[-1],
            seed=seed_state[-1],
            threads=threads_state[-1],
            debiased=debiased_state[-1]
        )

    if sal_choice == "SlidingWindowStack":
        # SECURITY: these values are free text bound to UI textboxes. They
        # were previously passed to eval(), which would execute arbitrary
        # expressions typed by a user; literal_eval only accepts literals
        # such as "(50, 50)".
        return SlidingWindowStack(
            window_size=ast.literal_eval(window_size_state[-1]),
            stride=ast.literal_eval(stride_state[-1]),
            threads=threads_state[-1]
        )

    raise Exception("Unknown Input")

def get_detection_saliency_algo(sal_choice):
    """Build the object-detection saliency generator named by ``sal_choice``.

    Hyper-parameters are taken from the last entry of the shared ``*_state``
    lists imported from ``gr_component_state``.

    Args:
        sal_choice: ``"RandomGridStack"`` or ``"DRISE"``.

    Returns:
        A configured ``RandomGridStack`` or ``DRISEStack`` instance.

    Raises:
        Exception: If ``sal_choice`` is not one of the supported names.
        ValueError, SyntaxError: If the occlusion-grid text is not a valid
            Python literal.
    """
    if sal_choice == "RandomGridStack":
        return RandomGridStack(
            n=num_masks_state[-1],
            # SECURITY: the grid size is free text bound to a UI textbox. It
            # was previously passed to eval(), which would execute arbitrary
            # expressions typed by a user; literal_eval only accepts literals.
            s=ast.literal_eval(occlusion_grid_state[-1]),
            p1=p1_state[-1],
            threads=threads_state[-1],
            seed=seed_state[-1],
        )

    if sal_choice == "DRISE":
        return DRISEStack(
            n=num_masks_state[-1],
            s=spatial_res_state[-1],
            p1=p1_state[-1],
            seed=seed_state[-1],
            threads=threads_state[-1]
        )

    raise Exception("Unknown Input")


# Directory holding downloaded label files. exist_ok avoids the
# check-then-create race and matches how ./data is created earlier in this file.
data_path = "./data"
os.makedirs(data_path, exist_ok=True)

# Setup imagenet classes and ClassifyImage for generating classification saliency

classes_file = os.path.join(data_path, "imagenet_classes.txt")
sal_class_labels, sal_class_idxs = get_sal_labels(classes_file)

class TorchResnet (ClassifyImage):
    """``ClassifyImage`` implementation backed by the ResNet chosen in the UI.

    The classifier keeps a list of "focus" labels. ``classify_images`` reports
    confidences for exactly those labels, in that order, so the position of a
    label in ``get_labels()`` matches the position of its confidence in every
    yielded dict.
    """

    # Labels currently in focus. ``set_labels`` rebinds this on the instance;
    # the class-level empty list is only the "nothing selected yet" default.
    modified_class_labels = []

    def get_labels(self):
        """Return the labels currently in focus."""
        return self.modified_class_labels

    def set_labels(self, class_labels):
        """Replace the focus labels with a copy of ``class_labels``."""
        self.modified_class_labels = list(class_labels)

    @torch.no_grad()
    def classify_images(self, image_iter):
        """Yield one ``{label: confidence}`` dict per image in ``image_iter``.

        Confidences are the softmax of the model output. Only the focus labels
        are reported; when none have been set, every label in
        ``sal_class_labels`` is reported.

        Raises:
            KeyError: If a focus label is not one of ``sal_class_labels``.
        """
        # Input may either be an NDarray, or some arbitrary iterable of NDarray images.

        model = get_model(img_cls_model_name[-1])

        # Resolve the focus labels to model output indices once per call.
        # Previously every class was yielded regardless of the focus labels,
        # so indexing the result by position in get_labels() picked the
        # wrong class.
        focus_labels = self.get_labels() or sal_class_labels
        label_to_idx = dict(zip(sal_class_labels, sal_class_idxs))
        focus_idxs = [label_to_idx[lbl] for lbl in focus_labels]

        for img in image_iter:
            image_tensor = model_loader(img).unsqueeze(0)
            if CUDA_AVAILABLE:
                image_tensor = image_tensor.cuda()

            feature_vec = model(image_tensor)
            # Converting feature extractor output to probabilities.
            class_conf = torch.nn.functional.softmax(feature_vec, dim=1).cpu().detach().numpy().squeeze()
            # Only return the confidences for the focus classes
            yield dict(zip(focus_labels, class_conf[focus_idxs]))

    def get_config(self):
        """Return an empty configuration dict."""
        # Required by a parent class.
        return {}

# Classifier wrapper used by the classification saliency callback.
blackbox_classifier = TorchResnet()
# Fill value assigned to the saliency generators: the per-channel
# normalisation mean scaled by 255 and converted to uint8.
blackbox_fill = np.uint8(np.asarray(model_mean) * 255).tolist()

# Setup COCO object classes for generating detection saliency

obj_classes_file = os.path.join(data_path, "coco_classes.txt")
sal_obj_labels, sal_obj_idxs = get_det_sal_labels(obj_classes_file)


# Modify textbox parameters based on chosen saliency algorithm
def show_textbox_parameters(choice):
    """Return visibility updates for the text/number inputs of an algorithm.

    The classification algorithms ('RISE', 'SlidingWindowStack') produce five
    updates and the detection algorithms ("RandomGridStack", "DRISE") produce
    four; each update is a ``Textbox`` carrying only a ``visible`` flag.

    Raises:
        Exception: If ``choice`` is not a known algorithm name.
    """
    visibility_table = (
        ('RISE', (False, False, True, True, True)),
        ('SlidingWindowStack', (True, True, False, False, False)),
        ("RandomGridStack", (True, False, True, True)),
        ("DRISE", (True, True, True, False)),
    )
    for algo_name, flags in visibility_table:
        if choice == algo_name:
            return tuple(Textbox(visible=flag) for flag in flags)
    raise Exception("Unknown Input")

# Modify slider parameters based on chosen saliency algorithm
def show_slider_parameters(choice):
    """Return visibility updates for the two sliders of an algorithm.

    The first slider is always shown; the second is hidden only for
    'SlidingWindowStack'.

    Raises:
        Exception: If ``choice`` is not a known algorithm name.
    """
    if choice not in ('RISE', 'RandomGridStack', 'DRISE', 'SlidingWindowStack'):
        raise Exception("Unknown Input")
    second_visible = not (choice == 'SlidingWindowStack')
    return Slider(visible=True), Slider(visible=second_visible)
    
# Modify checkbox parameters based on chosen saliency algorithm
def show_debiased_checkbox(choice):
    """Return a visibility update for the checkbox: shown only for 'RISE'.

    Raises:
        Exception: If ``choice`` is not a known algorithm name.
    """
    hidden_for = ('SlidingWindowStack', 'RandomGridStack', 'DRISE')
    if choice == 'RISE':
        show = True
    elif choice in hidden_for:
        show = False
    else:
        raise Exception("Unknown Input")
    return Checkbox(visible=show)

# Function that is called after clicking the "Classify" button in the demo
def predict(x, top_n_classes):
    """Classify ``x`` and return its highest-confidence classes.

    Args:
        x: Input image, passed through ``model_loader`` before inference.
        top_n_classes: Number of classes to keep. Coerced with ``int`` so a
            float-valued slider reading can be used as a slice bound.

    Returns:
        A tuple ``(final_labels, dropdown)`` where ``final_labels`` maps each
        kept class label to its confidence, ordered from most to least
        confident, and ``dropdown`` is a ``Dropdown`` whose choices are those
        labels.
    """
    image_tensor = model_loader(x).unsqueeze(0)
    if CUDA_AVAILABLE:
        image_tensor = image_tensor.cuda()
    model = get_model(img_cls_model_name[-1])

    # Inference only: skip autograd bookkeeping, as classify_images does.
    with torch.no_grad():
        feature_vec = model(image_tensor)

    class_conf = torch.nn.functional.softmax(feature_vec, dim=1).cpu().detach().numpy().squeeze()
    labels = list(zip(sal_class_labels, class_conf[sal_class_idxs].tolist()))
    ranked = sorted(labels, key=lambda t: t[1], reverse=True)
    final_labels = dict(ranked[:int(top_n_classes)])

    return final_labels, Dropdown(choices=list(final_labels))

# Interpretation function for image classification that implements the selected saliency algorithm and generates the class-wise saliency map visualizations
def interpretation_function(image: np.ndarray, 
                        labels: dict,
                        nth_class: str, 
                        img_alpha,
                        sal_alpha,
                        sal_range_min,
                        sal_range_max):
    """Generate and plot the saliency map of ``nth_class`` for ``image``.

    The keys of ``labels`` become the classifier's focus labels, the selected
    saliency generator is run, and the map at the position of ``nth_class``
    within those focus labels is rendered.

    NOTE(review): this assumes the generator returns its maps in the same
    order as ``blackbox_classifier.get_labels()`` - confirm against
    ``classify_images``.

    Returns:
        The matplotlib figure produced by ``visualize_saliency_plot``.

    Raises:
        ValueError: If ``nth_class`` is not among the focus labels.
    """
    generator = get_saliency_algo(img_cls_saliency_algo_name[-1])
    generator.fill = blackbox_fill

    # Restrict the classifier to the labels shown to the user.
    blackbox_classifier.set_labels(labels.keys())
    class_maps = generator(image, blackbox_classifier)

    selected = blackbox_classifier.get_labels().index(nth_class)
    return visualize_saliency_plot(
        image,
        class_maps[selected, :, :],
        img_alpha,
        sal_alpha,
        sal_range_min,
        sal_range_max,
    )

def visualize_saliency_plot(image: np.ndarray, 
                            class_sal_map: np.ndarray,
                            img_alpha,
                            sal_alpha,
                            sal_range_min,
                            sal_range_max):
    """Overlay a saliency heat-map on ``image`` and return the figure.

    Args:
        image: Image drawn as the bottom layer.
        class_sal_map: Saliency map, clipped to the requested range and drawn
            on top with the 'jet' colormap.
        img_alpha: Opacity of the image layer.
        sal_alpha: Opacity of the heat-map layer.
        sal_range_min: Lower clip value and lower colour limit.
        sal_range_max: Upper clip value and upper colour limit.

    Returns:
        The matplotlib figure, already closed in pyplot.
    """
    # Colorbar width is scaled by the image height/width ratio.
    colorbar_fraction = 0.046 * (image.shape[0] / image.shape[1])

    fig, ax = plt.subplots()
    ax.imshow(image, alpha=img_alpha)
    heatmap = ax.imshow(
        np.clip(class_sal_map, sal_range_min, sal_range_max),
        cmap='jet', alpha=sal_alpha
    )
    heatmap.set_clim(sal_range_min, sal_range_max)
    fig.colorbar(heatmap, ax=ax, fraction=colorbar_fraction, pad=0.04)
    ax.set_title("Saliency Map")
    ax.axis('off')
    # Close the figure in pyplot; the Figure object itself is still returned.
    plt.close(fig)

    return fig

# Generate top-n object detect predictions on the input image
def run_detect(input_img: np.ndarray, num_detections: int):
    """Run the selected detector and describe its first ``num_detections`` results.

    Each detection is captioned ``"<index> : <label> - <score>"`` using its
    highest-scoring class, with the score rounded to four decimals.

    Args:
        input_img: Image to run detection on.
        num_detections: Number of detections to keep. Coerced with ``int`` so
            a float-valued slider reading can be used as a slice bound.

    Returns:
        A tuple ``((input_img, annotations), dropdown)``. ``annotations`` is a
        list of ``([x1, y1, x2, y2], caption)`` pairs with integer
        coordinates, and ``dropdown`` is a ``Dropdown`` whose choices are the
        same captions. Both are empty when the detector returns nothing.
    """
    detect_model = get_detection_model(obj_det_model_name[-1])
    preds = list(list(detect_model([input_img]))[0])
    top_n = int(num_detections)

    boxes = []
    captions = []
    for i, (bbox, score_dict) in enumerate(preds):
        boxes.append([*bbox.min_vertex, *bbox.max_vertex])
        # max() returns the first maximal entry, so ties resolve to the
        # earliest class, and the label comes from this detection's own dict.
        best_label, best_score = max(score_dict.items(), key=lambda item: item[1])
        captions.append(str(i) + " : " + str(best_label) + " - " + str(round(best_score, 4)))

    # Coordinates go through float32 before integer truncation, as before.
    bboxes_list = np.asarray(boxes, dtype=np.float32).astype(int).tolist()
    annotations = list(zip(bboxes_list, captions))[:top_n]

    return (input_img, annotations), Dropdown(choices=captions[:top_n])

# Run saliency algorithm on the object detect predictions and generate corresponding visualizations
def run_detect_saliency(input_img: np.ndarray, 
                        num_predictions,
                        obj_label, 
                        img_alpha,
                        sal_alpha,
                        sal_range_min,
                        sal_range_max):
    """Generate and plot the saliency map of one detection in ``input_img``.

    The detector is re-run, its first ``num_predictions`` detections become
    the reference boxes and scores, and the selected saliency generator is
    applied to them.

    Args:
        input_img: Image the detections were computed on.
        num_predictions: Number of leading detections used as references.
        obj_label: Caption of the chosen detection; the integer before
            ``' : '`` selects which saliency map to plot.
        img_alpha: Opacity of the image layer.
        sal_alpha: Opacity of the heat-map layer.
        sal_range_min: Lower clip value and lower colour limit.
        sal_range_max: Upper clip value and upper colour limit.

    Returns:
        The matplotlib figure produced by ``visualize_saliency_plot``.
    """
    detect_model = get_detection_model(obj_det_model_name[-1])
    img_preds = list(list(detect_model([input_img]))[0])
    ref_preds = img_preds[:int(num_predictions)]

    # Each detection is a (bounding box, {label: score}) pair.
    ref_bboxes = np.array([
        [*det[0].min_vertex, *det[0].max_vertex] for det in ref_preds
    ])
    ref_scores = np.array([list(det[1].values()) for det in ref_preds])

    print(f"Ref bboxes: {ref_bboxes.shape}")
    print(f"Ref scores: {ref_scores.shape}")

    sal_generator = get_detection_saliency_algo(obj_det_saliency_algo_name[-1])
    sal_generator.fill = blackbox_fill

    sal_maps = gen_det_saliency(input_img, detect_model, sal_generator, ref_bboxes, ref_scores)
    print(f"Saliency maps: {sal_maps.shape}")

    nth_class_index = int(obj_label.split(' : ')[0])
    fig = visualize_saliency_plot(input_img,
                                sal_maps[nth_class_index,:,:],
                                img_alpha,
                                sal_alpha,
                                sal_range_min,
                                sal_range_max)

    return fig

def gen_det_saliency(input_img: np.ndarray,
    blackbox_detector: DetectImageObjects,
    sal_map_generator: GenerateObjectDetectorBlackboxSaliency,
    ref_bboxes: np.ndarray,
    ref_scores: np.ndarray
):
    """Return ``sal_map_generator.generate(...)`` for the reference detections.

    The generator receives, in order: the image, the reference boxes, the
    reference scores and the detector.
    """
    return sal_map_generator.generate(input_img, ref_bboxes, ref_scores, blackbox_detector)

# Build the two-tab UI (image classification / object detection) and wire the
# components to the state setters and the prediction/saliency callbacks.
# `interactive` is passed as a real bool and column scales are integers with
# the same relative proportions as before.
with gr.Blocks() as demo:
    with Tab("Image Classification"):
        # Model and saliency algorithm selection
        with Row():
            with Column(scale=1):
                drop_list = Dropdown(value=img_cls_model_name[-1],choices=["ResNet-18","ResNet-50"],label="Choose Model",interactive=True)
            with Column(scale=1):
                drop_list_sal = Dropdown(value=img_cls_saliency_algo_name[-1],choices=["SlidingWindowStack","RISE"],label="Choose Saliency Algorithm",interactive=True)
        # Algorithm hyper-parameters; visibility is toggled by the show_* callbacks
        with Row():
            with Column(scale=1):
                window_size = Textbox(value=window_size_state[-1],label="Tuple of window size values (Press Enter to submit the input)",interactive=True,visible=False)
                masks = Number(value=num_masks_state[-1],label="Number of Random Masks (Press Enter to submit the input)",interactive=True,visible=True,precision=0)
            with Column(scale=1):
                stride = Textbox(value=stride_state[-1],label="Tuple of stride values (Press Enter to submit the input)" ,interactive=True,visible=False)
                spatial_res = Number(value=spatial_res_state[-1],label="Spatial Resolution of Masking Grid (Press Enter to submit the input)" ,interactive=True,visible=True,precision=0)
            with Column(scale=1):
                threads = Slider(value=threads_state[-1],label="Threads",interactive=True,visible=True)
        with Row():
            with Column(scale=1):
                seed = Number(value=seed_state[-1],label="Seed (Press Enter to submit the input)",interactive=True,visible=True,precision=0)
            with Column(scale=1):
                p1 = Slider(value=p1_state[-1],label="P1",interactive=True,visible=True, minimum=0,maximum=1,step=0.1)
            with Column(scale=1):
                debiased = Checkbox(value=debiased_state[-1],label="Debiased", interactive=True, visible=True)
        # Input image, predictions, display controls and resulting plot
        with Row():
            with Column():
                input_img = Image(label="Saliency Map Generation", width=640, height=480)
                num_classes = Slider(value=2,label="Top-N class labels", interactive=True,visible=True)
                classify = Button("Classify")
            with Column():
                class_label = Label(label="Predicted Class")
            with Column():
                with Row():
                    class_name = Dropdown(label="Class to compute saliency",interactive=True,visible=True)
                with Row():
                    img_alpha = Slider(value=0.7,label="Image Opacity",interactive=True,visible=True,minimum=0,maximum=1,step=0.1)
                    sal_alpha = Slider(value=0.3,label="Saliency Map Opacity",interactive=True,visible=True,minimum=0,maximum=1,step=0.1)
                with Row():
                    min_sal_range = Slider(value=0,label="Minimum Saliency Value",interactive=True,visible=True,minimum=-1,maximum=1,step=0.05)
                    max_sal_range = Slider(value=1,label="Maximum Saliency Value",interactive=True,visible=True,minimum=-1,maximum=1,step=0.05)
                with Row():
                    generate_saliency = Button("Generate Saliency")
            with Column():
                with Tabs():
                    with TabItem("Display interpretation with plot"):
                        interpretation_plot = Plot()

    with Tab("Object Detection"):
        # Model and saliency algorithm selection
        with Row():
            with Column(scale=1):
                drop_list_detect_model = Dropdown(value=obj_det_model_name[-1],choices=["Faster-RCNN"],label="Choose Model",interactive=True)
            with Column(scale=1):
                drop_list_detect_sal = Dropdown(value=obj_det_saliency_algo_name[-1],choices=["RandomGridStack","DRISE"],label="Choose Saliency Algorithm",interactive=True)
        # Algorithm hyper-parameters; visibility is toggled by the show_* callbacks
        with Row():
            with Column(scale=1):
                masks_detect = Number(value=num_masks_state[-1],label="Number of Random Masks (Press Enter to submit the input)",interactive=True,visible=True,precision=0)
                occlusion_grid_size = Textbox(value=occlusion_grid_state[-1],label="Tuple of occlusion grid size values (Press Enter to submit the input)",interactive=True,visible=False)
                spatial_res_detect = Number(value=spatial_res_state[-1],label="Spatial Resolution of Masking Grid (Press Enter to submit the input)" ,interactive=True,visible=True,precision=0)
            with Column(scale=1):
                seed_detect = Number(value=seed_state[-1],label="Seed (Press Enter to submit the input)",interactive=True,visible=True,precision=0)
                p1_detect = Slider(value=p1_state[-1],label="P1",interactive=True,visible=True, minimum=0,maximum=1,step=0.1)
            with Column(scale=1):
                threads_detect = Slider(value=threads_state[-1],label="Threads",interactive=True,visible=True)
        # Input image, detections, display controls and resulting plot
        with Row():
            with Column():
                input_img_detect = Image(label="Saliency Map Generation", width=640, height=480)
                num_detections = Slider(value=2,label="Top-N detections", interactive=True,visible=True)
                detection = Button("Run Detection Algorithm")
            with Column():
                detect_label = AnnotatedImage(label="Detections")
            with Column():
                with Row():
                    class_name_det = Dropdown(label="Detection to compute saliency",interactive=True,visible=True)
                with Row():
                    img_alpha_det = Slider(value=0.7,label="Image Opacity",interactive=True,visible=True,minimum=0,maximum=1,step=0.1)
                    sal_alpha_det = Slider(value=0.3,label="Saliency Map Opacity",interactive=True,visible=True,minimum=0,maximum=1,step=0.1)
                with Row():
                    min_sal_range_det = Slider(value=0.95,label="Minimum Saliency Value",interactive=True,visible=True,minimum=0.80,maximum=1,step=0.05)
                    max_sal_range_det = Slider(value=1,label="Maximum Saliency Value",interactive=True,visible=True,minimum=0.80,maximum=1,step=0.05)
                with Row():
                    generate_det_saliency = Button("Generate Saliency")
            with Column():
                with Tabs():
                    with TabItem("Display saliency map plot"):
                        det_saliency_plot = Plot()

    # Image Classification dropdown list event listeners
    drop_list.select(select_img_cls_model,drop_list,drop_list)
    drop_list_sal.select(select_img_cls_saliency_algo,drop_list_sal,drop_list_sal)
    drop_list_sal.change(show_textbox_parameters,drop_list_sal,[window_size,stride,masks,spatial_res,seed])
    drop_list_sal.change(show_slider_parameters,drop_list_sal,[threads,p1])
    drop_list_sal.change(show_debiased_checkbox,drop_list_sal,debiased)

    # Image Classification textbox, slider and checkbox event listeners
    window_size.submit(enter_window_size,window_size,window_size)
    masks.submit(enter_num_masks,masks,masks)
    stride.submit(enter_stride, stride, stride)
    spatial_res.submit(enter_spatial_res, spatial_res, spatial_res)
    seed.submit(enter_seed, seed, seed)
    threads.change(select_threads, threads, threads)
    p1.change(select_p1, p1, p1)
    debiased.change(check_debiased,debiased,debiased)

    # Image Classification prediction and saliency generation event listeners
    classify.click(predict, [input_img, num_classes], [class_label,class_name])
    generate_saliency.click(interpretation_function, [input_img, class_label, class_name, img_alpha, sal_alpha, min_sal_range, max_sal_range], [interpretation_plot])

    # Object Detection dropdown list event listeners
    drop_list_detect_model.select(select_obj_det_model,drop_list_detect_model,drop_list_detect_model)
    drop_list_detect_sal.select(select_obj_det_saliency_algo,drop_list_detect_sal,drop_list_detect_sal)
    drop_list_detect_sal.change(show_slider_parameters,drop_list_detect_sal,[threads_detect,p1_detect])
    drop_list_detect_sal.change(show_textbox_parameters,drop_list_detect_sal,[masks_detect,spatial_res_detect,seed_detect,occlusion_grid_size])

    # Object detection textbox and slider event listeners
    masks_detect.submit(enter_num_masks,masks_detect,masks_detect)
    occlusion_grid_size.submit(enter_occlusion_grid_size,occlusion_grid_size,occlusion_grid_size)
    spatial_res_detect.submit(enter_spatial_res, spatial_res_detect, spatial_res_detect)
    seed_detect.submit(enter_seed, seed_detect, seed_detect)
    threads_detect.change(select_threads, threads_detect, threads_detect)
    p1_detect.change(select_p1, p1_detect, p1_detect)

    # Object detection prediction, class selection and saliency generation event listeners
    detection.click(run_detect, [input_img_detect, num_detections], [detect_label,class_name_det])
    generate_det_saliency.click(run_detect_saliency,[input_img_detect, num_detections, class_name_det, img_alpha_det, sal_alpha_det, min_sal_range_det, max_sal_range_det],det_saliency_plot)

demo.launch()