# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional, Tuple, Union
import cv2
import mmcv
import numpy as np
# seaborn is an optional dependency that is only needed to pick tracking
# colours; fall back to ``None`` so the module still imports without it.
try:
    import seaborn as sns
except ImportError:
    sns = None
import torch
from mmengine.dist import master_only
from mmengine.structures import InstanceData, PixelData
from mmengine.visualization import Visualizer
from ..evaluation import INSTANCE_OFFSET
from ..registry import VISUALIZERS
from ..structures import DetDataSample
from ..structures.mask import BitmapMasks, PolygonMasks, bitmap_to_polygon
from .palette import _get_adaptive_scales, get_palette, jitter_color
@VISUALIZERS.register_module()
class DetLocalVisualizer(Visualizer):
    """MMDetection Local Visualizer.

    Args:
        name (str): Name of the instance. Defaults to 'visualizer'.
        image (np.ndarray, optional): the origin image to draw. The format
            should be RGB. Defaults to None.
        vis_backends (list, optional): Visual backend config list.
            Defaults to None.
        save_dir (str, optional): Save file dir for all storage backends.
            If it is None, the backend storage will not save any data.
        bbox_color (str, tuple(int), optional): Color of bbox lines.
            The tuple of color should be in BGR order. Defaults to None.
        text_color (str, tuple(int), optional): Color of texts.
            The tuple of color should be in BGR order.
            Defaults to (200, 200, 200).
        mask_color (str, tuple(int), optional): Color of masks.
            The tuple of color should be in BGR order.
            Defaults to None.
        line_width (int, float): The linewidth of lines.
            Defaults to 3.
        alpha (int, float): The transparency of bboxes or mask.
            Defaults to 0.8.

    Examples:
        >>> import numpy as np
        >>> import torch
        >>> from mmengine.structures import InstanceData
        >>> from mmdet.structures import DetDataSample
        >>> from mmdet.visualization import DetLocalVisualizer

        >>> det_local_visualizer = DetLocalVisualizer()
        >>> image = np.random.randint(0, 256,
        ...                     size=(10, 12, 3)).astype('uint8')
        >>> gt_instances = InstanceData()
        >>> gt_instances.bboxes = torch.Tensor([[1, 2, 2, 5]])
        >>> gt_instances.labels = torch.randint(0, 2, (1,))
        >>> gt_det_data_sample = DetDataSample()
        >>> gt_det_data_sample.gt_instances = gt_instances
        >>> det_local_visualizer.add_datasample('image', image,
        ...                         gt_det_data_sample)
        >>> det_local_visualizer.add_datasample(
        ...                       'image', image, gt_det_data_sample,
        ...                        out_file='out_file.jpg')
        >>> det_local_visualizer.add_datasample(
        ...                        'image', image, gt_det_data_sample,
        ...                         show=True)
        >>> pred_instances = InstanceData()
        >>> pred_instances.bboxes = torch.Tensor([[2, 4, 4, 8]])
        >>> pred_instances.labels = torch.randint(0, 2, (1,))
        >>> pred_det_data_sample = DetDataSample()
        >>> pred_det_data_sample.pred_instances = pred_instances
        >>> det_local_visualizer.add_datasample('image', image,
        ...                         gt_det_data_sample,
        ...                         pred_det_data_sample)
    """

    def __init__(self,
                 name: str = 'visualizer',
                 image: Optional[np.ndarray] = None,
                 vis_backends: Optional[Dict] = None,
                 save_dir: Optional[str] = None,
                 bbox_color: Optional[Union[str, Tuple[int]]] = None,
                 text_color: Optional[Union[str,
                                            Tuple[int]]] = (200, 200, 200),
                 mask_color: Optional[Union[str, Tuple[int]]] = None,
                 line_width: Union[int, float] = 3,
                 alpha: float = 0.8) -> None:
        super().__init__(
            name=name,
            image=image,
            vis_backends=vis_backends,
            save_dir=save_dir)
        self.bbox_color = bbox_color
        self.text_color = text_color
        self.mask_color = mask_color
        self.line_width = line_width
        self.alpha = alpha
        # Set default value. When calling
        # `DetLocalVisualizer().dataset_meta=xxx`,
        # it will override the default value.
        self.dataset_meta = {}

    @staticmethod
    def _label_text(instances: InstanceData, index: int,
                    classes: Optional[List[str]]) -> str:
        """Build the caption of one instance.

        The caption is ``instances.label_names[index]`` when that field
        exists, otherwise the class name looked up in ``classes`` (or
        ``'class <label>'`` when ``classes`` is None). When ``scores`` is
        present the score is appended as a percentage with one decimal.
        """
        if 'label_names' in instances:
            text = instances.label_names[index]
        else:
            label = instances.labels[index]
            text = classes[label] if classes is not None \
                else f'class {label}'
        if 'scores' in instances:
            score = round(float(instances.scores[index]) * 100, 1)
            text += f': {score}'
        return text

    def _draw_label(self,
                    text: str,
                    position,
                    color,
                    scale: float,
                    centered: bool = False) -> None:
        """Draw one caption on a semi-transparent black box.

        Args:
            text (str): Caption to draw.
            position: ``(x, y)`` anchor of the caption.
            color: Text color.
            scale (float): Adaptive scale; the font size is ``13 * scale``.
            centered (bool): Center the text horizontally on ``position``
                (used for mask centroids) instead of left-aligning it.
        """
        self.draw_texts(
            text,
            position,
            colors=color,
            font_sizes=int(13 * scale),
            horizontal_alignments='center' if centered else 'left',
            bboxes=[{
                'facecolor': 'black',
                'alpha': 0.8,
                'pad': 0.7,
                'edgecolor': 'none'
            }])

    def _draw_instances(self, image: np.ndarray, instances: InstanceData,
                        classes: Optional[List[str]],
                        palette: Optional[List[tuple]]) -> np.ndarray:
        """Draw instances of GT or prediction.

        Args:
            image (np.ndarray): The image to draw.
            instances (:obj:`InstanceData`): Data structure for
                instance-level annotations or predictions.
            classes (List[str], optional): Category information.
            palette (List[tuple], optional): Palette information
                corresponding to the category.

        Returns:
            np.ndarray: the drawn image which channel is RGB.
        """
        self.set_image(image)

        if 'bboxes' in instances and instances.bboxes.sum() > 0:
            bboxes = instances.bboxes
            labels = instances.labels

            max_label = int(max(labels) if len(labels) > 0 else 0)
            text_palette = get_palette(self.text_color, max_label + 1)
            text_colors = [text_palette[label] for label in labels]

            bbox_color = palette if self.bbox_color is None \
                else self.bbox_color
            bbox_palette = get_palette(bbox_color, max_label + 1)
            colors = [bbox_palette[label] for label in labels]
            self.draw_bboxes(
                bboxes,
                edge_colors=colors,
                alpha=self.alpha,
                line_widths=self.line_width)

            # Anchor each caption just inside the top-left box corner.
            positions = bboxes[:, :2] + self.line_width
            areas = (bboxes[:, 3] - bboxes[:, 1]) * (
                bboxes[:, 2] - bboxes[:, 0])
            scales = _get_adaptive_scales(areas)

            for i, pos in enumerate(positions):
                self._draw_label(
                    self._label_text(instances, i, classes), pos,
                    text_colors[i], scales[i])

        if 'masks' in instances:
            labels = instances.labels
            masks = instances.masks

            if isinstance(masks, torch.Tensor):
                masks = masks.numpy()
            elif isinstance(masks, (PolygonMasks, BitmapMasks)):
                masks = masks.to_ndarray()

            masks = masks.astype(bool)

            max_label = int(max(labels) if len(labels) > 0 else 0)
            mask_color = palette if self.mask_color is None \
                else self.mask_color
            mask_palette = get_palette(mask_color, max_label + 1)
            colors = [jitter_color(mask_palette[label]) for label in labels]
            text_palette = get_palette(self.text_color, max_label + 1)
            text_colors = [text_palette[label] for label in labels]

            polygons = []
            for mask in masks:
                contours, _ = bitmap_to_polygon(mask)
                polygons.extend(contours)
            self.draw_polygons(polygons, edge_colors='w', alpha=self.alpha)
            self.draw_binary_masks(masks, colors=colors, alphas=self.alpha)

            if len(labels) > 0 and \
                    ('bboxes' not in instances or
                     instances.bboxes.sum() == 0):
                # instances.bboxes.sum()==0 represent dummy bboxes.
                # A typical example of SOLO does not exist bbox branch.
                # Keep the instance index next to every centroid so that an
                # empty mask cannot shift captions onto the wrong instance.
                anchors = []
                for idx, mask in enumerate(masks):
                    _, _, stats, centroids = cv2.connectedComponentsWithStats(
                        mask.astype(np.uint8), connectivity=8)
                    # Row 0 is the background component; a mask with no
                    # foreground pixel has nothing to label.
                    if stats.shape[0] > 1:
                        largest_id = np.argmax(stats[1:, -1]) + 1
                        anchors.append((idx, centroids[largest_id],
                                        stats[largest_id, -1]))

                if anchors:
                    areas = np.stack([area for _, _, area in anchors],
                                     axis=0)
                    scales = _get_adaptive_scales(areas)
                    for scale, (idx, pos, _) in zip(scales, anchors):
                        self._draw_label(
                            self._label_text(instances, idx, classes),
                            pos,
                            text_colors[idx],
                            scale,
                            centered=True)
        return self.get_image()

    def _draw_panoptic_seg(self, image: np.ndarray, panoptic_seg: PixelData,
                           classes: Optional[List[str]],
                           palette: Optional[List]) -> np.ndarray:
        """Draw panoptic seg of GT or prediction.

        Args:
            image (np.ndarray): The image to draw.
            panoptic_seg (:obj:`PixelData`): Data structure for
                pixel-level annotations or predictions.
            classes (List[str], optional): Category information.
            palette (List, optional): Palette information corresponding
                to the category.

        Returns:
            np.ndarray: the drawn image which channel is RGB.
        """
        # TODO: Is there a way to bypass?
        num_classes = len(classes)

        panoptic_seg_data = panoptic_seg.sem_seg[0]

        ids = np.unique(panoptic_seg_data)[::-1]

        if 'label_names' in panoptic_seg:
            # open set panoptic segmentation
            classes = panoptic_seg.metainfo['label_names']
            ignore_index = panoptic_seg.metainfo.get('ignore_index',
                                                     len(classes))
            ids = ids[ids != ignore_index]
        else:
            # for VOID label
            ids = ids[ids != num_classes]

        self.set_image(image)
        if len(ids) == 0:
            # Only VOID / ignored pixels: there is nothing to overlay and
            # stacking an empty area list below would raise.
            return self.get_image()

        # The category index is the segment id modulo INSTANCE_OFFSET.
        labels = (ids % INSTANCE_OFFSET).astype(np.int64)
        segms = (panoptic_seg_data[None] == ids[:, None, None])

        max_label = int(max(labels))

        mask_color = palette if self.mask_color is None \
            else self.mask_color
        mask_palette = get_palette(mask_color, max_label + 1)
        colors = [mask_palette[label] for label in labels]

        # draw segm
        polygons = []
        for mask in segms:
            contours, _ = bitmap_to_polygon(mask)
            polygons.extend(contours)
        self.draw_polygons(polygons, edge_colors='w', alpha=self.alpha)
        self.draw_binary_masks(segms, colors=colors, alphas=self.alpha)

        # draw label
        areas = []
        positions = []
        for mask in segms:
            _, _, stats, centroids = cv2.connectedComponentsWithStats(
                mask.astype(np.uint8), connectivity=8)
            # Every id came from np.unique, so each mask has at least one
            # foreground component after the background row.
            max_id = np.argmax(stats[1:, -1]) + 1
            positions.append(centroids[max_id])
            areas.append(stats[max_id, -1])
        areas = np.stack(areas, axis=0)
        scales = _get_adaptive_scales(areas)

        text_palette = get_palette(self.text_color, max_label + 1)
        text_colors = [text_palette[label] for label in labels]

        for i, (pos, label) in enumerate(zip(positions, labels)):
            self._draw_label(
                classes[label], pos, text_colors[i], scales[i],
                centered=True)
        return self.get_image()

    def _draw_sem_seg(self, image: np.ndarray, sem_seg: PixelData,
                      classes: Optional[List],
                      palette: Optional[List]) -> np.ndarray:
        """Draw semantic seg of GT or prediction.

        Args:
            image (np.ndarray): The image to draw.
            sem_seg (:obj:`PixelData`): Data structure for pixel-level
                annotations or predictions.
            classes (list, optional): Names indexed by label id. They are
                used for the captions unless ``sem_seg`` carries its own
                ``label_names`` (open-set case).
            palette (list, optional): Colors indexed by label id. It is
                indexed directly, so it must not be None when at least one
                label is present.

        Returns:
            np.ndarray: the drawn image which channel is RGB.
        """
        sem_seg_data = sem_seg.sem_seg
        if isinstance(sem_seg_data, torch.Tensor):
            sem_seg_data = sem_seg_data.numpy()

        # 0 ~ num_class, the value 0 means background
        ids = np.unique(sem_seg_data)
        ignore_index = sem_seg.metainfo.get('ignore_index', 255)
        ids = ids[ids != ignore_index]

        if 'label_names' in sem_seg:
            # open set semseg
            label_names = sem_seg.metainfo['label_names']
        else:
            label_names = classes

        labels = np.array(ids, dtype=np.int64)
        colors = [palette[label] for label in labels]

        self.set_image(image)

        # draw semantic masks
        for label, color in zip(labels, colors):
            masks = sem_seg_data == label
            self.draw_binary_masks(masks, colors=[color], alphas=self.alpha)
            label_text = label_names[label]
            # NOTE(review): indexing ``masks[0]`` assumes the map has a
            # leading channel axis, i.e. (1, H, W) - confirm with callers.
            _, _, stats, centroids = cv2.connectedComponentsWithStats(
                masks[0].astype(np.uint8), connectivity=8)
            if stats.shape[0] > 1:
                largest_id = np.argmax(stats[1:, -1]) + 1
                centroid = centroids[largest_id]

                area = stats[largest_id, -1]
                scale = _get_adaptive_scales(area)

                self._draw_label(
                    label_text, centroid, (255, 255, 255), scale,
                    centered=True)

        return self.get_image()

    @master_only
    def add_datasample(
            self,
            name: str,
            image: np.ndarray,
            data_sample: Optional['DetDataSample'] = None,
            draw_gt: bool = True,
            draw_pred: bool = True,
            show: bool = False,
            wait_time: float = 0,
            # TODO: Supported in mmengine's Visualizer.
            out_file: Optional[str] = None,
            pred_score_thr: float = 0.3,
            step: int = 0) -> None:
        """Draw datasample and save to all backends.

        - If GT and prediction are plotted at the same time, they are
          displayed in a stitched image where the left image is the
          ground truth and the right image is the prediction.
        - If ``show`` is True, the drawn image is displayed in a local
          window.
        - If ``out_file`` is specified, the drawn image will be
          saved to ``out_file`` instead of being sent to the storage
          backends. It is usually used when the display is not available.

        Args:
            name (str): The image identifier.
            image (np.ndarray): The image to draw.
            data_sample (:obj:`DetDataSample`, optional): A data
                sample that contain annotations and predictions.
                Defaults to None.
            draw_gt (bool): Whether to draw GT DetDataSample. Default to True.
            draw_pred (bool): Whether to draw Prediction DetDataSample.
                Defaults to True.
            show (bool): Whether to display the drawn image. Default to False.
            wait_time (float): The interval of show (s). Defaults to 0.
            out_file (str): Path to output file. Defaults to None.
            pred_score_thr (float): The threshold to visualize the bboxes
                and masks. Defaults to 0.3.
            step (int): Global step value to record. Defaults to 0.
        """
        image = image.clip(0, 255).astype(np.uint8)
        classes = self.dataset_meta.get('classes', None)
        palette = self.dataset_meta.get('palette', None)

        gt_img_data = None
        pred_img_data = None

        if data_sample is not None:
            data_sample = data_sample.cpu()

        if draw_gt and data_sample is not None:
            gt_img_data = image
            if 'gt_instances' in data_sample:
                gt_img_data = self._draw_instances(image,
                                                   data_sample.gt_instances,
                                                   classes, palette)
            if 'gt_sem_seg' in data_sample:
                gt_img_data = self._draw_sem_seg(gt_img_data,
                                                 data_sample.gt_sem_seg,
                                                 classes, palette)

            if 'gt_panoptic_seg' in data_sample:
                assert classes is not None, 'class information is ' \
                                            'not provided when ' \
                                            'visualizing panoptic ' \
                                            'segmentation results.'
                gt_img_data = self._draw_panoptic_seg(
                    gt_img_data, data_sample.gt_panoptic_seg, classes, palette)

        if draw_pred and data_sample is not None:
            pred_img_data = image
            if 'pred_instances' in data_sample:
                pred_instances = data_sample.pred_instances
                # Predictions without scores cannot be thresholded; draw
                # them all rather than failing on the missing field.
                if 'scores' in pred_instances:
                    pred_instances = pred_instances[
                        pred_instances.scores > pred_score_thr]
                pred_img_data = self._draw_instances(image, pred_instances,
                                                     classes, palette)

            if 'pred_sem_seg' in data_sample:
                pred_img_data = self._draw_sem_seg(pred_img_data,
                                                   data_sample.pred_sem_seg,
                                                   classes, palette)

            if 'pred_panoptic_seg' in data_sample:
                assert classes is not None, 'class information is ' \
                                            'not provided when ' \
                                            'visualizing panoptic ' \
                                            'segmentation results.'
                pred_img_data = self._draw_panoptic_seg(
                    pred_img_data, data_sample.pred_panoptic_seg.numpy(),
                    classes, palette)

        if gt_img_data is not None and pred_img_data is not None:
            drawn_img = np.concatenate((gt_img_data, pred_img_data), axis=1)
        elif gt_img_data is not None:
            drawn_img = gt_img_data
        elif pred_img_data is not None:
            drawn_img = pred_img_data
        else:
            # Display the original image directly if nothing is drawn.
            drawn_img = image

        # It is convenient for users to obtain the drawn image.
        # For example, the user wants to obtain the drawn image and
        # save it as a video during video inference.
        self.set_image(drawn_img)

        if show:
            self.show(drawn_img, win_name=name, wait_time=wait_time)

        if out_file is not None:
            # The drawn image is RGB; reverse the channels for mmcv.imwrite.
            mmcv.imwrite(drawn_img[..., ::-1], out_file)
        else:
            self.add_image(name, drawn_img, step)
def random_color(seed):
    """Pick a color from the seaborn palette, deterministically per seed.

    Args:
        seed (int | None): Value that selects the color; the same seed
            always yields the same color. ``None`` draws a random color.

    Returns:
        tuple[int]: The chosen color with each channel scaled to 0-255.

    Raises:
        RuntimeError: If seaborn is not installed.
    """
    if sns is None:
        raise RuntimeError('seaborn is not installed, '
                           'please install it by: pip install seaborn')
    palette = sns.color_palette()
    # A private generator keeps the choice reproducible for a given seed
    # without reseeding (and thereby disturbing) NumPy's global RNG.
    rng = np.random.RandomState(None if seed is None else int(seed))
    chosen = palette[rng.choice(len(palette))]
    return tuple(int(255 * channel) for channel in chosen)
@VISUALIZERS.register_module()
class TrackLocalVisualizer(Visualizer):
    """Tracking Local Visualizer for the MOT, VIS tasks.

    Args:
        name (str): Name of the instance. Defaults to 'visualizer'.
        image (np.ndarray, optional): the origin image to draw. The format
            should be RGB. Defaults to None.
        vis_backends (list, optional): Visual backend config list.
            Defaults to None.
        save_dir (str, optional): Save file dir for all storage backends.
            If it is None, the backend storage will not save any data.
        line_width (int, float): The linewidth of lines.
            Defaults to 3.
        alpha (int, float): The transparency of bboxes or mask.
            Defaults to 0.8.
    """

    def __init__(self,
                 name: str = 'visualizer',
                 image: Optional[np.ndarray] = None,
                 vis_backends: Optional[Dict] = None,
                 save_dir: Optional[str] = None,
                 line_width: Union[int, float] = 3,
                 alpha: float = 0.8) -> None:
        super().__init__(name, image, vis_backends, save_dir)
        self.line_width = line_width
        self.alpha = alpha
        # Set default value. When calling
        # `TrackLocalVisualizer().dataset_meta=xxx`,
        # it will override the default value.
        self.dataset_meta = {}

    def _draw_instances(self, image: np.ndarray,
                        instances: InstanceData) -> np.ndarray:
        """Draw instances of GT or prediction.

        Args:
            image (np.ndarray): The image to draw.
            instances (:obj:`InstanceData`): Data structure for
                instance-level annotations or predictions.

        Returns:
            np.ndarray: the drawn image which channel is RGB.
        """
        self.set_image(image)
        classes = self.dataset_meta.get('classes', None)

        # get colors and texts
        # for the MOT and VIS tasks
        # The color is derived from the instance id rather than the class.
        colors = [random_color(_id) for _id in instances.instances_id]
        categories = [
            classes[label] if classes is not None else f'cls{label}'
            for label in instances.labels
        ]
        if 'scores' in instances:
            texts = [
                f'{category_name}\n{instance_id} | {score:.2f}'
                for category_name, instance_id, score in zip(
                    categories, instances.instances_id, instances.scores)
            ]
        else:
            texts = [
                f'{category_name}\n{instance_id}' for category_name,
                instance_id in zip(categories, instances.instances_id)
            ]

        # draw bboxes and texts
        if 'bboxes' in instances:
            # draw bboxes
            bboxes = instances.bboxes.clone()
            self.draw_bboxes(
                bboxes,
                edge_colors=colors,
                alpha=self.alpha,
                line_widths=self.line_width)
            # draw texts
            positions = bboxes[:, :2] + self.line_width
            areas = (bboxes[:, 3] - bboxes[:, 1]) * (
                bboxes[:, 2] - bboxes[:, 0])
            scales = _get_adaptive_scales(areas.cpu().numpy())
            for i, pos in enumerate(positions):
                self.draw_texts(
                    texts[i],
                    pos,
                    colors='black',
                    font_sizes=int(13 * scales[i]),
                    bboxes=[{
                        # The text box takes the track color, rescaled
                        # from 0-255 to 0-1.
                        'facecolor': [c / 255 for c in colors[i]],
                        'alpha': 0.8,
                        'pad': 0.7,
                        'edgecolor': 'none'
                    }])

        # draw masks
        if 'masks' in instances:
            masks = instances.masks
            polygons = []
            for mask in masks:
                contours, _ = bitmap_to_polygon(mask)
                polygons.extend(contours)
            self.draw_polygons(polygons, edge_colors='w', alpha=self.alpha)
            self.draw_binary_masks(masks, colors=colors, alphas=self.alpha)

        return self.get_image()

    @master_only
    def add_datasample(
            self,
            name: str,
            image: np.ndarray,
            data_sample: DetDataSample = None,
            draw_gt: bool = True,
            draw_pred: bool = True,
            show: bool = False,
            wait_time: int = 0,
            # TODO: Supported in mmengine's Visualizer.
            out_file: Optional[str] = None,
            pred_score_thr: float = 0.3,
            step: int = 0) -> None:
        """Draw datasample and save to all backends.

        - If GT and prediction are plotted at the same time, they are
          displayed in a stitched image where the left image is the
          ground truth and the right image is the prediction.
        - If ``show`` is True, the drawn image is displayed in a local
          window.
        - If ``out_file`` is specified, the drawn image will be
          saved to ``out_file`` instead of being sent to the storage
          backends. It is usually used when the display is not available.

        Args:
            name (str): The image identifier.
            image (np.ndarray): The image to draw.
            data_sample (:obj:`DetDataSample`, optional): A data
                sample that contain annotations and predictions.
                Defaults to None.
            draw_gt (bool): Whether to draw GT instances
                (``gt_instances``). Default to True.
            draw_pred (bool): Whether to draw predicted track instances
                (``pred_track_instances``). Defaults to True.
            show (bool): Whether to display the drawn image. Default to False.
            wait_time (int): The interval of show (s). Defaults to 0.
            out_file (str): Path to output file. Defaults to None.
            pred_score_thr (float): The threshold to visualize the bboxes
                and masks. Defaults to 0.3.
            step (int): Global step value to record. Defaults to 0.
        """
        gt_img_data = None
        pred_img_data = None

        if data_sample is not None:
            data_sample = data_sample.cpu()

        if draw_gt and data_sample is not None:
            assert 'gt_instances' in data_sample
            gt_img_data = self._draw_instances(image, data_sample.gt_instances)

        if draw_pred and data_sample is not None:
            assert 'pred_track_instances' in data_sample
            pred_instances = data_sample.pred_track_instances
            if 'scores' in pred_instances:
                pred_instances = pred_instances[
                    pred_instances.scores > pred_score_thr].cpu()
            pred_img_data = self._draw_instances(image, pred_instances)

        if gt_img_data is not None and pred_img_data is not None:
            drawn_img = np.concatenate((gt_img_data, pred_img_data), axis=1)
        elif gt_img_data is not None:
            drawn_img = gt_img_data
        elif pred_img_data is not None:
            drawn_img = pred_img_data
        else:
            # Nothing was drawn (no data sample, or both flags disabled):
            # fall back to the input image instead of passing None on.
            drawn_img = image

        if show:
            self.show(drawn_img, win_name=name, wait_time=wait_time)

        if out_file is not None:
            # The drawn image is RGB; reverse the channels for mmcv.imwrite.
            mmcv.imwrite(drawn_img[..., ::-1], out_file)
        else:
            self.add_image(name, drawn_img, step)