import copy
import itertools
import json
import logging
import os
import pickle
from collections import OrderedDict

import torch

import detectron2.utils.comm as comm
from detectron2.config import CfgNode
from detectron2.data import MetadataCatalog
from detectron2.structures import Boxes, BoxMode, pairwise_iou
from detectron2.utils.file_io import PathManager
from detectron2.utils.logger import create_small_table

from .coco_evaluation import instances_to_coco_json
from .evaluator import DatasetEvaluator


class LVISEvaluator(DatasetEvaluator):
    """
    Evaluate object proposal and instance detection/segmentation outputs using
    LVIS's metrics and evaluation API.
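
    Example (a minimal sketch; "lvis_v1_val" is an assumed dataset name, and
    ``model`` and ``data_loader`` must be built elsewhere)::

        from detectron2.evaluation import inference_on_dataset

        evaluator = LVISEvaluator("lvis_v1_val", output_dir="./output")
        results = inference_on_dataset(model, data_loader, evaluator)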
    """

    def __init__(
        self,
        dataset_name,
        tasks=None,
        distributed=True,
        output_dir=None,
        *,
        max_dets_per_image=None,
    ):
        """
        Args:
            dataset_name (str): name of the dataset to be evaluated.
                It must have the following corresponding metadata:
                "json_file": the path to the LVIS format annotation.
            tasks (tuple[str]): tasks that can be evaluated under the given
                configuration. A task is one of "bbox", "segm".
                By default, will infer this automatically from the predictions.
            distributed (bool): if True, will collect results from all ranks for
                evaluation. Otherwise, will evaluate the results in the current
                process.
            output_dir (str): optional, an output directory to dump results.
            max_dets_per_image (None or int): limit on the maximum number of
                detections per image when evaluating AP. If None, uses the LVIS
                default of 300.
        """
        from lvis import LVIS

        self._logger = logging.getLogger(__name__)

        if tasks is not None and isinstance(tasks, CfgNode):
            self._logger.warning(
                "LVIS Evaluator instantiated using config, this is deprecated behavior."
                " Please pass in explicit arguments instead."
            )
            self._tasks = None  # infer tasks from the predictions instead
        else:
            self._tasks = tasks

        self._distributed = distributed
        self._output_dir = output_dir
        self._max_dets_per_image = max_dets_per_image

        self._cpu_device = torch.device("cpu")

        self._metadata = MetadataCatalog.get(dataset_name)
        json_file = PathManager.get_local_path(self._metadata.json_file)
        self._lvis_api = LVIS(json_file)
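        # Test set json files do not contain annotations (evaluation must be
        # performed using the LVIS evaluation server).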
        self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0

    def reset(self):
        self._predictions = []

    def process(self, inputs, outputs):
        """
        Args:
            inputs: the inputs to an LVIS model (e.g., GeneralizedRCNN).
                It is a list of dicts. Each dict corresponds to an image and
                contains keys like "height", "width", "file_name", "image_id".
            outputs: the outputs of an LVIS model. It is a list of dicts with key
                "instances" that contains :class:`Instances`.
        """
        for input, output in zip(inputs, outputs):
            prediction = {"image_id": input["image_id"]}

            if "instances" in output:
                instances = output["instances"].to(self._cpu_device)
                prediction["instances"] = instances_to_coco_json(instances, input["image_id"])
            if "proposals" in output:
                prediction["proposals"] = output["proposals"].to(self._cpu_device)
            self._predictions.append(prediction)

    def evaluate(self):
        if self._distributed:
            comm.synchronize()
            predictions = comm.gather(self._predictions, dst=0)
            predictions = list(itertools.chain(*predictions))

            if not comm.is_main_process():
                return
        else:
            predictions = self._predictions

        if len(predictions) == 0:
            self._logger.warning("[LVISEvaluator] Did not receive valid predictions.")
            return {}

        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            file_path = os.path.join(self._output_dir, "instances_predictions.pth")
            with PathManager.open(file_path, "wb") as f:
                torch.save(predictions, f)

        self._results = OrderedDict()
        if "proposals" in predictions[0]:
            self._eval_box_proposals(predictions)
        if "instances" in predictions[0]:
            self._eval_predictions(predictions)

        return copy.deepcopy(self._results)

    def _tasks_from_predictions(self, predictions):
        for pred in predictions:
            if "segmentation" in pred:
                return ("bbox", "segm")
        return ("bbox",)

    def _eval_predictions(self, predictions):
        """
        Evaluate predictions. Fill self._results with the metrics of the tasks.

        Args:
            predictions (list[dict]): list of outputs from the model
        """
        self._logger.info("Preparing results in the LVIS format ...")
        lvis_results = list(itertools.chain(*[x["instances"] for x in predictions]))
        tasks = self._tasks or self._tasks_from_predictions(lvis_results)
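
        # The LVIS evaluator can also be used on COCO-style datasets; in that
        # case `_metadata` carries a dataset-specific mapping from contiguous
        # ids back to the dataset's own category ids.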
        if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
            reverse_id_mapping = {
                v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items()
            }
            for result in lvis_results:
                result["category_id"] = reverse_id_mapping[result["category_id"]]
        else:
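            # unmap the category ids for LVIS (from 0-indexed to 1-indexed)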
            for result in lvis_results:
                result["category_id"] += 1

        if self._output_dir:
            file_path = os.path.join(self._output_dir, "lvis_instances_results.json")
            self._logger.info("Saving results to {}".format(file_path))
            with PathManager.open(file_path, "w") as f:
                f.write(json.dumps(lvis_results))
                f.flush()

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating predictions ...")
        for task in sorted(tasks):
            res = _evaluate_predictions_on_lvis(
                self._lvis_api,
                lvis_results,
                task,
                max_dets_per_image=self._max_dets_per_image,
                class_names=self._metadata.get("thing_classes"),
            )
            self._results[task] = res

    def _eval_box_proposals(self, predictions):
        """
        Evaluate the box proposals in predictions.
        Fill self._results with the metrics for "box_proposals" task.
        """
        if self._output_dir:
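            # Saving generated box proposals to file.
            # Predicted box_proposals are in XYXY_ABS mode.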
            bbox_mode = BoxMode.XYXY_ABS.value
            ids, boxes, objectness_logits = [], [], []
            for prediction in predictions:
                ids.append(prediction["image_id"])
                boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy())
                objectness_logits.append(prediction["proposals"].objectness_logits.numpy())

            proposal_data = {
                "boxes": boxes,
                "objectness_logits": objectness_logits,
                "ids": ids,
                "bbox_mode": bbox_mode,
            }
            with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f:
                pickle.dump(proposal_data, f)

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating bbox proposals ...")
        res = {}
        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
        for limit in [100, 1000]:
            for area, suffix in areas.items():
                stats = _evaluate_box_proposals(predictions, self._lvis_api, area=area, limit=limit)
                key = "AR{}@{:d}".format(suffix, limit)
                res[key] = float(stats["ar"].item() * 100)
        self._logger.info("Proposal metrics: \n" + create_small_table(res))
        self._results["box_proposals"] = res


def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area="all", limit=None):
    """
    Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official LVIS API recall evaluation code. However,
    it produces slightly different results.
    """
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0**2, 1e5**2],  # all
        [0**2, 32**2],  # small
        [32**2, 96**2],  # medium
        [96**2, 1e5**2],  # large
        [96**2, 128**2],  # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2],  # 512-inf
    ]
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for prediction_dict in dataset_predictions:
        predictions = prediction_dict["proposals"]
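
        # sort proposals in descending order of objectness score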
        inds = predictions.objectness_logits.sort(descending=True)[1]
        predictions = predictions[inds]

        ann_ids = lvis_api.get_ann_ids(img_ids=[prediction_dict["image_id"]])
        anno = lvis_api.load_anns(ann_ids)
        gt_boxes = [
            BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno
        ]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)
        gt_boxes = Boxes(gt_boxes)
        gt_areas = torch.as_tensor([obj["area"] for obj in anno])

        if len(gt_boxes) == 0 or len(predictions) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if limit is not None and len(predictions) > limit:
            predictions = predictions[:limit]

        overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(predictions), len(gt_boxes))):
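            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box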
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)
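
            # find which gt box is 'best' covered (i.e. 'best' = most iou)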
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
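            # find the proposal box that covers the best covered gt box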
            box_ind = argmax_overlaps[gt_ind]
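            # record the iou coverage of this gt box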
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
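            # mark the proposal box and the gt box as used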
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)
    gt_overlaps = (
        torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32)
    )
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }


def _evaluate_predictions_on_lvis(
    lvis_gt, lvis_results, iou_type, max_dets_per_image=None, class_names=None
):
    """
    Args:
        iou_type (str): "bbox" or "segm".
        max_dets_per_image (None or int): limit on the maximum number of
            detections per image when evaluating AP. If None, uses the LVIS
            default of 300.
        class_names (None or list[str]): if provided, will use it to report
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = {
        "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
        "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
    }[iou_type]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:
        logger.warning("No predictions from the model!")
        return {metric: float("nan") for metric in metrics}

    if iou_type == "segm":
        lvis_results = copy.deepcopy(lvis_results)
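
        # When evaluating mask AP, if the results contain bbox, the LVIS API
        # will use the box area instead of the mask area, which leads to a
        # different definition of small/medium/large. Remove the bbox field so
        # mask AP uses the mask area.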
        for c in lvis_results:
            c.pop("bbox", None)

    if max_dets_per_image is None:
        max_dets_per_image = 300  # default for the LVIS dataset

    from lvis import LVISEval, LVISResults

    logger.info(f"Evaluating with max detections per image = {max_dets_per_image}")
    lvis_results = LVISResults(lvis_gt, lvis_results, max_dets=max_dets_per_image)
    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
    lvis_eval.run()
    lvis_eval.print_results()
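
    # Pull the standard metrics from the LVIS results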
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results))
    return results