import datetime
import logging
import time
from collections import OrderedDict, abc
from contextlib import ExitStack, contextmanager
from typing import List, Union

import torch
from torch import nn

from detectron2.utils.comm import get_world_size, is_main_process
from detectron2.utils.logger import log_every_n_seconds


class DatasetEvaluator:
    """
    Base class for a dataset evaluator.

    The function :func:`inference_on_dataset` runs the model over
    all samples in the dataset, and has a DatasetEvaluator process the inputs/outputs.

    This class will accumulate information about the inputs/outputs (by :meth:`process`),
    and produce evaluation results in the end (by :meth:`evaluate`).
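
    For illustration, a minimal evaluator sketch (hypothetical; it assumes each
    output dict carries an `"instances"` field) could simply count predictions:

    .. code-block:: python

        class InstanceCountEvaluator(DatasetEvaluator):
            def reset(self):
                self._num_instances = 0

            def process(self, inputs, outputs):
                for output in outputs:
                    self._num_instances += len(output["instances"])

            def evaluate(self):
                return {"counting": {"num_instances": self._num_instances}}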
    """

    def reset(self):
        """
        Preparation for a new round of evaluation.
        Should be called before starting a round of evaluation.
        """
        pass

    def process(self, inputs, outputs):
        """
        Process the pair of inputs and outputs.
        If they contain batches, the pairs can be consumed one-by-one using `zip`:

        .. code-block:: python

            for input_, output in zip(inputs, outputs):
                # do evaluation on single input/output pair
                ...

        Args:
            inputs (list): the inputs that are used to call the model.
            outputs (list): the return value of `model(inputs)`.
        """
        pass

    def evaluate(self):
        """
        Evaluate/summarize the performance, after processing all input/output pairs.

        Returns:
            dict:
                A new evaluator class can return a dict of arbitrary format
                as long as the user can process the results.
                In our train_net.py, we expect the following format:

                * key: the name of the task (e.g., bbox)
                * value: a dict of {metric name: score}, e.g.: {"AP50": 80}
        """
        pass


class DatasetEvaluators(DatasetEvaluator):
    """
    Wrapper class to combine multiple :class:`DatasetEvaluator` instances.

    This class dispatches every evaluation call to
    all of its :class:`DatasetEvaluator` instances.
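
    For example (a hypothetical sketch; `EvaluatorA` and `EvaluatorB` stand in
    for any :class:`DatasetEvaluator` subclasses):

    .. code-block:: python

        evaluator = DatasetEvaluators([EvaluatorA(), EvaluatorB()])
        evaluator.reset()
        evaluator.process(inputs, outputs)
        results = evaluator.evaluate()  # merged dict; keys must not collide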
    """

    def __init__(self, evaluators):
        """
        Args:
            evaluators (list): the evaluators to combine.
        """
        super().__init__()
        self._evaluators = evaluators

    def reset(self):
        for evaluator in self._evaluators:
            evaluator.reset()

    def process(self, inputs, outputs):
        for evaluator in self._evaluators:
            evaluator.process(inputs, outputs)

    def evaluate(self):
        results = OrderedDict()
        for evaluator in self._evaluators:
            result = evaluator.evaluate()
            if is_main_process() and result is not None:
                for k, v in result.items():
                    assert (
                        k not in results
                    ), "Different evaluators produce results with the same key {}".format(k)
                    results[k] = v
        return results


def inference_on_dataset(
    model,
    data_loader,
    evaluator: Union[DatasetEvaluator, List[DatasetEvaluator], None],
    callbacks=None,
):
    """
    Run model on the data_loader and evaluate the metrics with evaluator.
    Also benchmark the inference speed of `model.__call__` accurately.
    The model will be used in eval mode.

    Args:
        model (callable): a callable which takes an object from
            `data_loader` and returns some outputs.

            If it's an nn.Module, it will be temporarily set to `eval` mode.
            If you wish to evaluate a model in `training` mode instead, you can
            wrap the given model and override its behavior of `.eval()` and `.train()`.
        data_loader: an iterable object with a length.
            The elements it generates will be the inputs to the model.
        evaluator: the evaluator(s) to run. Use `None` if you only want to benchmark,
            but don't want to do any evaluation.
        callbacks (dict of callables): a dictionary of callback functions which can be
            called at each stage of inference.
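
            A sketch of the expected shape (every entry is optional; these are
            the keys this function looks up):

            .. code-block:: python

                callbacks = {
                    "on_start": lambda: None,
                    "before_inference": lambda: None,
                    "after_inference": lambda: None,
                    "on_end": lambda: None,
                }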

    Returns:
        The return value of `evaluator.evaluate()`
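
    Example:

    .. code-block:: python

        # minimal sketch; `model`, `data_loader` and `evaluator` are assumed to be
        # constructed elsewhere (e.g. with detectron2's data/evaluation utilities)
        results = inference_on_dataset(model, data_loader, evaluator)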
    """
    num_devices = get_world_size()
    logger = logging.getLogger(__name__)
    logger.info("Start inference on {} batches".format(len(data_loader)))

    total = len(data_loader)  # the inference data loader must have a fixed length
    if evaluator is None:
        # no evaluation requested: use a no-op evaluator so the benchmark path is unchanged
        evaluator = DatasetEvaluators([])
    if isinstance(evaluator, abc.MutableSequence):
        evaluator = DatasetEvaluators(evaluator)
    evaluator.reset()

    num_warmup = min(5, total - 1)
    start_time = time.perf_counter()
    total_data_time = 0
    total_compute_time = 0
    total_eval_time = 0
    with ExitStack() as stack:
        if isinstance(model, nn.Module):
            stack.enter_context(inference_context(model))
        stack.enter_context(torch.no_grad())

        start_data_time = time.perf_counter()
        (callbacks or {}).get("on_start", lambda: None)()
        for idx, inputs in enumerate(data_loader):
            total_data_time += time.perf_counter() - start_data_time
            if idx == num_warmup:
                # discard the warmup iterations from all timing statistics
                start_time = time.perf_counter()
                total_data_time = 0
                total_compute_time = 0
                total_eval_time = 0

            start_compute_time = time.perf_counter()
            (callbacks or {}).get("before_inference", lambda: None)()
            outputs = model(inputs)
            (callbacks or {}).get("after_inference", lambda: None)()
            if torch.cuda.is_available():
                # wait for async CUDA kernels to finish so compute time is measured accurately
                torch.cuda.synchronize()
            total_compute_time += time.perf_counter() - start_compute_time

            start_eval_time = time.perf_counter()
            evaluator.process(inputs, outputs)
            total_eval_time += time.perf_counter() - start_eval_time

            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
            data_seconds_per_iter = total_data_time / iters_after_start
            compute_seconds_per_iter = total_compute_time / iters_after_start
            eval_seconds_per_iter = total_eval_time / iters_after_start
            total_seconds_per_iter = (time.perf_counter() - start_time) / iters_after_start
            if idx >= num_warmup * 2 or compute_seconds_per_iter > 5:
                eta = datetime.timedelta(seconds=int(total_seconds_per_iter * (total - idx - 1)))
                log_every_n_seconds(
                    logging.INFO,
                    (
                        f"Inference done {idx + 1}/{total}. "
                        f"Dataloading: {data_seconds_per_iter:.4f} s/iter. "
                        f"Inference: {compute_seconds_per_iter:.4f} s/iter. "
                        f"Eval: {eval_seconds_per_iter:.4f} s/iter. "
                        f"Total: {total_seconds_per_iter:.4f} s/iter. "
                        f"ETA={eta}"
                    ),
                    n=5,
                )
            start_data_time = time.perf_counter()
        (callbacks or {}).get("on_end", lambda: None)()

    total_time = time.perf_counter() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    logger.info(
        "Total inference time: {} ({:.6f} s / iter per device, on {} devices)".format(
            total_time_str, total_time / (total - num_warmup), num_devices
        )
    )
    total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time)))
    logger.info(
        "Total inference pure compute time: {} ({:.6f} s / iter per device, on {} devices)".format(
            total_compute_time_str, total_compute_time / (total - num_warmup), num_devices
        )
    )

    results = evaluator.evaluate()
    # an evaluator may return None (e.g. on non-main processes); normalize to an empty dict
    if results is None:
        results = {}
    return results


@contextmanager
def inference_context(model):
    """
    A context where the model is temporarily changed to eval mode,
    and restored to previous mode afterwards.

    Args:
        model: a torch Module
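
    Example:

    .. code-block:: python

        with inference_context(model), torch.no_grad():
            outputs = model(inputs)
        # the model's original train/eval mode is restored here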
    """
    training_mode = model.training
    model.eval()
    yield
    model.train(training_mode)