kevinconka committed
Commit a4b3018 · verified · 1 Parent(s): 062331a

Update ref-metrics.py

Files changed (1):
  1. ref-metrics.py +414 -96

ref-metrics.py CHANGED
@@ -11,59 +11,153 @@
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
 
14
 
15
-
16
- import random
17
- import datetime
18
- import os
19
 
20
  import datasets
21
  import evaluate
22
- from seametrics.user_friendly.utils import calculate_from_payload
23
-
24
- import wandb
25
 
26
  _CITATION = """\
27
- @InProceedings{huggingface:module,
28
- title = {A great new module},
29
- authors={huggingface, Inc.},
30
- year={2020}
31
- }\
32
- @article{milan2016mot16,
33
- title={MOT16: A benchmark for multi-object tracking},
34
- author={Milan, Anton and Leal-Taix{\'e}, Laura and Reid, Ian and Roth, Stefan and Schindler, Konrad},
35
- journal={arXiv preprint arXiv:1603.00831},
36
- year={2016}
37
  }
38
  """
39
 
40
  _DESCRIPTION = """\
41
- The MOT Metrics module is designed to evaluate multi-object tracking (MOT)
42
- algorithms by computing various metrics based on predicted and ground truth bounding
43
- boxes. It serves as a crucial tool in assessing the performance of MOT systems,
44
- aiding in the iterative improvement of tracking algorithms."""
45
 
46
 
47
  _KWARGS_DESCRIPTION = """
48
-
49
- Calculates how good are predictions given some references, using certain scores
50
  Args:
51
- predictions: list of predictions to score. Each predictions
52
- should be a string with tokens separated by spaces.
53
- references: list of reference for each prediction. Each
54
- reference should be a string with tokens separated by spaces.
55
- max_iou (`float`, *optional*):
56
- If specified, this is the minimum Intersection over Union (IoU) threshold to consider a detection as a true positive.
57
- Default is 0.5.
58
  """
59
 
60
 
61
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
62
- class UserFriendlyMetrics(evaluate.Metric):
63
- """TODO: Short description of my evaluation module."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  def _info(self):
66
- # TODO: Specifies the evaluate.EvaluationModuleInfo object
67
  return evaluate.MetricInfo(
68
  # This is the description that will appear on the modules page.
69
  module_type="metric",
@@ -73,36 +167,265 @@ class UserFriendlyMetrics(evaluate.Metric):
73
  # This defines the format of each prediction and reference
74
  features=datasets.Features(
75
  {
76
- "predictions": datasets.Sequence(
77
- datasets.Sequence(datasets.Value("float"))
78
- ),
79
- "references": datasets.Sequence(
80
- datasets.Sequence(datasets.Value("float"))
81
- ),
82
  }
83
  ),
84
  # Additional links to the codebase or references
85
- codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
86
- reference_urls=["http://path.to.reference.url/new_module"],
87
  )
88
 
89
- def _download_and_prepare(self, dl_manager):
90
- """Optional: download external resources useful to compute the scores"""
91
- # TODO: Download external resources if needed
92
- pass
93
-
94
- def compute_from_payload(
95
- self,
96
- payload,
97
- area_ranges_tuples=None, # Optional parameter
98
  ):
99
  """
100
- Compute the metric from the payload.
101
102
  Args:
103
  payload (Payload): The payload to compute the metric from.
104
  **kwargs: Additional keyword arguments.
105
-
106
  Returns:
107
  dict: The computed metric results with the following format:
108
  {
@@ -126,49 +449,44 @@ class UserFriendlyMetrics(evaluate.Metric):
126
  - If the metric does not support area ranges, the metric should store the results under the `all` key.
127
  - If an area range is provided it will be displayed in the output. If area_ranges_tuples is None, then all the area ranges will be displayed
128
  """
129
- return self.dummy_values(area_ranges_tuples)
130
-
131
- def dummy_values(self, area_ranges_tuples=None):
132
- """Dummy randome values in the expected format that all new metrics need to return"""
133
-
134
- # Use default ranges if none are provided
135
- if area_ranges_tuples is None:
136
- area_names = ["all", "small", "medium", "large"]
137
- else:
138
- area_names = {
139
- key
140
- for key, value in area_ranges_tuples.items()
141
- if value["range"] is not None
142
- }
143
-
144
- # Generate random dummy values
145
- def generate_random_values():
146
- return {
147
- "tp": random.randint(0, 100), # Random integer between 0 and 100
148
- "fp": random.randint(0, 50), # Random integer between 0 and 50
149
- "fn": random.randint(0, 50), # Random integer between 0 and 50
150
- "precision": round(
151
- random.uniform(0.5, 1.0), 2
152
- ), # Random float between 0.5 and 1.0
153
- "recall": round(
154
- random.uniform(0.5, 1.0), 2
155
- ), # Random float between 0.5 and 1.0
156
- "f1": round(
157
- random.uniform(0.5, 1.0), 2
158
- ), # Random float between 0.5 and 1.0
159
- }
160
-
161
- # Initialize output structure
162
- dummy_output = {"model_1": {"overall": {}, "per_sequence": {"sequence_1": {},"sequence_2": {}}}}
163
 
164
- # Populate only the ranges specified in area_ranges_tuples with random values
165
- for area_name in area_names:
166
- dummy_output["model_1"]["overall"][area_name] = generate_random_values()
167
- dummy_output["model_1"]["per_sequence"]["sequence_1"][
168
- area_name
169
- ] = generate_random_values()
170
- dummy_output["model_1"]["per_sequence"]["sequence_2"][
171
- area_name
172
- ] = generate_random_values()
173
- return dummy_output
174
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
+ """TODO: Add a description here."""
15
 
16
+ from typing import List, Literal, Tuple
17
 
18
  import datasets
19
  import evaluate
20
+ import numpy as np
21
+ from deprecated import deprecated
22
+ from seametrics.detection import PrecisionRecallF1Support
23
+ from seametrics.detection.utils import payload_to_det_metric
24
+ from seametrics.payload import Payload
25
 
26
  _CITATION = """\
27
+ @InProceedings{coco:2020,
28
+ title = {Microsoft {COCO:} Common Objects in Context},
29
+ authors={Tsung{-}Yi Lin and
30
+ Michael Maire and
31
+ Serge J. Belongie and
32
+ James Hays and
33
+ Pietro Perona and
34
+ Deva Ramanan and
35
+ Piotr Dollar and
36
+ C. Lawrence Zitnick},
37
+ booktitle = {Computer Vision - {ECCV} 2014 - 13th European Conference, Zurich,
38
+ Switzerland, September 6-12, 2014, Proceedings, Part {V}},
39
+ series = {Lecture Notes in Computer Science},
40
+ volume = {8693},
41
+ pages = {740--755},
42
+ publisher = {Springer},
43
+ year={2014}
44
  }
45
  """
46
 
47
  _DESCRIPTION = """\
48
+ This evaluation metric is designed to provide object detection metrics at
49
+ different object size levels. It is based on a modified version of the commonly used
50
+ COCO-evaluation metrics.
51
+ """
52
 
53
 
54
  _KWARGS_DESCRIPTION = """
55
+ Calculates object detection metrics given predicted and ground truth bounding boxes for
56
+ a single image.
57
  Args:
58
+ predictions: list of predictions for each image. Each prediction should
59
+ be a dict containing the following
60
+ - 'boxes': list of bounding boxes, xywh in absolute pixel values
61
+ - 'labels': list of labels for each bounding box
62
+ - 'scores': list of scores for each bounding box
63
+ references: list of ground truth annotations for each image. Each reference should
64
+ be a dict containing the following
65
+ - 'boxes': list of bounding boxes, xywh in absolute pixel values
66
+ - 'labels': list of labels for each bounding box
67
+ - 'area': list of areas for each bounding box
68
+ Returns:
69
+ dict containing dicts for each specified area range with following items:
70
+ 'range': specified area range as [min_px_area, max_px_area]
71
+ 'iouThr': min. IOU-threshold of a prediction with a ground truth box
72
+ to be considered a correct prediction
73
+ 'maxDets': maximum number of detections
74
+ 'tp': number of true positive (correct) predictions
75
+ 'fp': number of false positive (incorrect) predictions
76
+ 'fn': number of false negative (missed) predictions
77
+ 'duplicates': number of duplicate predictions
78
+ 'precision': best possible score = 1, worst possible score = 0
79
+ large if few false positive predictions
80
+ formula: tp/(fp+tp)
81
+ 'recall': best possible score = 1, worst possible score = 0
82
+ large if few missed predictions
83
+ formula: tp/(tp+fn)
84
+ 'f1': best possible score = 1, worst possible score = 0
85
+ trades off precision and recall
86
+ formula: 2*(precision*recall)/(precision+recall)
87
+ 'support': number of ground truth bounding boxes considered in the evaluation,
88
+ 'fpi': number of images with no ground truth but false positive predictions,
89
+ 'nImgs': number of images considered in evaluation
90
+ Examples:
91
+ >>> import evaluate
92
+ >>> from seametrics.payload.processor import PayloadProcessor
93
+ >>> payload = PayloadProcessor(...).payload
94
+ >>> module = evaluate.load("SEA-AI/det-metrics", ...)
95
+ >>> module._add_payload(payload)
96
+ >>> result = module.compute()
97
+ >>> print(result)
98
+ {'all': {
99
+ 'range': [0, 10000000000.0],
100
+ 'iouThr': '0.00',
101
+ 'maxDets': 100,
102
+ 'tp': 1,
103
+ 'fp': 3,
104
+ 'fn': 1,
105
+ 'duplicates': 0,
106
+ 'precision': 0.25,
107
+ 'recall': 0.5,
108
+ 'f1': 0.3333333333333333,
109
+ 'support': 2,
110
+ 'fpi': 0,
111
+ 'nImgs': 2
112
+ }
113
+ }
114
  """
115
 
116
 
117
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
118
+ class DetectionMetric(evaluate.Metric):
119
+ def __init__(
120
+ self,
121
+ area_ranges_tuples: List[Tuple[str, List[int]]] = [("all", [0, 1e5**2])],
122
+ iou_threshold: List[float] = [1e-10],
123
+ class_agnostic: bool = True,
124
+ bbox_format: str = "xywh",
125
+ iou_type: Literal["bbox", "segm"] = "bbox",
126
+ payload: Payload = None,
127
+ **kwargs,
128
+ ):
129
+ super().__init__(**kwargs)
130
+
131
+ # save parameters for later
132
+ self.payload = payload
133
+ self.model_names = payload.models if payload else ["custom"]
134
+ self.iou_threshold = iou_threshold
135
+ self.area_ranges_tuples = area_ranges_tuples
136
+ self.class_agnostic = class_agnostic
137
+ self.iou_type = iou_type
138
+ self.bbox_format = bbox_format
139
+
140
+ # postprocess parameters
141
+ self.iou_thresholds = (
142
+ iou_threshold if isinstance(iou_threshold, list) else [iou_threshold]
143
+ )
144
+ self.area_ranges = [v for _, v in area_ranges_tuples]
145
+ self.area_ranges_labels = [k for k, _ in area_ranges_tuples]
146
+
147
+ # initialize coco_metrics
148
+ self.coco_metric = PrecisionRecallF1Support(
149
+ iou_thresholds=self.iou_thresholds,
150
+ area_ranges=self.area_ranges,
151
+ area_ranges_labels=self.area_ranges_labels,
152
+ class_agnostic=self.class_agnostic,
153
+ iou_type=self.iou_type,
154
+ box_format=self.bbox_format,
155
+ )
156
+
157
+ # initialize evaluation metric
158
+ self._init_evaluation_metric()
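For orientation only (not part of this diff): constructing the class above directly might look roughly like the sketch below; the "small" area range and the 0.5 IoU threshold are assumed example values, not defaults taken from the commit.

# Illustrative sketch -- "small" range and 0.5 IoU are assumed example values.
module = DetectionMetric(
    area_ranges_tuples=[("all", [0, 1e5 ** 2]), ("small", [0, 32 ** 2])],
    iou_threshold=[0.5],
    class_agnostic=True,
    bbox_format="xywh",
    iou_type="bbox",
)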
159
 
160
  def _info(self):
 
161
  return evaluate.MetricInfo(
162
  # This is the description that will appear on the modules page.
163
  module_type="metric",
 
167
  # This defines the format of each prediction and reference
168
  features=datasets.Features(
169
  {
170
+ "predictions": [
171
+ datasets.Features(
172
+ {
173
+ "boxes": datasets.Sequence(
174
+ datasets.Sequence(datasets.Value("float"))
175
+ ),
176
+ "labels": datasets.Sequence(datasets.Value("int64")),
177
+ "scores": datasets.Sequence(datasets.Value("float")),
178
+ }
179
+ )
180
+ ],
181
+ "references": [
182
+ datasets.Features(
183
+ {
184
+ "boxes": datasets.Sequence(
185
+ datasets.Sequence(datasets.Value("float"))
186
+ ),
187
+ "labels": datasets.Sequence(datasets.Value("int64")),
188
+ "area": datasets.Sequence(datasets.Value("float")),
189
+ }
190
+ )
191
+ ],
192
  }
193
  ),
194
  # Additional links to the codebase or references
195
+ codebase_urls=[
196
+ "https://github.com/SEA-AI/seametrics/tree/main",
197
+ "https://lightning.ai/docs/torchmetrics/stable/detection/mean_average_precision.html",
198
+ ],
199
  )
200
 
201
+ def add(self, *, prediction, reference, **kwargs):
202
+ """Adds a batch of predictions and references to the metric"""
203
+ # in case the inputs are lists, convert them to numpy arrays
204
+ prediction = self._preprocess(prediction)
205
+ reference = self._preprocess(reference)
206
+
207
+ self.coco_metric.update(prediction, reference)
208
+
209
+ def _init_evaluation_metric(self, **kwargs):
210
+ """
211
+ Initializes the evaluation metric by generating sample data, preprocessing predictions and references,
212
+ and then adding the processed data to the metric using the super class method with additional keyword arguments.
213
+
214
+ Parameters:
215
+ **kwargs: Additional keyword arguments for the super class method.
216
+
217
+ Returns:
218
+ None
219
+ """
220
+ predictions, references = self._generate_sample_data()
221
+ predictions = self._preprocess(predictions)
222
+ references = self._preprocess(references)
223
+
224
+ # does not impact the metric, but is required for the interface x_x
225
+ super(evaluate.Metric, self).add(
226
+ prediction=self._postprocess(predictions),
227
+ references=self._postprocess(references),
228
+ **kwargs,
229
+ )
230
+
231
+ @deprecated(reason="Use `module._add_payload` instead")
232
+ def add_batch(self, payload: Payload, model_name: str = None):
233
+ """Takes as input a payload and adds the batch to the metric"""
234
+ self._add_payload(payload, model_name)
235
+
236
+ def _compute(self, *, predictions, references, **kwargs):
237
+ """Called within the evaluate.Metric.compute() method"""
238
+
239
+ results = {}
240
+ for model_name in self.model_names:
241
+ print(f"\n##### {model_name} #####")
242
+ # add payload if available (otherwise predictions and references must be added with add function)
243
+ if self.payload:
244
+ self._add_payload(self.payload, model_name)
245
+
246
+ results[model_name] = self.coco_metric.compute()
247
+
248
+ # reset coco_metrics for next model
249
+ self.coco_metric = PrecisionRecallF1Support(
250
+ iou_thresholds=self.iou_thresholds,
251
+ area_ranges=self.area_ranges,
252
+ area_ranges_labels=self.area_ranges_labels,
253
+ class_agnostic=self.class_agnostic,
254
+ iou_type=self.iou_type,
255
+ box_format=self.bbox_format,
256
+ )
257
+ return results
258
+
259
+ def _add_payload(self, payload: Payload, model_name: str = None):
260
+ """Converts the payload to the format expected by the metric"""
261
+ # import only if needed since fiftyone is not a direct dependency
262
+
263
+ predictions, references = payload_to_det_metric(payload, model_name)
264
+ self.add(prediction=predictions, reference=references)
265
+
266
+ return self
267
+
268
+ def _preprocess(self, list_of_dicts):
269
+ """Converts the lists to numpy arrays for type checking"""
270
+ return [self._lists_to_np(d) for d in list_of_dicts]
271
+
272
+ def _postprocess(self, list_of_dicts):
273
+ """Converts the numpy arrays to lists for type checking"""
274
+ return [self._np_to_lists(d) for d in list_of_dicts]
275
+
276
+ def _np_to_lists(self, d):
277
+ """datasets does not support numpy arrays for type checking"""
278
+ for k, v in d.items():
279
+ if isinstance(v, dict):
280
+ self._np_to_lists(v)
281
+ elif isinstance(v, np.ndarray):
282
+ d[k] = v.tolist()
283
+ return d
284
+
285
+ def _lists_to_np(self, d):
286
+ """datasets does not support numpy arrays for type checking"""
287
+ for k, v in d.items():
288
+ if isinstance(v, dict):
289
+ self._lists_to_np(v)
290
+ elif isinstance(v, list):
291
+ d[k] = np.array(v)
292
+ return d
293
+
294
+ def generate_confidence_curves(
295
+ self, results, confidence_config={"T": 0, "R": 0, "K": 0, "A": 0, "M": 0}
296
  ):
297
  """
298
+ Generate confidence curves based on results and confidence configuration.
299
+
300
+ Parameters:
301
+ results (dict): Results of the evaluation for different models.
302
+ confidence_config (dict): Configuration for confidence values. Defaults to {"T": 0, "R": 0, "K": 0, "A": 0, "M": 0}.
303
+ T: [1e-10] iou threshold
304
+ R: recall threshold (not used)
305
+ K: class index (class-agnostic mAP, so only 0)
306
+ A: 0=all, 1=small, 2=medium, 3=large, ... (depending on area ranges)
307
+ M: [100] maxDets default in precision_recall_f1_support
308
+
309
+ Returns:
310
+ fig (plotly.graph_objects.Figure): The plotly figure showing the confidence curves.
311
+ """
312
+ import plotly.graph_objects as go
313
+ from seametrics.detection.utils import get_confidence_metric_vals
314
+
315
+ # Create traces
316
+ fig = go.Figure()
317
+ metrics = ["precision", "recall", "f1"]
318
+ for model_name in self.model_names:
319
+ print(f"##### {model_name} #####")
320
+ plot_data = get_confidence_metric_vals(
321
+ cocoeval=results[model_name]["eval"],
322
+ T=confidence_config["T"],
323
+ R=confidence_config["R"],
324
+ K=confidence_config["K"],
325
+ A=confidence_config["A"],
326
+ M=confidence_config["M"],
327
+ )
328
+
329
+ for metric in metrics:
330
+ fig.add_trace(
331
+ go.Scatter(
332
+ x=plot_data["conf"],
333
+ y=plot_data[metric],
334
+ mode="lines",
335
+ name=f"{model_name} {metric}",
336
+ line=dict(dash=None if metric == "f1" else "dash"),
337
+ )
338
+ )
339
+
340
+ fig.update_layout(
341
+ title="Metric vs Confidence",
342
+ hovermode="x unified",
343
+ xaxis_title="Confidence",
344
+ yaxis_title="Metric value",
345
+ )
346
+ return fig
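A hedged usage sketch (not part of this commit): assuming `module.compute()` has already been run and each per-model result keeps the `eval` entry that `get_confidence_metric_vals` expects, the helper above could be driven like this.

# Hypothetical call -- confidence_config is left at the defaults from the signature.
results = module.compute()
fig = module.generate_confidence_curves(results)
fig.show()  # plotly figure: precision/recall/f1 vs. confidence per model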
347
 
348
+ def wandb(self, results, wandb_runs: list = None, wandb_section: str = None, wandb_project='detection_metrics'):
349
+ """
350
+ Logs metrics to Weights and Biases (wandb) for tracking and visualization.
351
+
352
+ This function logs the provided metrics to Weights and Biases (wandb), a platform for tracking machine learning experiments.
353
+ Each key in the `results` dictionary represents a separate run and the corresponding value contains the metrics for that run.
354
+ If a W&B run list is provided, the results of the runs will be added to the passed W&B runs. Otherwise new W&B runs will be created.
355
+ If a W&B section is provided, the metrics will be logged in this section drop-down. Otherwise no extra W&B section is created
356
+ and the metrics are logged directly.
357
+ The function logs in to wandb using an API key obtained from the secret 'WANDB_API_KEY', initializes a run for
358
+ each key in `results` and logs the metrics.
359
+
360
+ Args:
361
+ results (dict): A dictionary where each key is a unique identifier for a run and each value is another dictionary
362
+ containing the metrics to log. Example:
363
+ {
364
+ "run1": {"metrics": {"accuracy": 0.9, "loss": 0.1}},
365
+ "run2": {"metrics": {"accuracy": 0.85, "loss": 0.15}}
366
+ }
367
+ wandb_runs (list, optional): A list containing W&B runs where the results should be added
368
+ (e.g. the first item in results will be added to the first run in wandb_runs, etc.)
369
+ wandb_section (str, optional): A string to specify the W&B section under which the metrics are logged.
370
+ wandb_project (str, optional): The name of the wandb project to which the runs will be logged. Defaults to 'detection_metrics'.
371
+
372
+ Environment Variables:
373
+ WANDB_API_KEY: The API key for authenticating with wandb.
374
+
375
+ Imports:
376
+ os: To retrieve environment variables.
377
+ wandb: To interact with the Weights and Biases platform.
378
+ datetime: To generate a timestamp for run names.
379
+ """
380
+ import os
381
+ import wandb
382
+ import datetime
383
+
384
+ current_datetime = datetime.datetime.now()
385
+ formatted_datetime = current_datetime.strftime("%Y-%m-%d_%H-%M-%S")
386
+ wandb.login(key=os.getenv('WANDB_API_KEY'))
387
+
388
+ if wandb_runs is not None:
389
+ assert len(wandb_runs) == len(results), "runs and results must have the same length"
390
+
391
+ for i, k in enumerate(results.keys()):
392
+ if wandb_runs is None:
393
+ run = wandb.init(project=wandb_project, name=f"{k}-{formatted_datetime}")
394
+ else:
395
+ run = wandb_runs[i]
396
+ run.log({f"{wandb_section}/{m}" : v for m, v in results[k]['metrics'].items()} if wandb_section is not None else results[k]['metrics'])
397
+ if wandb_runs is None:
398
+ run.finish()
399
+
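A minimal sketch of calling the logging helper above (illustrative, not part of the commit); it assumes WANDB_API_KEY is set in the environment and that each per-model result dict carries a `metrics` entry, as in the docstring example.

# Hypothetical usage -- the section name is an arbitrary example.
results = module.compute()  # e.g. {"model_a": {"metrics": {...}, "eval": ...}}
module.wandb(results, wandb_section="det-metrics", wandb_project="detection_metrics")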
400
+ def _generate_sample_data(self):
401
+ """
402
+ Generates dummy sample data for predictions and references used for initialization.
403
+
404
+ Returns:
405
+ Tuple[List[Dict[str, List[Union[float, int]]]], List[Dict[str, List[Union[float, int]]]]]:
406
+ - predictions (List[Dict[str, List[Union[float, int]]]]): A list of dictionaries representing the predictions. Each dictionary contains the following keys:
407
+ - boxes (List[List[float]]): A list of bounding boxes in the format [x, y, w, h].
408
+ - labels (List[int]): A list of labels.
409
+ - scores (List[float]): A list of scores.
410
+ - references (List[Dict[str, List[Union[float, int]]]]): A list of dictionaries representing the references. Each dictionary contains the following keys:
411
+ - boxes (List[List[float]]): A list of bounding boxes in the format [x, y, w, h].
412
+ - labels (List[int]): A list of labels.
413
+ - area (List[float]): A list of areas.
414
+ """
415
+ predictions = [
416
+ {"boxes": [[1.0, 2.0, 3.0, 4.0]], "labels": [0], "scores": [1.0]}
417
+ ]
418
+ references = [{"boxes": [[1.0, 2.0, 3.0, 4.0]], "labels": [0], "area": [1.0]}]
419
+
420
+ return predictions, references
421
+
422
+
423
+ def compute_from_payload(self, payload: Payload):
424
+ """
425
+ Compute the metric from the payload.
426
  Args:
427
  payload (Payload): The payload to compute the metric from.
428
  **kwargs: Additional keyword arguments.
 
429
  Returns:
430
  dict: The computed metric results with the following format:
431
  {
 
449
  - If the metric does not support area ranges, the metric should store the results under the `all` key.
450
  - If an area range is provided it will be displayed in the output. If area_ranges_tuples is None, then all the area ranges will be displayed
451
  """
452
+ results = {}
453
 
454
+ for model_name in payload.models:
455
+ results[model_name] = {"overall": {}, "per_sequence": {}}
456
+
457
+ # per-sequence loop
458
+ for seq_name, sequence in payload.sequences.items():
459
+ # create new payload only with specific sequence and model
460
+ sequence_payload = Payload(
461
+ dataset=payload.dataset,
462
+ gt_field_name=payload.gt_field_name,
463
+ models=[model_name],
464
+ sequences={seq_name: sequence}
465
+ )
466
+ module = DetectionMetric(
467
+ area_ranges_tuples=self.area_ranges_tuples,
468
+ iou_threshold=self.iou_threshold,
469
+ class_agnostic=self.class_agnostic,
470
+ bbox_format=self.bbox_format,
471
+ iou_type=self.iou_type,
472
+ payload=sequence_payload
473
+ )
474
+ results[model_name]["per_sequence"][seq_name] = module.compute()[model_name]["metrics"]
475
 
476
+ # overall per-model loop
477
+ model_payload = Payload(
478
+ dataset=payload.dataset,
479
+ gt_field_name=payload.gt_field_name,
480
+ models=[model_name],
481
+ sequences=payload.sequences
482
+ )
483
+ module = DetectionMetric(
484
+ area_ranges_tuples=self.area_ranges_tuples,
485
+ iou_threshold=self.iou_threshold,
486
+ class_agnostic=self.class_agnostic,
487
+ bbox_format=self.bbox_format,
488
+ iou_type=self.iou_type,
489
+ payload=model_payload
490
+ )
491
+ results[model_name]["overall"] = module.compute()[model_name]["metrics"]
492
+ return results
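Putting the pieces together, the docstring example earlier in this file suggests an end-to-end flow along these lines (illustrative only; the PayloadProcessor arguments depend on the dataset and are left elided as in the docstring).

import evaluate
from seametrics.payload.processor import PayloadProcessor

payload = PayloadProcessor(...).payload         # args elided, as in the docstring example
module = evaluate.load("SEA-AI/det-metrics")    # metric id taken from the docstring example
results = module.compute_from_payload(payload)  # {model: {"overall": ..., "per_sequence": ...}}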