bowdbeg committed on
Commit
cd855de
·
1 Parent(s): 774aee4

implement acceleration by multiprocessing

Browse files
Files changed (2) hide show
  1. __main__.py +10 -1
  2. matching_series.py +37 -12
__main__.py CHANGED
@@ -1,10 +1,12 @@
1
  import json
2
  import logging
 
3
  from argparse import ArgumentParser
4
 
5
  import evaluate
6
  import numpy as np
7
 
 
8
  logger = logging.getLogger(__name__)
9
  parser = ArgumentParser(
10
  description="Compute the matching series score between two time series freezed in a numpy array"
@@ -13,6 +15,7 @@ parser.add_argument("predictions", type=str, help="Path to the numpy array conta
13
  parser.add_argument("references", type=str, help="Path to the numpy array containing the references")
14
  parser.add_argument("--output", type=str, help="Path to the output file")
15
  parser.add_argument("--batch_size", type=int, help="Batch size to use for the computation")
 
16
  args = parser.parse_args()
17
 
18
  if not args.predictions or not args.references:
@@ -21,15 +24,21 @@ if not args.predictions or not args.references:
21
  predictions = np.load(args.predictions)
22
  references = np.load(args.references)
23
 
 
 
24
 
25
  logger.info(f"predictions shape: {predictions.shape}")
26
  logger.info(f"references shape: {references.shape}")
27
 
28
  import matching_series
29
 
 
30
  metric = matching_series.matching_series()
31
  # metric = evaluate.load("matching_series.py")
32
- results = metric.compute(predictions=predictions, references=references, batch_size=args.batch_size)
 
 
 
33
 
34
  print(results)
35
  if args.output:
 
1
  import json
2
  import logging
3
+ import time
4
  from argparse import ArgumentParser
5
 
6
  import evaluate
7
  import numpy as np
8
 
9
+ logging.basicConfig(level=logging.INFO)
10
  logger = logging.getLogger(__name__)
11
  parser = ArgumentParser(
12
  description="Compute the matching series score between two time series freezed in a numpy array"
 
15
  parser.add_argument("references", type=str, help="Path to the numpy array containing the references")
16
  parser.add_argument("--output", type=str, help="Path to the output file")
17
  parser.add_argument("--batch_size", type=int, help="Batch size to use for the computation")
18
+ parser.add_argument("--num_process", type=int, help="Batch size to use for the computation", default=1)
19
  args = parser.parse_args()
20
 
21
  if not args.predictions or not args.references:
 
24
  predictions = np.load(args.predictions)
25
  references = np.load(args.references)
26
 
27
+ predictions = predictions[:1000]
28
+ references = references[:1000]
29
 
30
  logger.info(f"predictions shape: {predictions.shape}")
31
  logger.info(f"references shape: {references.shape}")
32
 
33
  import matching_series
34
 
35
+ s = time.time()
36
  metric = matching_series.matching_series()
37
  # metric = evaluate.load("matching_series.py")
38
+ results = metric.compute(
39
+ predictions=predictions, references=references, batch_size=args.batch_size, num_process=args.num_process
40
+ )
41
+ logger.info(f"Time taken: {time.time() - s}")
42
 
43
  print(results)
44
  if args.output:
matching_series.py CHANGED
@@ -13,6 +13,7 @@
13
  # limitations under the License.
14
  """TODO: Add a description here."""
15
 
 
16
  import math
17
  import statistics
18
  from typing import List, Optional, Union
@@ -134,6 +135,7 @@ class matching_series(evaluate.Metric):
134
  cuc_n_calculation: int = 3,
135
  cuc_n_samples: Union[List[int], str] = "auto",
136
  metric: str = "mse",
 
137
  ):
138
  """
139
  Compute the scores of the module given the predictions and references
@@ -161,17 +163,39 @@ class matching_series(evaluate.Metric):
161
  # distance between predictions and references for all example combinations for each features
162
  # shape: (num_generation, num_reference, num_features)
163
  if batch_size is not None:
164
- distance = np.zeros((len(predictions), len(references), predictions.shape[-1]))
165
- # iterate over the predictions and references in batches
166
- for i in range(0, len(predictions) + batch_size, batch_size):
167
- for j in range(0, len(references) + batch_size, batch_size):
168
- d = self._compute_metric(
169
- predictions[i : i + batch_size, None],
170
- references[None, j : j + batch_size],
171
- metric=metric,
172
- axis=-2,
 
 
 
 
 
 
 
 
173
  )
174
- distance[i : i + batch_size, j : j + batch_size] = d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  else:
176
  distance = self._compute_metric(predictions[:, None], references, metric=metric, axis=1)
177
 
@@ -311,12 +335,13 @@ class matching_series(evaluate.Metric):
311
  cuc = np.trapz(coverages, n_samples) / len(n_samples) / max(n_samples)
312
  return coverages, cuc
313
 
314
- def _compute_metric(self, x, y, metric: str = "mse", axis: int = -1):
 
315
  if metric.lower() == "mse":
316
  return np.mean((x - y) ** 2, axis=axis)
317
  elif metric.lower() == "mae":
318
  return np.mean(np.abs(x - y), axis=axis)
319
  elif metric.lower() == "rmse":
320
- return np.sqrt(self._compute_metric(x, y, metric="mse", axis=axis))
321
  else:
322
  raise ValueError("Unknown metric: {}".format(metric))
 
13
  # limitations under the License.
14
  """TODO: Add a description here."""
15
 
16
+ import concurrent.futures
17
  import math
18
  import statistics
19
  from typing import List, Optional, Union
 
135
  cuc_n_calculation: int = 3,
136
  cuc_n_samples: Union[List[int], str] = "auto",
137
  metric: str = "mse",
138
+ num_process: int = 1,
139
  ):
140
  """
141
  Compute the scores of the module given the predictions and references
 
163
  # distance between predictions and references for all example combinations for each features
164
  # shape: (num_generation, num_reference, num_features)
165
  if batch_size is not None:
166
+
167
+ if num_process > 1:
168
+ distance = np.zeros((len(predictions), len(references), predictions.shape[-1]))
169
+
170
+ idxs = [
171
+ (i, j)
172
+ for i in range(0, len(predictions) + batch_size, batch_size)
173
+ for j in range(0, len(references) + batch_size, batch_size)
174
+ ]
175
+ args = [
176
+ (predictions[i : i + batch_size, None], references[None, j : j + batch_size], metric, -2)
177
+ for i, j in idxs
178
+ ]
179
+ with concurrent.futures.ProcessPoolExecutor(max_workers=num_process) as executor:
180
+ results = executor.map(
181
+ self._compute_metric,
182
+ *zip(*args),
183
  )
184
+ for (i, j), d in zip(idxs, results):
185
+ distance[i : i + batch_size, j : j + batch_size] = d
186
+
187
+ else:
188
+ distance = np.zeros((len(predictions), len(references), predictions.shape[-1]))
189
+ # iterate over the predictions and references in batches
190
+ for i in range(0, len(predictions) + batch_size, batch_size):
191
+ for j in range(0, len(references) + batch_size, batch_size):
192
+ d = self._compute_metric(
193
+ predictions[i : i + batch_size, None],
194
+ references[None, j : j + batch_size],
195
+ metric=metric,
196
+ axis=-2,
197
+ )
198
+ distance[i : i + batch_size, j : j + batch_size] = d
199
  else:
200
  distance = self._compute_metric(predictions[:, None], references, metric=metric, axis=1)
201
 
 
335
  cuc = np.trapz(coverages, n_samples) / len(n_samples) / max(n_samples)
336
  return coverages, cuc
337
 
338
@staticmethod
def _compute_metric(x, y, metric: str = "mse", axis: int = -1):
    """Reduce the elementwise difference of two arrays to a distance score.

    Args:
        x, y: numpy arrays that broadcast against each other
            (e.g. batched predictions vs. references).
        metric: distance kind, case-insensitive — "mse", "mae" or "rmse".
        axis: axis over which the mean is taken.

    Returns:
        numpy array of distances reduced along `axis`.

    Raises:
        ValueError: if `metric` names an unsupported distance.
    """
    # Normalize once so each branch compares against a canonical name.
    kind = metric.lower()
    diff = x - y
    if kind == "mse":
        return np.mean(diff**2, axis=axis)
    if kind == "mae":
        return np.mean(np.abs(diff), axis=axis)
    if kind == "rmse":
        # RMSE is computed inline (not via recursion) because, as a
        # staticmethod, there is no bound `self` to delegate through.
        return np.sqrt(np.mean(diff**2, axis=axis))
    raise ValueError("Unknown metric: {}".format(metric))