bowdbeg committed on
Commit
cd855de
·
1 Parent(s): 774aee4

implement acceleration by multiprocessing

Browse files
Files changed (2) hide show
  1. __main__.py +10 -1
  2. matching_series.py +37 -12
__main__.py CHANGED
@@ -1,10 +1,12 @@
1
  import json
2
  import logging
 
3
  from argparse import ArgumentParser
4
 
5
  import evaluate
6
  import numpy as np
7
 
 
8
  logger = logging.getLogger(__name__)
9
  parser = ArgumentParser(
10
  description="Compute the matching series score between two time series freezed in a numpy array"
@@ -13,6 +15,7 @@ parser.add_argument("predictions", type=str, help="Path to the numpy array conta
13
  parser.add_argument("references", type=str, help="Path to the numpy array containing the references")
14
  parser.add_argument("--output", type=str, help="Path to the output file")
15
  parser.add_argument("--batch_size", type=int, help="Batch size to use for the computation")
 
16
  args = parser.parse_args()
17
 
18
  if not args.predictions or not args.references:
@@ -21,15 +24,21 @@ if not args.predictions or not args.references:
21
  predictions = np.load(args.predictions)
22
  references = np.load(args.references)
23
 
 
 
24
 
25
  logger.info(f"predictions shape: {predictions.shape}")
26
  logger.info(f"references shape: {references.shape}")
27
 
28
  import matching_series
29
 
 
30
  metric = matching_series.matching_series()
31
  # metric = evaluate.load("matching_series.py")
32
- results = metric.compute(predictions=predictions, references=references, batch_size=args.batch_size)
 
 
 
33
 
34
  print(results)
35
  if args.output:
 
1
  import json
2
  import logging
3
+ import time
4
  from argparse import ArgumentParser
5
 
6
  import evaluate
7
  import numpy as np
8
 
9
+ logging.basicConfig(level=logging.INFO)
10
  logger = logging.getLogger(__name__)
11
  parser = ArgumentParser(
12
  description="Compute the matching series score between two time series freezed in a numpy array"
 
15
  parser.add_argument("references", type=str, help="Path to the numpy array containing the references")
16
  parser.add_argument("--output", type=str, help="Path to the output file")
17
  parser.add_argument("--batch_size", type=int, help="Batch size to use for the computation")
18
+ parser.add_argument("--num_process", type=int, help="Batch size to use for the computation", default=1)
19
  args = parser.parse_args()
20
 
21
  if not args.predictions or not args.references:
 
24
  predictions = np.load(args.predictions)
25
  references = np.load(args.references)
26
 
27
+ predictions = predictions[:1000]
28
+ references = references[:1000]
29
 
30
  logger.info(f"predictions shape: {predictions.shape}")
31
  logger.info(f"references shape: {references.shape}")
32
 
33
  import matching_series
34
 
35
+ s = time.time()
36
  metric = matching_series.matching_series()
37
  # metric = evaluate.load("matching_series.py")
38
+ results = metric.compute(
39
+ predictions=predictions, references=references, batch_size=args.batch_size, num_process=args.num_process
40
+ )
41
+ logger.info(f"Time taken: {time.time() - s}")
42
 
43
  print(results)
44
  if args.output:
matching_series.py CHANGED
@@ -13,6 +13,7 @@
13
  # limitations under the License.
14
  """TODO: Add a description here."""
15
 
 
16
  import math
17
  import statistics
18
  from typing import List, Optional, Union
@@ -134,6 +135,7 @@ class matching_series(evaluate.Metric):
134
  cuc_n_calculation: int = 3,
135
  cuc_n_samples: Union[List[int], str] = "auto",
136
  metric: str = "mse",
 
137
  ):
138
  """
139
  Compute the scores of the module given the predictions and references
@@ -161,17 +163,39 @@ class matching_series(evaluate.Metric):
161
  # distance between predictions and references for all example combinations for each features
162
  # shape: (num_generation, num_reference, num_features)
163
  if batch_size is not None:
164
- distance = np.zeros((len(predictions), len(references), predictions.shape[-1]))
165
- # iterate over the predictions and references in batches
166
- for i in range(0, len(predictions) + batch_size, batch_size):
167
- for j in range(0, len(references) + batch_size, batch_size):
168
- d = self._compute_metric(
169
- predictions[i : i + batch_size, None],
170
- references[None, j : j + batch_size],
171
- metric=metric,
172
- axis=-2,
 
 
 
 
 
 
 
 
173
  )
174
- distance[i : i + batch_size, j : j + batch_size] = d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  else:
176
  distance = self._compute_metric(predictions[:, None], references, metric=metric, axis=1)
177
 
@@ -311,12 +335,13 @@ class matching_series(evaluate.Metric):
311
  cuc = np.trapz(coverages, n_samples) / len(n_samples) / max(n_samples)
312
  return coverages, cuc
313
 
314
- def _compute_metric(self, x, y, metric: str = "mse", axis: int = -1):
 
315
  if metric.lower() == "mse":
316
  return np.mean((x - y) ** 2, axis=axis)
317
  elif metric.lower() == "mae":
318
  return np.mean(np.abs(x - y), axis=axis)
319
  elif metric.lower() == "rmse":
320
- return np.sqrt(self._compute_metric(x, y, metric="mse", axis=axis))
321
  else:
322
  raise ValueError("Unknown metric: {}".format(metric))
 
13
  # limitations under the License.
14
  """TODO: Add a description here."""
15
 
16
+ import concurrent.futures
17
  import math
18
  import statistics
19
  from typing import List, Optional, Union
 
135
  cuc_n_calculation: int = 3,
136
  cuc_n_samples: Union[List[int], str] = "auto",
137
  metric: str = "mse",
138
+ num_process: int = 1,
139
  ):
140
  """
141
  Compute the scores of the module given the predictions and references
 
163
  # distance between predictions and references for all example combinations for each features
164
  # shape: (num_generation, num_reference, num_features)
165
  if batch_size is not None:
166
+
167
+ if num_process > 1:
168
+ distance = np.zeros((len(predictions), len(references), predictions.shape[-1]))
169
+
170
+ idxs = [
171
+ (i, j)
172
+ for i in range(0, len(predictions) + batch_size, batch_size)
173
+ for j in range(0, len(references) + batch_size, batch_size)
174
+ ]
175
+ args = [
176
+ (predictions[i : i + batch_size, None], references[None, j : j + batch_size], metric, -2)
177
+ for i, j in idxs
178
+ ]
179
+ with concurrent.futures.ProcessPoolExecutor(max_workers=num_process) as executor:
180
+ results = executor.map(
181
+ self._compute_metric,
182
+ *zip(*args),
183
  )
184
+ for (i, j), d in zip(idxs, results):
185
+ distance[i : i + batch_size, j : j + batch_size] = d
186
+
187
+ else:
188
+ distance = np.zeros((len(predictions), len(references), predictions.shape[-1]))
189
+ # iterate over the predictions and references in batches
190
+ for i in range(0, len(predictions) + batch_size, batch_size):
191
+ for j in range(0, len(references) + batch_size, batch_size):
192
+ d = self._compute_metric(
193
+ predictions[i : i + batch_size, None],
194
+ references[None, j : j + batch_size],
195
+ metric=metric,
196
+ axis=-2,
197
+ )
198
+ distance[i : i + batch_size, j : j + batch_size] = d
199
  else:
200
  distance = self._compute_metric(predictions[:, None], references, metric=metric, axis=1)
201
 
 
335
  cuc = np.trapz(coverages, n_samples) / len(n_samples) / max(n_samples)
336
  return coverages, cuc
337
 
338
@staticmethod
def _compute_metric(x, y, metric: str = "mse", axis: int = -1):
    """Reduce the elementwise difference of two arrays to a distance score.

    Args:
        x, y: numpy arrays that broadcast against each other
            (e.g. batched predictions vs. references).
        metric: distance kind, case-insensitive — "mse", "mae" or "rmse".
        axis: axis over which the mean is taken.

    Returns:
        numpy array of distances reduced along `axis`.

    Raises:
        ValueError: if `metric` names an unsupported distance.
    """
    # Normalize once so each branch compares against a canonical name.
    kind = metric.lower()
    diff = x - y
    if kind == "mse":
        return np.mean(diff**2, axis=axis)
    if kind == "mae":
        return np.mean(np.abs(diff), axis=axis)
    if kind == "rmse":
        # RMSE is computed inline (not via recursion) because, as a
        # staticmethod, there is no bound `self` to delegate through.
        return np.sqrt(np.mean(diff**2, axis=axis))
    raise ValueError("Unknown metric: {}".format(metric))