bowdbeg committed
Commit 8f3e4ca
Parent(s): 8de55d8

implement metric
Files changed (5)
  1. .gitignore +133 -0
  2. README.md +1 -1
  3. __main__.py +37 -0
  4. matching_series.py +98 -26
  5. requirements.txt +2 -1
.gitignore ADDED
@@ -0,0 +1,133 @@
+ .vscode
+ data/
+ output/
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
README.md CHANGED
@@ -3,7 +3,7 @@ title: matching_series
  tags:
  - evaluate
  - metric
- description: 'TODO: add a description here'
+ description: "Matching-based time-series generation metric"
  sdk: gradio
  sdk_version: 4.36.1
  app_file: app.py
__main__.py ADDED
@@ -0,0 +1,37 @@
+ import json
+ import logging
+ from argparse import ArgumentParser
+
+ import evaluate
+ import numpy as np
+
+ logger = logging.getLogger(__name__)
+ parser = ArgumentParser(
+     description="Compute the matching series score between two sets of time series stored in numpy arrays"
+ )
+ parser.add_argument("predictions", type=str, help="Path to the numpy array containing the predictions")
+ parser.add_argument("references", type=str, help="Path to the numpy array containing the references")
+ parser.add_argument("--output", type=str, help="Path to the output file")
+ parser.add_argument("--batch_size", type=int, help="Batch size to use for the computation")
+ args = parser.parse_args()
+
+ if not args.predictions or not args.references:
+     raise ValueError("You must provide the path to the predictions and references numpy arrays")
+
+ predictions = np.load(args.predictions)
+ references = np.load(args.references)
+
+
+ logger.info(f"predictions shape: {predictions.shape}")
+ logger.info(f"references shape: {references.shape}")
+
+ import matching_series
+
+ metric = matching_series.matching_series()
+ # metric = evaluate.load("matching_series.py")
+ results = metric.compute(predictions=predictions, references=references, batch_size=args.batch_size)
+
+ print(results)
+ if args.output:
+     with open(args.output, "w") as f:
+         json.dump(results, f)
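Usage note (illustrative, not part of the commit): the entry point expects two .npy files holding arrays shaped (num_series, num_timesteps, num_features) with matching timestep and feature counts. The sketch below prepares such inputs with made-up file names and random data, then computes the metric the same way the script does after loading the files.

# Sketch only: toy inputs for __main__.py; file names, shapes, and values are illustrative.
import numpy as np

import matching_series

predictions = np.random.rand(8, 50, 3)  # 8 generated series, 50 timesteps, 3 features
references = np.random.rand(8, 50, 3)   # 8 reference series with the same timesteps/features

np.save("predictions.npy", predictions)  # the script could then be run as, e.g.:
np.save("references.npy", references)    # python . predictions.npy references.npy --output results.json

metric = matching_series.matching_series()
results = metric.compute(predictions=predictions, references=references, batch_size=4)
print(results["matching_mse"], results["covered_mse"], results["harmonic_mean"])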
matching_series.py CHANGED
@@ -13,9 +13,10 @@
  # limitations under the License.
  """TODO: Add a description here."""

- import evaluate
  import datasets
-
+ import evaluate
+ import numpy as np
+ import torch

  # TODO: Add BibTeX citation
  _CITATION = """\
@@ -36,26 +37,21 @@ This new module is designed to solve this great ML task and is crafted with a lo
  _KWARGS_DESCRIPTION = """
  Calculates how good are predictions given some references, using certain scores
  Args:
-     predictions: list of predictions to score. Each predictions
-         should be a string with tokens separated by spaces.
-     references: list of reference for each prediction. Each
-         reference should be a string with tokens separated by spaces.
+     predictions: list of generated time series.
+         shape: (num_generation, num_timesteps, num_features)
+     references: list of reference time series.
+         shape: (num_reference, num_timesteps, num_features)
  Returns:
-     accuracy: description of the first score,
-     another_score: description of the second score,
  Examples:
      Examples should be written in doctest format, and should illustrate how
      to use the function.

-     >>> my_new_module = evaluate.load("my_new_module")
-     >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
+     >>> my_new_module = evaluate.load("bowdbeg/matching_series")
+     >>> results = my_new_module.compute(references=[[[0.0, 1.0]]], predictions=[[[0.0, 1.0]]])
      >>> print(results)
-     {'accuracy': 1.0}
+     {'matching_mse': 0.0, ...}
  """

- # TODO: Define external resources urls if needed
- BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
-

  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
  class matching_series(evaluate.Metric):
@@ -70,26 +66,102 @@ class matching_series(evaluate.Metric):
              citation=_CITATION,
              inputs_description=_KWARGS_DESCRIPTION,
              # This defines the format of each prediction and reference
-             features=datasets.Features({
-                 'predictions': datasets.Value('int64'),
-                 'references': datasets.Value('int64'),
-             }),
+             features=datasets.Features(
+                 {
+                     "predictions": datasets.Sequence(datasets.Sequence(datasets.Value("float"))),
+                     "references": datasets.Sequence(datasets.Sequence(datasets.Value("float"))),
+                 }
+             ),
              # Homepage of the module for documentation
              homepage="http://module.homepage",
              # Additional links to the codebase or references
              codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
-             reference_urls=["http://path.to.reference.url/new_module"]
+             reference_urls=["http://path.to.reference.url/new_module"],
          )

      def _download_and_prepare(self, dl_manager):
          """Optional: download external resources useful to compute the scores"""
-         # TODO: Download external resources if needed
          pass

-     def _compute(self, predictions, references):
-         """Returns the scores"""
-         # TODO: Compute the different scores of the module
-         accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
+     def _compute(self, predictions: list | np.ndarray, references: list | np.ndarray, batch_size: None | int = None):
+         """
+         Compute the scores of the module given the predictions and references
+         Args:
+             predictions: list of generated time series.
+                 shape: (num_generation, num_timesteps, num_features)
+             references: list of reference time series.
+                 shape: (num_reference, num_timesteps, num_features)
+             batch_size: batch size to use for the computation. If None, the whole dataset is processed at once.
+         Returns:
+         """
+         predictions = np.array(predictions)
+         references = np.array(references)
+         if predictions.shape[1:] != references.shape[1:]:
+             raise ValueError(
+                 "The number of timesteps and features in the predictions and references should be the same. "
+                 "predictions: {}, references: {}".format(predictions.shape[1:], references.shape[1:])
+             )
+
+         # MSE between predictions and references over all example combinations, per feature
+         # shape: (num_generation, num_reference, num_features)
+         if batch_size is not None:
+             mse = np.zeros((len(predictions), len(references), predictions.shape[-1]))
+             # iterate over the predictions and references in batches
+             for i in range(0, len(predictions), batch_size):
+                 for j in range(0, len(references), batch_size):
+                     mse[i : i + batch_size, j : j + batch_size] = np.mean(
+                         (predictions[i : i + batch_size, None] - references[None, j : j + batch_size]) ** 2, axis=-2
+                     )
+         else:
+             mse = np.mean((predictions[:, None] - references[None]) ** 2, axis=-2)
+
+         index_mse = mse.diagonal(axis1=0, axis2=1).mean()
+
+         # matching scores
+         mse_mean = mse.mean(axis=-1)
+         # best match for each generated time series
+         # shape: (num_generation,)
+         best_match = np.argmin(mse_mean, axis=-1)
+
+         # matching mse
+         # shape: (num_generation,)
+         matching_mse = mse_mean[np.arange(len(best_match)), best_match].mean()
+
+         # best match for each reference time series
+         # shape: (num_reference,)
+         best_match_inv = np.argmin(mse_mean, axis=0)
+         covered_mse = mse_mean[best_match_inv, np.arange(len(best_match_inv))].mean()
+
+         harmonic_mean = 2 / (1 / matching_mse + 1 / covered_mse)
+
+         # take the matching for each feature and compute the metrics per feature
+         matching_mse_features = []
+         covered_mse_features = []
+         harmonic_mean_features = []
+         index_mse_features = []
+         for f in range(predictions.shape[-1]):
+             mse_f = mse[:, :, f]
+             index_mse_f = mse_f.diagonal(axis1=0, axis2=1).mean()
+             best_match_f = np.argmin(mse_f, axis=-1)
+             matching_mse_f = mse_f[np.arange(len(best_match_f)), best_match_f].mean()
+             best_match_inv_f = np.argmin(mse_f, axis=0)
+             covered_mse_f = mse_f[best_match_inv_f, np.arange(len(best_match_inv_f))].mean()
+             harmonic_mean_f = 2 / (1 / matching_mse_f + 1 / covered_mse_f)
+             matching_mse_features.append(matching_mse_f)
+             covered_mse_features.append(covered_mse_f)
+             harmonic_mean_features.append(harmonic_mean_f)
+             index_mse_features.append(index_mse_f)
+
          return {
-             "accuracy": accuracy,
-         }
+             "matching_mse": matching_mse,
+             "harmonic_mean": harmonic_mean,
+             "covered_mse": covered_mse,
+             "index_mse": index_mse,
+             "matching_mse_features": matching_mse_features,
+             "harmonic_mean_features": harmonic_mean_features,
+             "covered_mse_features": covered_mse_features,
+             "index_mse_features": index_mse_features,
+         }
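For orientation, the scoring in _compute pairs every generated series with every reference by MSE: matching_mse averages each generation's distance to its closest reference, covered_mse averages each reference's distance to its closest generation, harmonic_mean combines the two, and index_mse compares same-index pairs only. Below is a plain-numpy sketch of the same arithmetic on toy arrays (values are made up; the min-based reductions are equivalent to the argmin-based indexing in the diff).

# Sketch of the matching computation on made-up data; shapes are (num_series, num_timesteps, num_features).
import numpy as np

predictions = np.array([[[0.0], [1.0]], [[1.0], [0.0]]])  # 2 generated series, 2 timesteps, 1 feature
references = np.array([[[0.0], [1.0]], [[0.5], [0.5]]])   # 2 reference series

# pairwise MSE over timesteps -> shape (num_generation, num_reference, num_features)
mse = np.mean((predictions[:, None] - references[None]) ** 2, axis=-2)
mse_mean = mse.mean(axis=-1)  # average over features -> (num_generation, num_reference)

matching_mse = mse_mean.min(axis=1).mean()  # each generation vs. its closest reference
covered_mse = mse_mean.min(axis=0).mean()   # each reference vs. its closest generation
harmonic_mean = 2 / (1 / matching_mse + 1 / covered_mse)
index_mse = mse_mean.diagonal().mean()      # same-index pairs only

print(mse_mean)       # [[0.   0.25]
                      #  [1.   0.25]]
print(matching_mse)   # 0.125 = mean(0.0, 0.25)
print(covered_mse)    # 0.125 = mean(0.0, 0.25)
print(harmonic_mean)  # 0.125
print(index_mse)      # 0.125

Both the batched and unbatched branches in the diff build this same pairwise MSE tensor; batching only limits the size of the intermediate broadcast over timesteps.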
requirements.txt CHANGED
@@ -1 +1,2 @@
- git+https://github.com/huggingface/evaluate@main
+ git+https://github.com/huggingface/evaluate@main
+ numpy