Upload metric.py with huggingface_hub
metric.py CHANGED
@@ -1,16 +1,13 @@
-from
-from typing import Any, Dict, Generator, Iterable, List, Optional, Union
+from typing import Dict, Iterable, List
 
-import datasets
 import evaluate
-from datasets import Features,
+from datasets import Features, Value
 
 from .artifact import __file__ as _
 from .blocks import __file__ as _
 from .card import __file__ as _
 from .catalog import __file__ as _
 from .collections import __file__ as _
-from .common import __file__ as _
 from .dataclass import __file__ as _
 from .dict_utils import __file__ as _
 from .file_utils import __file__ as _
@@ -23,20 +20,12 @@ from .load import __file__ as _
 from .loaders import __file__ as _
 from .metrics import __file__ as _
 from .normalizers import __file__ as _
-from .operator import (
-    MultiStreamOperator,
-    SequntialOperator,
-    SequntialOperatorInitilizer,
-    StreamInitializerOperator,
-)
+from .operator import (MultiStreamOperator, SequentialOperator,
+                       SequentialOperatorInitilizer, StreamInitializerOperator)
 from .operator import __file__ as _
-from .operators import (
-    ApplyOperatorsField,
-    ApplyStreamOperatorsField,
-    FlattenInstances,
-    MergeStreams,
-    SplitByValue,
-)
+from .operators import (Apply, ApplyMetric, ApplyOperatorsField,
+                        ApplyStreamOperatorsField, FlattenInstances,
+                        MergeStreams, SplitByValue)
 from .operators import __file__ as _
 from .processors import __file__ as _
 from .random_utils import __file__ as _
@@ -44,6 +33,7 @@ from .recipe import __file__ as _
 from .register import __file__ as _
 from .register import _reset_env_local_catalogs, register_all_artifacts
 from .renderers import __file__ as _
+from .schema import UNITXT_DATASET_SCHEMA
 from .schema import __file__ as _
 from .split_utils import __file__ as _
 from .splitters import __file__ as _
@@ -75,12 +65,31 @@ class MultiStreamScoreMean(MultiStreamOperator):
         instance["score"]["global"]["groups_mean_score"] = score
         yield instance
 
+    def spread_results_one_stream(self, stream: Stream):
+        for instance in stream:
+            instance["score"]["global"]["groups_mean_score"] = instance["score"][
+                "global"
+            ]["score"]
+            yield instance
+
     def process(self, multi_stream: MultiStream) -> MultiStream:
-        mean_score = self.aggegate_results(multi_stream)
+        result = {}
+
+        # Optimization to avoid double calculation of metrics
+        # when aggregating results, if there is only one stream.
+        if len(multi_stream) == 1:
+            for stream_name, stream in multi_stream.items():
+                result[stream_name] = Stream(
+                    self.spread_results_one_stream, gen_kwargs={"stream": stream}
+                )
+            return MultiStream(result)
 
+        mean_score = self.aggegate_results(multi_stream)
         result = {}
         for stream_name, stream in multi_stream.items():
             result[stream_name] = Stream(
                 self.spread_results, gen_kwargs={"stream": stream, "score": mean_score}
             )
 
         return MultiStream(result)
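
Note: the single-stream fast path above is cheap because Stream is lazy. It stores a generator function together with its gen_kwargs and only pulls instances when the stream is iterated, so wrapping each stream costs nothing up front. A minimal sketch of that pattern, assuming a Stream-like class with the constructor signature used in this diff (unitxt's real Stream class is not shown here):

from typing import Any, Callable, Dict

class LazyStream:
    # Stand-in for unitxt's Stream: defer the generator until iteration.
    def __init__(self, generator: Callable, gen_kwargs: Dict[str, Any]):
        self.generator = generator
        self.gen_kwargs = gen_kwargs

    def __iter__(self):
        return self.generator(**self.gen_kwargs)

def spread(stream, score):
    # Mirrors spread_results: stamp the group mean onto every instance.
    for instance in stream:
        instance["score"]["global"]["groups_mean_score"] = score
        yield instance

instances = [{"score": {"global": {"score": 0.75}}}]
lazy = LazyStream(spread, gen_kwargs={"stream": instances, "score": 0.75})
print(list(lazy))  # the generator only runs here, at iteration time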
@@ -90,20 +99,41 @@ class FromPredictionsAndOriginalData(StreamInitializerOperator):
         for prediction, original in zip(predictions, references):
             yield {**original, "prediction": prediction}
 
-    def process(self, predictions: List[str], references: Iterable, split_name: str = "all") -> MultiStream:
+    def process(
+        self, predictions: List[str], references: Iterable, split_name: str = "all"
+    ) -> MultiStream:
         return MultiStream(
-            {split_name: Stream(self.zip, gen_kwargs={"predictions": predictions, "references": references})}
+            {
+                split_name: Stream(
+                    self.zip,
+                    gen_kwargs={"predictions": predictions, "references": references},
+                )
+            }
         )
 
 
+# The additional_inputs field in the schema is defined as
+# Sequence({"key": Value(dtype="string"), "value": Value("string")})
+# When receiving instances from this schema, the keys and values are returned as two
+# separate lists, and are converted back to a dictionary.
+
+
+def _from_key_value_pairs(key_value_list: Dict[str, list]) -> Dict[str, str]:
+    return dict(zip(key_value_list["key"], key_value_list["value"]))
+
+
-class MetricRecipe(SequntialOperatorInitilizer):
+class MetricRecipe(SequentialOperatorInitilizer):
+    calc_confidence_intervals: bool = True
+
     def prepare(self):
         register_all_artifacts()
         self.steps = [
             FromPredictionsAndOriginalData(),
+            Apply(
+                "additional_inputs",
+                function=_from_key_value_pairs,
+                to_field="additional_inputs",
+            ),
             ApplyOperatorsField(
                 inputs_fields=["prediction", "references"],
                 fields_to_treat_as_list=["references"],
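
Note: the _from_key_value_pairs helper added in the hunk above reverses the flattening that the datasets library applies to Sequence({"key": ..., "value": ...}) features. A worked example (the key/value strings here are made up for illustration):

flattened = {"key": ["question", "context"], "value": ["What is X?", "X is Y."]}

def _from_key_value_pairs(key_value_list):
    return dict(zip(key_value_list["key"], key_value_list["value"]))

assert _from_key_value_pairs(flattened) == {
    "question": "What is X?",
    "context": "X is Y.",
}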
@@ -111,37 +141,48 @@ class MetricRecipe(SequntialOperatorInitilizer):
                 default_operators=["processors.to_string_stripped"],
             ),
             SplitByValue(["group"]),
-            ApplyStreamOperatorsField(
+            ApplyMetric(
                 "metrics",
-                reversed=True,
+                calc_confidence_intervals=self.calc_confidence_intervals,
             ),
             MultiStreamScoreMean(),
             MergeStreams(),
         ]
 
 
-UNITXT_METRIC_SCHEMA = Features(
+UNITXT_METRIC_SCHEMA = Features(
+    {"predictions": Value("string"), "references": dict(UNITXT_DATASET_SCHEMA)}
+)
 
 
-def _compute(
+def _compute(
+    predictions: List[str],
+    references: Iterable,
+    flatten: bool = False,
+    split_name: str = "all",
+    calc_confidence_intervals: bool = True,
+):
     _reset_env_local_catalogs()
     register_all_artifacts()
-    recipe = MetricRecipe()
+    recipe = MetricRecipe(calc_confidence_intervals=calc_confidence_intervals)
 
-    multi_stream = recipe(
+    multi_stream = recipe(
+        predictions=predictions, references=references, split_name=split_name
+    )
 
     if flatten:
         operator = FlattenInstances()
         multi_stream = operator(multi_stream)
 
     stream = multi_stream[split_name]
     return list(stream)
 
 
 # TODO: currently we have two classes with this name: metric.Metric and metrics.Metric...
 # @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class Metric(evaluate.Metric):
+    calc_confidence_intervals: bool = True
+
     def _info(self):
         return evaluate.MetricInfo(
             description="_DESCRIPTION",
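
Note: the module-level _compute added above is the functional core of the file: it builds a MetricRecipe, feeds it the predictions and the original dataset instances, and returns the scored instances of one split. A hedged usage sketch follows; the reference-instance fields shown (source, target, references, metrics, group, postprocessors) are assumptions about the shape of UNITXT_DATASET_SCHEMA, not taken from this diff:

# Hypothetical instance; field names are assumed, see note above.
example = {
    "source": "classify sentiment: I loved this movie.",
    "target": "positive",
    "references": ["positive"],
    "metrics": ["metrics.accuracy"],
    "group": "unitxt",
    "postprocessors": ["processors.to_string_stripped"],
}

scored = _compute(
    predictions=["positive"],
    references=[example],
    split_name="all",
    calc_confidence_intervals=False,  # skip the bootstrap for a quick check
)
print(scored[0]["score"]["global"])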
@@ -155,11 +196,16 @@ class Metric(evaluate.Metric):
            ],
        )
 
-    def _compute(self, predictions: List[str], references: Iterable, flatten: bool = False, split_name: str = "all"):
+    def _compute(
+        self,
+        predictions: List[str],
+        references: Iterable,
+        flatten: bool = False,
+        split_name: str = "all",
+    ):
         try:
-            from unitxt.dataset import (
-                get_dataset_artifact as get_dataset_artifact_installed
-            )
+            from unitxt.dataset import \
+                get_dataset_artifact as get_dataset_artifact_installed
 
             unitxt_installed = True
         except ImportError:
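
Note: the try/except in the hunk above is a feature probe rather than an import for direct use: if a pip-installed unitxt package is present, this hub-hosted copy of metric.py delegates to it (see the next hunk), so users with the package installed get its newer code paths. The idiom in isolation, as a sketch:

# Probe for an installed unitxt; fall back to the code bundled with this repo.
try:
    import unitxt  # noqa: F401  (only the import's success matters)
    unitxt_installed = True
except ImportError:
    unitxt_installed = False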
@@ -169,7 +215,17 @@ class Metric(evaluate.Metric):
             from unitxt.metric import _compute as _compute_installed
 
             return _compute_installed(
-                predictions=predictions, references=references, flatten=flatten, split_name=split_name
+                predictions=predictions,
+                references=references,
+                flatten=flatten,
+                split_name=split_name,
+                calc_confidence_intervals=self.calc_confidence_intervals,
             )
 
-        return _compute(predictions=predictions, references=references, flatten=flatten, split_name=split_name)
+        return _compute(
+            predictions=predictions,
+            references=references,
+            flatten=flatten,
+            split_name=split_name,
+            calc_confidence_intervals=self.calc_confidence_intervals,
+        )
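
Note: end to end, this file is meant to be consumed through the evaluate library rather than imported directly. A sketch of the assumed entry point; the hub id "unitxt/metric" is an assumption based on where this file is being uploaded:

import evaluate

metric = evaluate.load("unitxt/metric")  # assumed hub id for this repo

# predictions/references shaped as in the _compute sketch above.
results = metric.compute(predictions=["positive"], references=[example])
print(results[0]["score"]["global"])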