Spaces:
Sleeping
Sleeping
wandb fix
Browse files- user-friendly-metrics.py +13 -20
user-friendly-metrics.py
CHANGED
@@ -82,16 +82,14 @@ class UserFriendlyMetrics(evaluate.Metric):
|
|
82 |
citation=_CITATION,
|
83 |
inputs_description=_KWARGS_DESCRIPTION,
|
84 |
# This defines the format of each prediction and reference
|
85 |
-
features=datasets.Features(
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
}
|
94 |
-
),
|
95 |
# Additional links to the codebase or references
|
96 |
codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
|
97 |
reference_urls=["http://path.to.reference.url/new_module"],
|
@@ -155,7 +153,7 @@ class UserFriendlyMetrics(evaluate.Metric):
|
|
155 |
|
156 |
predictions, references = payload_to_uf_metrics(payload, model_name=model_name, filter_dict=self.filter_dict)
|
157 |
|
158 |
-
results[model_name]["per_sequence"][seq_name] = self._compute(predictions, references)
|
159 |
|
160 |
# overall
|
161 |
model_payload = Payload(
|
@@ -166,7 +164,7 @@ class UserFriendlyMetrics(evaluate.Metric):
|
|
166 |
)
|
167 |
predictions, references = payload_to_uf_metrics(payload, model_name=model_name, filter_dict=self.filter_dict)
|
168 |
|
169 |
-
results[model_name]["overall"] = self._compute(predictions, references)
|
170 |
|
171 |
return results
|
172 |
|
@@ -220,19 +218,14 @@ class UserFriendlyMetrics(evaluate.Metric):
|
|
220 |
|
221 |
categories = {
|
222 |
"user_friendly_metrics": {
|
223 |
-
"
|
224 |
-
"mostly_tracked_score_0.5",
|
225 |
-
"mostly_tracked_score_0.8",
|
226 |
},
|
227 |
"evaluation_metrics_dev": {
|
228 |
"recall",
|
229 |
},
|
230 |
"user_friendly_metrics_dev": {
|
231 |
-
"
|
232 |
-
|
233 |
-
"mostly_tracked_count_0.8",
|
234 |
-
"unique_obj_count",
|
235 |
-
},
|
236 |
"predictions_summary": {
|
237 |
"tp",
|
238 |
"fn",
|
|
|
82 |
citation=_CITATION,
|
83 |
inputs_description=_KWARGS_DESCRIPTION,
|
84 |
# This defines the format of each prediction and reference
|
85 |
+
features=datasets.Features({
|
86 |
+
"predictions": datasets.Sequence(
|
87 |
+
datasets.Sequence(datasets.Value("float"))
|
88 |
+
),
|
89 |
+
"references": datasets.Features({ "all":
|
90 |
+
datasets.Sequence(datasets.Sequence(datasets.Value("float")))}
|
91 |
+
)
|
92 |
+
}), #couldn't get this to work
|
|
|
|
|
93 |
# Additional links to the codebase or references
|
94 |
codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
|
95 |
reference_urls=["http://path.to.reference.url/new_module"],
|
|
|
153 |
|
154 |
predictions, references = payload_to_uf_metrics(payload, model_name=model_name, filter_dict=self.filter_dict)
|
155 |
|
156 |
+
results[model_name]["per_sequence"][seq_name] = self._compute(predictions=predictions, references=references)
|
157 |
|
158 |
# overall
|
159 |
model_payload = Payload(
|
|
|
164 |
)
|
165 |
predictions, references = payload_to_uf_metrics(payload, model_name=model_name, filter_dict=self.filter_dict)
|
166 |
|
167 |
+
results[model_name]["overall"] = self._compute(predictions=predictions, references=references)
|
168 |
|
169 |
return results
|
170 |
|
|
|
218 |
|
219 |
categories = {
|
220 |
"user_friendly_metrics": {
|
221 |
+
f"mostly_tracked_score_{str(threshold).replace('.', '_')}" for threshold in self.recognition_thresholds
|
|
|
|
|
222 |
},
|
223 |
"evaluation_metrics_dev": {
|
224 |
"recall",
|
225 |
},
|
226 |
"user_friendly_metrics_dev": {
|
227 |
+
f"mostly_tracked_count_{str(threshold).replace('.', '_')}" for threshold in self.recognition_thresholds
|
228 |
+
}.union("unique_object_count"),
|
|
|
|
|
|
|
229 |
"predictions_summary": {
|
230 |
"tp",
|
231 |
"fn",
|