Spaces:

Helium7
/

topk_accuracy

Runtime error

App Files Files Community

topk_accuracy / topk_accuracy.py

Helium7

init

acc3357 over 1 year ago

raw

history blame contribute delete

3.57 kB

	# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	"""Top-k Accuracy metric."""

	import datasets
	from sklearn.metrics import top_k_accuracy_score

	import evaluate


	_DESCRIPTION = """
	Accuracy is the proportion of correct predictions among the total number of cases processed. It can be computed with:
	Accuracy = (TP + TN) / (TP + TN + FP + FN)
	Where:
	TP: True positive
	TN: True negative
	FP: False positive
	FN: False negative

	Top-k Accuracy is the proportion of correct predictions among the top k predictions.
	"""


	_KWARGS_DESCRIPTION = """
	Args:
	predictions (`list` of `list` of `float`): Model predictions.
	references (`list` of `int`): Ground truth labels.
	normalize (`boolean`): If set to False, returns the number of correctly classified samples. Otherwise, returns the fraction of correctly classified samples. Defaults to True.
	sample_weight (`list` of `float`): Sample weights Defaults to None.

	Returns:
	accuracy (`float` or `int`): Top-k accuracy score. Minimum possible value is 0. Maximum possible value is 1.0, or the number of examples input, if `normalize` is set to `True`.. A higher score means higher accuracy.

	Examples:

	>>> import numpy as np
	>>> from sklearn.metrics import top_k_accuracy_score
	>>> y_true = np.array([0, 1, 2, 2])
	>>> y_score = np.array([[0.5, 0.2, 0.2], # 0 is in top 2
	... [0.3, 0.4, 0.2], # 1 is in top 2
	... [0.2, 0.4, 0.3], # 2 is in top 2
	... [0.7, 0.2, 0.1]]) # 2 isn't in top 2
	>>> top_k_accuracy_score(y_true, y_score, k=2)
	0.75
	>>> # Not normalizing gives the number of "correctly" classified samples
	>>> top_k_accuracy_score(y_true, y_score, k=2, normalize=False)
	3
	"""

	_CITATION = """
	"""


	@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
	class Accuracy(evaluate.Metric):
	def _info(self):
	return evaluate.MetricInfo(
	description=_DESCRIPTION,
	inputs_description=_KWARGS_DESCRIPTION,
	citation=_CITATION,
	features=datasets.Features(
	{
	"predictions": datasets.Sequence(datasets.Sequence(datasets.Value("float32"))),
	"references": datasets.Sequence(datasets.Value("int32")),
	}
	if self.config_name == "multilabel"
	else {
	"predictions": datasets.Sequence(datasets.Value("float32")),
	"references": datasets.Value("int32"),
	}
	),
	reference_urls=["https://scikit-learn.org/stable/modules/generated/sklearn.metrics.top_k_accuracy_score.html"],
	)

	def _compute(self, predictions, references, normalize=True, sample_weight=None):
	return {
	"accuracy": float(
	top_k_accuracy_score(references, predictions, normalize=normalize, sample_weight=sample_weight)
	)
	}