from typing import Any
import datasets
import evaluate
from sklearn.metrics import f1_score, accuracy_score
_DESCRIPTION = """
This metric computes the accuracy and the micro, macro and weighted F1 scores of models on the ViHSD dataset from [A Large-scale Dataset for Hate Speech Detection on Vietnamese Social Media Texts](https://arxiv.org/abs/2103.11528) by Luu et al. (2021).
The ViHSD dataset is a large-scale dataset for hate speech detection on Vietnamese social media texts.
It contains over 30,000 comments, each labeled as CLEAN, OFFENSIVE, or HATE.
The dataset is used to evaluate the quality of hate speech detection models, including deep learning and transformer models.
"""
_KWARGS_DESCRIPTION = """
Args:
    predictions: list of predicted class labels, one integer class id per comment
        (corresponding to CLEAN, OFFENSIVE or HATE).
    references: list of ground-truth class labels, one integer class id per comment.
Returns:
    "accuracy": Accuracy
    "micro_f1": Micro averaged F1 score
    "macro_f1": Macro averaged F1 score
    "weighted_f1": Weighted averaged F1 score
"""
_CITATION = """
@InProceedings{10.1007/978-3-030-79457-6_35,
author="Luu, Son T.
and Nguyen, Kiet Van
and Nguyen, Ngan Luu-Thuy",
editor="Fujita, Hamido
and Selamat, Ali
and Lin, Jerry Chun-Wei
and Ali, Moonis",
title="A Large-Scale Dataset for Hate Speech Detection on Vietnamese Social Media Texts",
booktitle="Advances and Trends in Artificial Intelligence. Artificial Intelligence Practices",
year="2021",
publisher="Springer International Publishing",
address="Cham",
pages="415--426",
abstract="In recent years, Vietnam witnesses the mass development of social network users on different social platforms such as Facebook, Youtube, Instagram, and Tiktok. On social media, hate speech has become a critical problem for social network users. To solve this problem, we introduce the ViHSD - a human-annotated dataset for automatically detecting hate speech on the social network. This dataset contains over 30,000 comments, each comment in the dataset has one of three labels: CLEAN, OFFENSIVE, or HATE. Besides, we introduce the data creation process for annotating and evaluating the quality of the dataset. Finally, we evaluate the dataset by deep learning and transformer models.",
isbn="978-3-030-79457-6"
}
"""
def acc_and_f1(preds, labels):
    """Compute accuracy and micro/macro/weighted F1 scores for integer class labels."""
    return {
"accuracy": float(accuracy_score(y_true=labels, y_pred=preds)),
"micro_f1": float(f1_score(y_true=labels, y_pred=preds, average="micro")),
"macro_f1": float(f1_score(y_true=labels, y_pred=preds, average="macro")),
"weighted_f1": float(f1_score(y_true=labels, y_pred=preds, average="weighted")),
}
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class ViHSD(evaluate.Metric):
def _info(self):
return evaluate.MetricInfo(
description=_DESCRIPTION,
citation=_CITATION,
homepage="https://github.com/sonlam1102/vihsd",
inputs_description=_KWARGS_DESCRIPTION,
features=datasets.Features(
{
"predictions": datasets.Value("int64"),
"references": datasets.Value("int64"),
}
),
codebase_urls=["https://github.com/sonlam1102/vihsd"],
reference_urls=[
"https://github.com/sonlam1102/vihsd",
"https://arxiv.org/abs/2103.11528",
],
format="numpy",
)
def _compute(
self,
predictions: Any = None,
references: Any = None,
**kwargs: Any
):
return acc_and_f1(predictions, references)
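

# Minimal local smoke test (illustrative sketch, not part of the evaluate Hub loading
# convention): running `python vihsd.py` directly prints the metrics for a toy example.
if __name__ == "__main__":
    # Hypothetical toy labels; in practice these come from a model and the ViHSD test split.
    demo_references = [0, 1, 2, 0, 2, 1]
    demo_predictions = [0, 1, 2, 0, 1, 1]
    print(acc_and_f1(demo_predictions, demo_references))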