from typing import Any

import datasets
import evaluate
from sklearn.metrics import accuracy_score, f1_score

_DESCRIPTION = """
This metric computes the accuracy and the micro, macro, and weighted F1 scores of models on the ViHSD dataset from [A Large-scale Dataset for Hate Speech Detection on Vietnamese Social Media Texts](https://arxiv.org/abs/2103.11528) by Luu et al. (2021).
The ViHSD dataset is a large-scale dataset for hate speech detection on Vietnamese social media texts.
It contains over 30,000 comments, each labeled as CLEAN, OFFENSIVE, or HATE.
The dataset is used to evaluate the quality of hate speech detection models, including deep learning and transformer models.
"""

_KWARGS_DESCRIPTION = """
Args:
    predictions: list of predicted labels. Each prediction is an integer class id
        corresponding to one of the ViHSD classes (CLEAN, OFFENSIVE, HATE).
    references: list of ground-truth labels, one integer class id per prediction.
Returns:
    "accuracy": Accuracy
    "micro_f1": Micro averaged F1 score
    "macro_f1": Macro averaged F1 score
    "weighted_f1": Weighted averaged F1 score
"""

_CITATION = """
@InProceedings{10.1007/978-3-030-79457-6_35,
    author="Luu, Son T.
    and Nguyen, Kiet Van
    and Nguyen, Ngan Luu-Thuy",
    editor="Fujita, Hamido
    and Selamat, Ali
    and Lin, Jerry Chun-Wei
    and Ali, Moonis",
    title="A Large-Scale Dataset for Hate Speech Detection on Vietnamese Social Media Texts",
    booktitle="Advances and Trends in Artificial Intelligence. Artificial Intelligence Practices",
    year="2021",
    publisher="Springer International Publishing",
    address="Cham",
    pages="415--426",
    abstract="In recent years, Vietnam witnesses the mass development of social network users on different social platforms such as Facebook, Youtube, Instagram, and Tiktok. On social media, hate speech has become a critical problem for social network users. To solve this problem, we introduce the ViHSD - a human-annotated dataset for automatically detecting hate speech on the social network. This dataset contains over 30,000 comments, each comment in the dataset has one of three labels: CLEAN, OFFENSIVE, or HATE. Besides, we introduce the data creation process for annotating and evaluating the quality of the dataset. Finally, we evaluate the dataset by deep learning and transformer models.",
    isbn="978-3-030-79457-6"
}
"""


def acc_and_f1(preds, labels):
    """Compute accuracy and micro/macro/weighted F1 for integer-encoded labels."""
    return {
        "accuracy": float(accuracy_score(y_true=labels, y_pred=preds)),
        "micro_f1": float(f1_score(y_true=labels, y_pred=preds, average="micro")),
        "macro_f1": float(f1_score(y_true=labels, y_pred=preds, average="macro")),
        "weighted_f1": float(f1_score(y_true=labels, y_pred=preds, average="weighted")),
    }

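
# A minimal sanity-check sketch of what `acc_and_f1` returns, assuming the three ViHSD
# classes are encoded as the integers 0, 1, and 2. The toy values below are illustrative,
# not taken from the dataset:
#
#     acc_and_f1(preds=[0, 1, 2, 0, 2], labels=[0, 1, 2, 0, 1])
#     # -> {"accuracy": 0.8, "micro_f1": 0.8, "macro_f1": ~0.78, "weighted_f1": 0.8}
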
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class ViHSD(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            homepage="https://github.com/sonlam1102/vihsd",
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "predictions": datasets.Value("int64"),
                    "references": datasets.Value("int64"),
                }
            ),
            codebase_urls=["https://github.com/sonlam1102/vihsd"],
            reference_urls=[
                "https://github.com/sonlam1102/vihsd",
                "https://arxiv.org/abs/2103.11528",
            ],
            format="numpy",
        )

    def _compute(
        self,
        predictions: Any = None,
        references: Any = None,
        **kwargs: Any,
    ):
        """Return accuracy and micro/macro/weighted F1 for the given label ids."""
        return acc_and_f1(predictions, references)
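

# A minimal usage sketch, not part of the metric itself; it runs only when this module is
# executed directly. In normal use the same computation would go through `evaluate.load(...)`
# with this script's path or Hub id instead of instantiating ViHSD here.
if __name__ == "__main__":
    metric = ViHSD()
    # Toy integer class ids standing in for CLEAN/OFFENSIVE/HATE labels; they are
    # illustrative values, not taken from the dataset.
    results = metric.compute(
        predictions=[0, 1, 2, 0, 2],
        references=[0, 1, 2, 0, 1],
    )
    print(results)  # accuracy, micro_f1, macro_f1, weighted_f1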