Fix the description (説明の修正)

src/about.py (+23, -39)
```diff
@@ -51,16 +51,16 @@ class Tasks(Enum):
     alt_j_to_e_comet_wmt22 = Task("scores", "alt-j-to-e_comet_wmt22", "ALT J to E COMET WMT22 ⭐", TaskType.MT)
     chabsa_set_f1 = Task("scores", "chabsa_set_f1", "ChABSA ⭐", TaskType.EL)
     commonsensemoralja_exact_match = Task(
-        "scores", "commonsensemoralja_exact_match", "CommonSenseMoralJA", TaskType.MC
+        "scores", "commonsensemoralja_exact_match", "CommonSenseMoralJA ⭐", TaskType.MC
     )
-    jamp_exact_match = Task("scores", "jamp_exact_match", "JAMP", TaskType.NLI)
-    janli_exact_match = Task("scores", "janli_exact_match", "JANLI", TaskType.NLI)
-    jcommonsenseqa_exact_match = Task("scores", "jcommonsenseqa_exact_match", "JCommonSenseQA", TaskType.MC)
-    jemhopqa_char_f1 = Task("scores", "jemhopqa_char_f1", "JEMHopQA", TaskType.QA)
-    jmmlu_exact_match = Task("scores", "jmmlu_exact_match", "JMMLU", TaskType.HE)
-    jnli_exact_match = Task("scores", "jnli_exact_match", "JNLI", TaskType.NLI)
-    jsem_exact_match = Task("scores", "jsem_exact_match", "JSEM", TaskType.NLI)
-    jsick_exact_match = Task("scores", "jsick_exact_match", "JSICK", TaskType.NLI)
+    jamp_exact_match = Task("scores", "jamp_exact_match", "JAMP ⭐", TaskType.NLI)
+    janli_exact_match = Task("scores", "janli_exact_match", "JANLI ⭐", TaskType.NLI)
+    jcommonsenseqa_exact_match = Task("scores", "jcommonsenseqa_exact_match", "JCommonSenseQA ⭐", TaskType.MC)
+    jemhopqa_char_f1 = Task("scores", "jemhopqa_char_f1", "JEMHopQA ⭐", TaskType.QA)
+    jmmlu_exact_match = Task("scores", "jmmlu_exact_match", "JMMLU ⭐", TaskType.HE)
+    jnli_exact_match = Task("scores", "jnli_exact_match", "JNLI ⭐", TaskType.NLI)
+    jsem_exact_match = Task("scores", "jsem_exact_match", "JSEM ⭐", TaskType.NLI)
+    jsick_exact_match = Task("scores", "jsick_exact_match", "JSICK ⭐", TaskType.NLI)
     jsquad_char_f1 = Task("scores", "jsquad_char_f1", "JSquad ⭐", TaskType.RC)
     jsts_pearson = Task(
         "scores", "jsts_pearson", "JSTS (Pearson)", TaskType.STS
@@ -68,18 +68,18 @@ class Tasks(Enum):
     jsts_spearman = Task(
         "scores", "jsts_spearman", "JSTS (Spearman)", TaskType.STS
     ) # Semantic Textual Similarity - 意味的類似度
-    kuci_exact_match = Task("scores", "kuci_exact_match", "KUCI", TaskType.MC)
+    kuci_exact_match = Task("scores", "kuci_exact_match", "KUCI ⭐", TaskType.MC)
     mawps_exact_match = Task("scores", "mawps_exact_match", "MAWPS ⭐", TaskType.MR)
     mbpp_code_exec = Task("scores", "mbpp_code_exec", "MBPP (exec) ⭐", TaskType.CG)
     mbpp_pylint_check = Task("scores", "mbpp_pylint_check", "MBPP (pylint)", TaskType.CG)
-    mmlu_en_exact_match = Task("scores", "mmlu_en_exact_match", "MMLU", TaskType.HE)
-    niilc_char_f1 = Task("scores", "niilc_char_f1", "NIILC", TaskType.QA)
-    aio_char_f1 = Task("scores", "aio_char_f1", "JAQKET", TaskType.QA)
-    wiki_coreference_set_f1 = Task("scores", "wiki_coreference_set_f1", "Wiki Coreference", TaskType.FA)
-    wiki_dependency_set_f1 = Task("scores", "wiki_dependency_set_f1", "Wiki Dependency", TaskType.FA)
-    wiki_ner_set_f1 = Task("scores", "wiki_ner_set_f1", "Wiki NER", TaskType.FA)
-    wiki_pas_set_f1 = Task("scores", "wiki_pas_set_f1", "Wiki PAS", TaskType.FA)
-    wiki_reading_char_f1 = Task("scores", "wiki_reading_char_f1", "Wiki Reading", TaskType.FA)
+    mmlu_en_exact_match = Task("scores", "mmlu_en_exact_match", "MMLU ⭐", TaskType.HE)
+    niilc_char_f1 = Task("scores", "niilc_char_f1", "NIILC ⭐", TaskType.QA)
+    aio_char_f1 = Task("scores", "aio_char_f1", "JAQKET ⭐", TaskType.QA)
+    wiki_coreference_set_f1 = Task("scores", "wiki_coreference_set_f1", "Wiki Coreference ⭐", TaskType.FA)
+    wiki_dependency_set_f1 = Task("scores", "wiki_dependency_set_f1", "Wiki Dependency ⭐", TaskType.FA)
+    wiki_ner_set_f1 = Task("scores", "wiki_ner_set_f1", "Wiki NER ⭐", TaskType.FA)
+    wiki_pas_set_f1 = Task("scores", "wiki_pas_set_f1", "Wiki PAS ⭐", TaskType.FA)
+    wiki_reading_char_f1 = Task("scores", "wiki_reading_char_f1", "Wiki Reading ⭐", TaskType.FA)
     wikicorpus_e_to_j_bert_score_ja_f1 = Task(
         "scores", "wikicorpus-e-to-j_bert_score_ja_f1", "WikiCorpus E to J BERT Score", TaskType.MT
     )
@@ -221,21 +221,13 @@ This task is supported by llm-jp-eval, but it is not included in the evaluation
 To reproduce our results, please follow the instructions of the evalution tool, **llm-jp-eval** available in [Japanese](https://github.com/llm-jp/llm-jp-eval/blob/main/README.md) and in [English](https://github.com/llm-jp/llm-jp-eval/blob/main/README_en.md).
 
 ## Average Score Calculation
-
-
-Tasks included in average calculation:
-- RC: JSQuAD ⭐
-- EL: ChABSA ⭐
-- MR: MAWPS ⭐
-- MT: ALT E to J COMET WMT22 ⭐, ALT J to E COMET WMT22 ⭐, WikiCorpus E to J COMET WMT22 ⭐, WikiCorpus J to E COMET WMT22 ⭐
-- CG: MBPP (exec) ⭐
-- SUM: XL-Sum ROUGE2 ⭐
+The calculation of the average score (AVG) includes only the scores marked with a ⭐.
 
 """
 
 LLM_BENCHMARKS_TEXT_JA = """
 ## 仕組み
-📈
+📈 評価ツール [llm-jp-eval](https://github.com/llm-jp/llm-jp-eval) を活用し、16種類のタスクで日本語の大規模言語モデルを評価します。このツールは、様々な評価タスクで日本語LLMを評価するための統一的なフレームワークです。
 
 **NLI(自然言語推論)**
 
@@ -293,7 +285,7 @@ LLM_BENCHMARKS_TEXT_JA = """
 
 **STS(意味的テキスト類似度)**
 
-このタスクはllm-jp-eval
+このタスクはllm-jp-evalでサポートされていますが、平均スコア (AVG) の計算には含まれていません。
 
 * `JSTS`、STS(Semantic Textual Similarity)の日本語版(JGLUEの一部)[ソース](https://github.com/yahoojapan/JGLUE)(ライセンス CC BY-SA 4.0)
 
@@ -312,18 +304,10 @@ LLM_BENCHMARKS_TEXT_JA = """
 * `XL-Sum`、44言語の大規模多言語抽象型要約データセットの日本語部分 [ソース](https://github.com/csebuetnlp/xl-sum)(ライセンス CC BY-NC-SA 4.0、非商用ライセンスのため、このデータセットは使用しません。ライセンスと利用規約に明確に同意した場合を除きます)
 
 ## 再現性
-
+結果を再現するには、評価ツール **llm-jp-eval** の指示に従ってください。詳細は [日本語](https://github.com/llm-jp/llm-jp-eval/blob/main/README.md) と [英語](https://github.com/llm-jp/llm-jp-eval/blob/main/README_en.md) でご覧いただけます。
 
 ## 平均スコアの計算について
-
-
-平均値計算に含まれるタスク:
-- RC:JSQuAD ⭐
-- EL:ChABSA ⭐
-- MR:MAWPS ⭐
-- MT:ALT E to J COMET WMT22 ⭐、ALT J to E COMET WMT22 ⭐、WikiCorpus E to J COMET WMT22 ⭐、WikiCorpus J to E COMET WMT22 ⭐
-- CG:MBPP (exec) ⭐
-- SUM:XL-Sum ROUGE2 ⭐
+平均スコア (AVG) の計算には、⭐マークのついたスコアのみが含まれます
 """
 
 
```
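The stars added throughout this diff are not decorative: the new doc text says the AVG column "includes only the scores marked with a ⭐", which implies the leaderboard filters tasks by display name. Below is a minimal sketch of that filtering logic. The `Task`, `TaskType`, and `Tasks` names mirror the diff; the field names (`benchmark`, `metric`, `col_name`, `task_type`) and the `average_score` helper are illustrative assumptions, not the Space's actual implementation.

```python
from dataclasses import dataclass
from enum import Enum
from statistics import mean


class TaskType(Enum):
    MT = "machine translation"
    MC = "multiple choice"
    STS = "semantic textual similarity"


@dataclass(frozen=True)
class Task:
    benchmark: str   # results section, e.g. "scores" (field names are assumed)
    metric: str      # raw metric column, e.g. "kuci_exact_match"
    col_name: str    # display name; a trailing ⭐ marks AVG membership
    task_type: TaskType


class Tasks(Enum):
    # Abbreviated from the diff: one already-starred task, one newly starred
    # task, and one unstarred (display-only) task.
    alt_j_to_e_comet_wmt22 = Task("scores", "alt-j-to-e_comet_wmt22", "ALT J to E COMET WMT22 ⭐", TaskType.MT)
    kuci_exact_match = Task("scores", "kuci_exact_match", "KUCI ⭐", TaskType.MC)
    jsts_pearson = Task("scores", "jsts_pearson", "JSTS (Pearson)", TaskType.STS)  # no star


def average_score(row: dict[str, float]) -> float:
    """Average only the metrics whose display name carries a ⭐."""
    starred = [t.value.metric for t in Tasks if "⭐" in t.value.col_name]
    return mean(row[m] for m in starred if m in row)


scores = {"alt-j-to-e_comet_wmt22": 0.85, "kuci_exact_match": 0.72, "jsts_pearson": 0.90}
print(average_score(scores))  # 0.785: JSTS (Pearson) is excluded from AVG
```

Under this reading, the commit widens the AVG basket: tasks such as JAMP, JANLI, KUCI, and the Wiki tasks gain a ⭐ and start counting toward the average, while unstarred metrics like JSTS (Pearson) and MBPP (pylint) remain display-only.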