Update README.md
Browse files
README.md
CHANGED
@@ -50,13 +50,40 @@ pipeline_tag: text-generation
|
|
50 |
|
51 |
<br>
|
52 |
|
53 |
-
- **AI-Harness evaluation** [[link]](https://github.com/
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
|
58 |
-
|
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
|
62 |
---
|
|
|
50 |
|
51 |
<br>
|
52 |
|
53 |
+
- **(ENG) LM Evaluation Harness results** [[link]](https://github.com/EleutherAI/lm-evaluation-harness)
|
54 |
+
|
55 |
+
| Tasks |Version|Filter|n-shot|Metric|Value | |Stderr|
|
56 |
+
|------------------|-------|------|-----:|------|-----:|---|-----:|
|
57 |
+
|HellaSwag | 1|none | 0|acc |0.6075|± |0.0049|
|
58 |
+
|HellaSwag | 1|none | 5|acc | |± | |
|
59 |
+
|BoolQ | 2|none | 0|acc |0.8737|± |0.0058|
|
60 |
+
|BoolQ | 2|none | 5|acc | |± | |
|
61 |
+
|COPA | 1|none | 0|acc |0.8300|± |0.0378|
|
62 |
+
|COPA | 1|none | 5|acc | |± | |
|
63 |
+
|MMLU |N/A |none | 0|acc |0.5826|± |0.1432|
|
64 |
+
|MMLU |N/A |none | 5|acc | |± | |
|
65 |
+
<!--| - humanities |N/A |none | 0|acc |0.5103|± |0.1669|
|
66 |
+
| - other |N/A |none | 0|acc |0.6662|± |0.1079|
|
67 |
+
| - social_sciences|N/A |none | 0|acc |0.6922|± |0.0979|
|
68 |
+
| - stem |N/A |none | 0|acc |0.5011|± |0.1159|-->
|
69 |
+
|
70 |
+
|
71 |
+
|
72 |
+
- **(KOR) Ko-LM Evaluation Harness results** [[link]](https://github.com/Beomi/ko-lm-evaluation-harness)
|
73 |
+
|
74 |
+
|
75 |
+
| Tasks |Version|Filter|n-shot|Metric|Value | |Stderr|
|
76 |
+
|-------------------------|-------|------|-----:|------|-----:|---|-----:|
|
77 |
+
|KoBEST-HellaSwag | |none | 0|acc | |± | |
|
78 |
+
|KoBEST-HellaSwag | |none | 5|acc | |± | |
|
79 |
+
|KoBEST-BoolQ | |none | 0|acc | |± | |
|
80 |
+
|KoBEST-BoolQ | |none | 5|acc | |± | |
|
81 |
+
|KoBEST-COPA | |none | 0|acc | |± | |
|
82 |
+
|KoBEST-COPA | |none | 5|acc | |± | |
|
83 |
+
|KoBEST-SentiNeg | |none | 0|acc | |± | |
|
84 |
+
|KoBEST-SentiNeg | |none | 5|acc | |± | |
|
85 |
+
|KoBEST-MMLU | |none | 0|acc | |± | |
|
86 |
+
|KoBEST-MMLU | |none | 5|acc | |± | |
|
87 |
|
88 |
|
89 |
---
|