Update README.md
Browse files
README.md
CHANGED
@@ -46,22 +46,6 @@ pipeline_tag: text-generation
|
|
46 |
|
47 |
<br>
|
48 |
|
49 |
-
- **(ENG) AI-Harness evaluation** [[link]](https://github.com/EleutherAI/lm-evaluation-harness)
|
50 |
-
|
51 |
-
| Tasks |Version|Filter|n-shot|Metric|Value | |Stderr|
|
52 |
-
|------------------|-------|------|-----:|------|-----:|---|-----:|
|
53 |
-
|MMLU |N/A |none | 0|acc |0.5826|± |0.1432|
|
54 |
-
|MMLU |N/A |none | 5|acc | |± | |
|
55 |
-
|HellaSwag | 1|none | 0|acc |0.6075|± |0.0049|
|
56 |
-
|HellaSwag | 1|none | 5|acc | |± | |
|
57 |
-
|BoolQ | 2|none | 0|acc |0.8737|± |0.0058|
|
58 |
-
|BoolQ | 2|none | 5|acc | |± | |
|
59 |
-
|COPA | 1|none | 0|acc |0.8300|± |0.0378|
|
60 |
-
|COPA | 1|none | 5|acc | |± | |
|
61 |
-
|truthfulqa |N/A |none | 0|acc |0.4249|± |0.0023|
|
62 |
-
|truthfulqa |N/A |none | 5|acc | |± | |
|
63 |
-
|
64 |
-
|
65 |
- **(KOR) AI-Harness evaluation** [[link]](https://github.com/Beomi/ko-lm-evaluation-harness)
|
66 |
|
67 |
|
@@ -78,6 +62,23 @@ pipeline_tag: text-generation
|
|
78 |
|KoBEST-SentiNeg | 0|none | 0|acc |0.5844|± |0.0248|
|
79 |
|KoBEST-SentiNeg | 0|none | 5|acc |0.9471|± |0.0112|
|
80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
---
|
83 |
|
|
|
46 |
|
47 |
<br>
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
- **(KOR) AI-Harness evaluation** [[link]](https://github.com/Beomi/ko-lm-evaluation-harness)
|
50 |
|
51 |
|
|
|
62 |
|KoBEST-SentiNeg | 0|none | 0|acc |0.5844|± |0.0248|
|
63 |
|KoBEST-SentiNeg | 0|none | 5|acc |0.9471|± |0.0112|
|
64 |
|
65 |
+
<br>
|
66 |
+
|
67 |
+
- **(ENG) AI-Harness evaluation** [[link]](https://github.com/EleutherAI/lm-evaluation-harness)
|
68 |
+
|
69 |
+
| Tasks |Version|Filter|n-shot|Metric|Value | |Stderr|
|
70 |
+
|------------------|-------|------|-----:|------|-----:|---|-----:|
|
71 |
+
|MMLU |N/A |none | 0|acc |0.5826|± |0.1432|
|
72 |
+
|MMLU |N/A |none | 5|acc | |± | |
|
73 |
+
|HellaSwag | 1|none | 0|acc |0.6075|± |0.0049|
|
74 |
+
|HellaSwag | 1|none | 5|acc | |± | |
|
75 |
+
|BoolQ | 2|none | 0|acc |0.8737|± |0.0058|
|
76 |
+
|BoolQ | 2|none | 5|acc | |± | |
|
77 |
+
|COPA | 1|none | 0|acc |0.8300|± |0.0378|
|
78 |
+
|COPA | 1|none | 5|acc | |± | |
|
79 |
+
|TruthfulQA |N/A |none | 0|acc |0.4249|± |0.0023|
|
80 |
+
|TruthfulQA |N/A |none | 5|acc | |± | |
|
81 |
+
|
82 |
|
83 |
---
|
84 |
|