Spaces:
Running
Running
Add TurkishMMLU dataset and results (#2)
Browse files
- Add TurkishMMLU dataset and results (b949e7e3ca62dc6da4c6623dbee14f0cc1843004)
- data/datasets.json +8 -0
- results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json +6 -0
- results/zero-shot/Llama-3.3-70B-Instruct.json +6 -0
- results/zero-shot/Ministral-8B-Instruct.json +6 -0
- results/zero-shot/Mistral-7B-Instruct-v0.3.json +6 -0
- results/zero-shot/Mistral-7B-v0.3.json +6 -0
- results/zero-shot/Mixtral-8x7B-Instruct-v0.1.json +6 -0
- results/zero-shot/Qwen2.5-0.5B-Instruct.json +6 -0
- results/zero-shot/Qwen2.5-0.5B.json +6 -0
- results/zero-shot/Qwen2.5-1.5B-Instruct.json +6 -0
- results/zero-shot/Qwen2.5-1.5B.json +6 -0
- results/zero-shot/Qwen2.5-14B-Instruct.json +6 -0
- results/zero-shot/Qwen2.5-14B.json +6 -0
- results/zero-shot/Qwen2.5-3B-Instruct.json +6 -0
- results/zero-shot/Qwen2.5-3B.json +6 -0
- results/zero-shot/Qwen2.5-7B-Instruct.json +6 -0
- results/zero-shot/Qwen2.5-7B.json +6 -0
- results/zero-shot/aya-23-35B.json +6 -0
- results/zero-shot/aya-23-8b.json +6 -0
- results/zero-shot/aya-expanse-32b.json +6 -0
- results/zero-shot/aya-expanse-8b.json +6 -0
- results/zero-shot/aya101.json +6 -0
- results/zero-shot/commencis-7b.json +6 -0
- results/zero-shot/kanarya-2b.json +6 -0
- results/zero-shot/llama-3-8b-instruct.json +7 -0
- results/zero-shot/llama-3-8b.json +7 -0
- results/zero-shot/llama-3.1-8b-instruct.json +7 -0
- results/zero-shot/llama-3.1-8b.json +6 -0
- results/zero-shot/llama-3.2-1b.json +7 -0
- results/zero-shot/llama-3.2-3b-instruct.json +7 -0
- results/zero-shot/llama-3.2-3b.json +7 -0
- results/zero-shot/mistral-7b.json +6 -0
- results/zero-shot/trendyol-7b.json +6 -0
- results/zero-shot/turna.json +6 -0
data/datasets.json
CHANGED
|
@@ -189,5 +189,13 @@
|
|
| 189 |
"url": "https://huggingface.co/datasets/furkanunluturk/turkce-atasozleri",
|
| 190 |
"hf_name": "abrek/turkce-atasozleri-lm-evaluation-harness",
|
| 191 |
"generative": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
}
|
| 193 |
}
|
|
|
|
| 189 |
"url": "https://huggingface.co/datasets/furkanunluturk/turkce-atasozleri",
|
| 190 |
"hf_name": "abrek/turkce-atasozleri-lm-evaluation-harness",
|
| 191 |
"generative": false
|
| 192 |
+
},
|
| 193 |
+
"turkishmmlu": {
|
| 194 |
+
"name": "TurkishMMLU",
|
| 195 |
+
"task": "multiple_choice",
|
| 196 |
+
"description": "TurkishMMLU is a multiple-choice dataset for Turkish Natural Language Processing (NLP) community based on Turkish Highschool Curricula for nine subjects.",
|
| 197 |
+
"url": "https://huggingface.co/datasets/AYueksel/TurkishMMLU",
|
| 198 |
+
"hf_name": "AYueksel/TurkishMMLU",
|
| 199 |
+
"generative": false
|
| 200 |
}
|
| 201 |
}
|
results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json
CHANGED
|
@@ -174,6 +174,12 @@
|
|
| 174 |
"task": "multiple_choice",
|
| 175 |
"acc": 0.48092485549132946,
|
| 176 |
"acc_norm": 0.48092485549132946
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
}
|
| 178 |
]
|
| 179 |
}
|
|
|
|
| 174 |
"task": "multiple_choice",
|
| 175 |
"acc": 0.48092485549132946,
|
| 176 |
"acc_norm": 0.48092485549132946
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"name": "turkishmmlu",
|
| 180 |
+
"task": "multiple_choice",
|
| 181 |
+
"acc": 0.25555555555555554,
|
| 182 |
+
"acc_norm": 0.25555555555555554
|
| 183 |
}
|
| 184 |
]
|
| 185 |
}
|
results/zero-shot/Llama-3.3-70B-Instruct.json
CHANGED
|
@@ -176,6 +176,12 @@
|
|
| 176 |
"task": "multiple_choice",
|
| 177 |
"acc": 0.9254335260115607,
|
| 178 |
"acc_norm": 0.9254335260115607
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
}
|
| 180 |
]
|
| 181 |
}
|
|
|
|
| 176 |
"task": "multiple_choice",
|
| 177 |
"acc": 0.9254335260115607,
|
| 178 |
"acc_norm": 0.9254335260115607
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"name": "turkishmmlu",
|
| 182 |
+
"task": "multiple_choice",
|
| 183 |
+
"acc": 0.646,
|
| 184 |
+
"acc_norm": 0.646
|
| 185 |
}
|
| 186 |
]
|
| 187 |
}
|
results/zero-shot/Ministral-8B-Instruct.json
CHANGED
|
@@ -173,6 +173,12 @@
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.4046242774566474,
|
| 175 |
"acc_norm": 0.4046242774566474
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
}
|
| 177 |
]
|
| 178 |
}
|
|
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.4046242774566474,
|
| 175 |
"acc_norm": 0.4046242774566474
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkishmmlu",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.2644444444444444,
|
| 181 |
+
"acc_norm": 0.2644444444444444
|
| 182 |
}
|
| 183 |
]
|
| 184 |
}
|
results/zero-shot/Mistral-7B-Instruct-v0.3.json
CHANGED
|
@@ -173,6 +173,12 @@
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.3,
|
| 175 |
"acc_norm": 0.3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
}
|
| 177 |
]
|
| 178 |
}
|
|
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.3,
|
| 175 |
"acc_norm": 0.3
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkishmmlu",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.19555555555555557,
|
| 181 |
+
"acc_norm": 0.19555555555555557
|
| 182 |
}
|
| 183 |
]
|
| 184 |
}
|
results/zero-shot/Mistral-7B-v0.3.json
CHANGED
|
@@ -173,6 +173,12 @@
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.27572254335260116,
|
| 175 |
"acc_norm": 0.27572254335260116
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
}
|
| 177 |
]
|
| 178 |
}
|
|
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.27572254335260116,
|
| 175 |
"acc_norm": 0.27572254335260116
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkishmmlu",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.2688888888888889,
|
| 181 |
+
"acc_norm": 0.2688888888888889
|
| 182 |
}
|
| 183 |
]
|
| 184 |
}
|
results/zero-shot/Mixtral-8x7B-Instruct-v0.1.json
CHANGED
|
@@ -175,6 +175,12 @@
|
|
| 175 |
"task": "multiple_choice",
|
| 176 |
"acc": 0.5150289017341041,
|
| 177 |
"acc_norm": 0.5150289017341041
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
}
|
| 179 |
]
|
| 180 |
}
|
|
|
|
| 175 |
"task": "multiple_choice",
|
| 176 |
"acc": 0.5150289017341041,
|
| 177 |
"acc_norm": 0.5150289017341041
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"name": "turkishmmlu",
|
| 181 |
+
"task": "multiple_choice",
|
| 182 |
+
"acc": 0.358,
|
| 183 |
+
"acc_norm": 0.358
|
| 184 |
}
|
| 185 |
]
|
| 186 |
}
|
results/zero-shot/Qwen2.5-0.5B-Instruct.json
CHANGED
|
@@ -173,6 +173,12 @@
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.2832369942196532,
|
| 175 |
"acc_norm": 0.2832369942196532
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
}
|
| 177 |
]
|
| 178 |
}
|
|
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.2832369942196532,
|
| 175 |
"acc_norm": 0.2832369942196532
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkishmmlu",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.2111111111111111,
|
| 181 |
+
"acc_norm": 0.2111111111111111
|
| 182 |
}
|
| 183 |
]
|
| 184 |
}
|
results/zero-shot/Qwen2.5-0.5B.json
CHANGED
|
@@ -173,6 +173,12 @@
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.20346820809248556,
|
| 175 |
"acc_norm": 0.20346820809248556
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
}
|
| 177 |
]
|
| 178 |
}
|
|
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.20346820809248556,
|
| 175 |
"acc_norm": 0.20346820809248556
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkishmmlu",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.17888888888888888,
|
| 181 |
+
"acc_norm": 0.17888888888888888
|
| 182 |
}
|
| 183 |
]
|
| 184 |
}
|
results/zero-shot/Qwen2.5-1.5B-Instruct.json
CHANGED
|
@@ -173,6 +173,12 @@
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.3468208092485549,
|
| 175 |
"acc_norm": 0.3468208092485549
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
}
|
| 177 |
]
|
| 178 |
}
|
|
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.3468208092485549,
|
| 175 |
"acc_norm": 0.3468208092485549
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkishmmlu",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.28888888888888886,
|
| 181 |
+
"acc_norm": 0.28888888888888886
|
| 182 |
}
|
| 183 |
]
|
| 184 |
}
|
results/zero-shot/Qwen2.5-1.5B.json
CHANGED
|
@@ -173,6 +173,12 @@
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.2300578034682081,
|
| 175 |
"acc_norm": 0.2300578034682081
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
}
|
| 177 |
]
|
| 178 |
}
|
|
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.2300578034682081,
|
| 175 |
"acc_norm": 0.2300578034682081
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkishmmlu",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.23,
|
| 181 |
+
"acc_norm": 0.23
|
| 182 |
}
|
| 183 |
]
|
| 184 |
}
|
results/zero-shot/Qwen2.5-14B-Instruct.json
CHANGED
|
@@ -175,6 +175,12 @@
|
|
| 175 |
"task": "multiple_choice",
|
| 176 |
"acc": 0.7832369942196532,
|
| 177 |
"acc_norm": 0.7832369942196532
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
}
|
| 179 |
]
|
| 180 |
}
|
|
|
|
| 175 |
"task": "multiple_choice",
|
| 176 |
"acc": 0.7832369942196532,
|
| 177 |
"acc_norm": 0.7832369942196532
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"name": "turkishmmlu",
|
| 181 |
+
"task": "multiple_choice",
|
| 182 |
+
"acc": 0.5944444444444444,
|
| 183 |
+
"acc_norm": 0.5944444444444444
|
| 184 |
}
|
| 185 |
]
|
| 186 |
}
|
results/zero-shot/Qwen2.5-14B.json
CHANGED
|
@@ -175,6 +175,12 @@
|
|
| 175 |
"task": "multiple_choice",
|
| 176 |
"acc": 0.753757225433526,
|
| 177 |
"acc_norm": 0.753757225433526
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
}
|
| 179 |
]
|
| 180 |
}
|
|
|
|
| 175 |
"task": "multiple_choice",
|
| 176 |
"acc": 0.753757225433526,
|
| 177 |
"acc_norm": 0.753757225433526
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"name": "turkishmmlu",
|
| 181 |
+
"task": "multiple_choice",
|
| 182 |
+
"acc": 0.5622222222222222,
|
| 183 |
+
"acc_norm": 0.5622222222222222
|
| 184 |
}
|
| 185 |
]
|
| 186 |
}
|
results/zero-shot/Qwen2.5-3B-Instruct.json
CHANGED
|
@@ -173,6 +173,12 @@
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.6011560693641619,
|
| 175 |
"acc_norm": 0.6011560693641619
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
}
|
| 177 |
]
|
| 178 |
}
|
|
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.6011560693641619,
|
| 175 |
"acc_norm": 0.6011560693641619
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkishmmlu",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.37777777777777777,
|
| 181 |
+
"acc_norm": 0.37777777777777777
|
| 182 |
}
|
| 183 |
]
|
| 184 |
}
|
results/zero-shot/Qwen2.5-3B.json
CHANGED
|
@@ -173,6 +173,12 @@
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.4346820809248555,
|
| 175 |
"acc_norm": 0.4346820809248555
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
}
|
| 177 |
]
|
| 178 |
}
|
|
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.4346820809248555,
|
| 175 |
"acc_norm": 0.4346820809248555
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkishmmlu",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.22555555555555556,
|
| 181 |
+
"acc_norm": 0.22555555555555556
|
| 182 |
}
|
| 183 |
]
|
| 184 |
}
|
results/zero-shot/Qwen2.5-7B-Instruct.json
CHANGED
|
@@ -173,6 +173,12 @@
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.7121387283236994,
|
| 175 |
"acc_norm": 0.7121387283236994
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
}
|
| 177 |
]
|
| 178 |
}
|
|
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.7121387283236994,
|
| 175 |
"acc_norm": 0.7121387283236994
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkishmmlu",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.47555555555555556,
|
| 181 |
+
"acc_norm": 0.47555555555555556
|
| 182 |
}
|
| 183 |
]
|
| 184 |
}
|
results/zero-shot/Qwen2.5-7B.json
CHANGED
|
@@ -173,6 +173,12 @@
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.7352601156069364,
|
| 175 |
"acc_norm": 0.7352601156069364
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
}
|
| 177 |
]
|
| 178 |
}
|
|
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.7352601156069364,
|
| 175 |
"acc_norm": 0.7352601156069364
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkishmmlu",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.49333333333333335,
|
| 181 |
+
"acc_norm": 0.49333333333333335
|
| 182 |
}
|
| 183 |
]
|
| 184 |
}
|
results/zero-shot/aya-23-35B.json
CHANGED
|
@@ -175,6 +175,12 @@
|
|
| 175 |
"task": "multiple_choice",
|
| 176 |
"acc": 0.5687861271676301,
|
| 177 |
"acc_norm": 0.5687861271676301
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
}
|
| 179 |
]
|
| 180 |
}
|
|
|
|
| 175 |
"task": "multiple_choice",
|
| 176 |
"acc": 0.5687861271676301,
|
| 177 |
"acc_norm": 0.5687861271676301
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"name": "turkishmmlu",
|
| 181 |
+
"task": "multiple_choice",
|
| 182 |
+
"acc": 0.4533333333333333,
|
| 183 |
+
"acc_norm": 0.4533333333333333
|
| 184 |
}
|
| 185 |
]
|
| 186 |
}
|
results/zero-shot/aya-23-8b.json
CHANGED
|
@@ -169,6 +169,12 @@
|
|
| 169 |
"task": "multiple_choice",
|
| 170 |
"acc": 0.44971098265895953,
|
| 171 |
"acc_norm": 0.44971098265895953
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
}
|
| 173 |
]
|
| 174 |
}
|
|
|
|
| 169 |
"task": "multiple_choice",
|
| 170 |
"acc": 0.44971098265895953,
|
| 171 |
"acc_norm": 0.44971098265895953
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"name": "turkishmmlu",
|
| 175 |
+
"task": "multiple_choice",
|
| 176 |
+
"acc": 0.33,
|
| 177 |
+
"acc_norm": 0.33
|
| 178 |
}
|
| 179 |
]
|
| 180 |
}
|
results/zero-shot/aya-expanse-32b.json
CHANGED
|
@@ -174,6 +174,12 @@
|
|
| 174 |
"task": "multiple_choice",
|
| 175 |
"acc": 0.8236994219653179,
|
| 176 |
"acc_norm": 0.8236994219653179
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
}
|
| 178 |
]
|
| 179 |
}
|
|
|
|
| 174 |
"task": "multiple_choice",
|
| 175 |
"acc": 0.8236994219653179,
|
| 176 |
"acc_norm": 0.8236994219653179
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"name": "turkishmmlu",
|
| 180 |
+
"task": "multiple_choice",
|
| 181 |
+
"acc": 0.5688888888888889,
|
| 182 |
+
"acc_norm": 0.5688888888888889
|
| 183 |
}
|
| 184 |
]
|
| 185 |
}
|
results/zero-shot/aya-expanse-8b.json
CHANGED
|
@@ -160,6 +160,12 @@
|
|
| 160 |
"task": "multiple_choice",
|
| 161 |
"acc": 0.723121387283237,
|
| 162 |
"acc_norm": 0.723121387283237
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
}
|
| 164 |
]
|
| 165 |
}
|
|
|
|
| 160 |
"task": "multiple_choice",
|
| 161 |
"acc": 0.723121387283237,
|
| 162 |
"acc_norm": 0.723121387283237
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"name": "turkishmmlu",
|
| 166 |
+
"task": "multiple_choice",
|
| 167 |
+
"acc": 0.46555555555555556,
|
| 168 |
+
"acc_norm": 0.46555555555555556
|
| 169 |
}
|
| 170 |
]
|
| 171 |
}
|
results/zero-shot/aya101.json
CHANGED
|
@@ -173,6 +173,12 @@
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.009826589595375723,
|
| 175 |
"acc_norm": 0.009826589595375723
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
}
|
| 177 |
]
|
| 178 |
}
|
|
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.009826589595375723,
|
| 175 |
"acc_norm": 0.009826589595375723
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkishmmlu",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.374,
|
| 181 |
+
"acc_norm": 0.374
|
| 182 |
}
|
| 183 |
]
|
| 184 |
}
|
results/zero-shot/commencis-7b.json
CHANGED
|
@@ -173,6 +173,12 @@
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.22658959537572254,
|
| 175 |
"acc_norm": 0.22658959537572254
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
}
|
| 177 |
]
|
| 178 |
}
|
|
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.22658959537572254,
|
| 175 |
"acc_norm": 0.22658959537572254
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkishmmlu",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.24666666666666667,
|
| 181 |
+
"acc_norm": 0.24666666666666667
|
| 182 |
}
|
| 183 |
]
|
| 184 |
}
|
results/zero-shot/kanarya-2b.json
CHANGED
|
@@ -172,6 +172,12 @@
|
|
| 172 |
"task": "multiple_choice",
|
| 173 |
"acc": 0.0,
|
| 174 |
"acc_norm": 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
}
|
| 176 |
]
|
| 177 |
}
|
|
|
|
| 172 |
"task": "multiple_choice",
|
| 173 |
"acc": 0.0,
|
| 174 |
"acc_norm": 0.0
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"name": "turkishmmlu",
|
| 178 |
+
"task": "multiple_choice",
|
| 179 |
+
"acc": 0.18,
|
| 180 |
+
"acc_norm": 0.18
|
| 181 |
}
|
| 182 |
]
|
| 183 |
}
|
results/zero-shot/llama-3-8b-instruct.json
CHANGED
|
@@ -168,6 +168,13 @@
|
|
| 168 |
"task": "multiple_choice",
|
| 169 |
"acc": 0.6947976878612717,
|
| 170 |
"acc_norm": 0.6947976878612717
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
}
|
|
|
|
| 172 |
]
|
| 173 |
}
|
|
|
|
| 168 |
"task": "multiple_choice",
|
| 169 |
"acc": 0.6947976878612717,
|
| 170 |
"acc_norm": 0.6947976878612717
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"name": "turkishmmlu",
|
| 174 |
+
"task": "multiple_choice",
|
| 175 |
+
"acc": 0.3811111111111111,
|
| 176 |
+
"acc_norm": 0.3811111111111111
|
| 177 |
}
|
| 178 |
+
|
| 179 |
]
|
| 180 |
}
|
results/zero-shot/llama-3-8b.json
CHANGED
|
@@ -167,6 +167,13 @@
|
|
| 167 |
"task": "multiple_choice",
|
| 168 |
"acc": 0.44046242774566474,
|
| 169 |
"acc_norm": 0.44046242774566474
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
}
|
|
|
|
| 171 |
]
|
| 172 |
}
|
|
|
|
| 167 |
"task": "multiple_choice",
|
| 168 |
"acc": 0.44046242774566474,
|
| 169 |
"acc_norm": 0.44046242774566474
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"name": "turkishmmlu",
|
| 173 |
+
"task": "multiple_choice",
|
| 174 |
+
"acc": 0.2544444444444444,
|
| 175 |
+
"acc_norm": 0.2544444444444444
|
| 176 |
}
|
| 177 |
+
|
| 178 |
]
|
| 179 |
}
|
results/zero-shot/llama-3.1-8b-instruct.json
CHANGED
|
@@ -167,6 +167,13 @@
|
|
| 167 |
"task": "multiple_choice",
|
| 168 |
"acc": 0.7549132947976879,
|
| 169 |
"acc_norm": 0.7549132947976879
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
}
|
|
|
|
| 171 |
]
|
| 172 |
}
|
|
|
|
| 167 |
"task": "multiple_choice",
|
| 168 |
"acc": 0.7549132947976879,
|
| 169 |
"acc_norm": 0.7549132947976879
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"name": "turkishmmlu",
|
| 173 |
+
"task": "multiple_choice",
|
| 174 |
+
"acc": 0.3811111111111111,
|
| 175 |
+
"acc_norm": 0.3811111111111111
|
| 176 |
}
|
| 177 |
+
|
| 178 |
]
|
| 179 |
}
|
results/zero-shot/llama-3.1-8b.json
CHANGED
|
@@ -167,6 +167,12 @@
|
|
| 167 |
"task": "multiple_choice",
|
| 168 |
"acc": 0.5410404624277456,
|
| 169 |
"acc_norm": 0.5410404624277456
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
}
|
| 171 |
]
|
| 172 |
}
|
|
|
|
| 167 |
"task": "multiple_choice",
|
| 168 |
"acc": 0.5410404624277456,
|
| 169 |
"acc_norm": 0.5410404624277456
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"name": "turkishmmlu",
|
| 173 |
+
"task": "multiple_choice",
|
| 174 |
+
"acc": 0.3055555555555556,
|
| 175 |
+
"acc_norm": 0.3055555555555556
|
| 176 |
}
|
| 177 |
]
|
| 178 |
}
|
results/zero-shot/llama-3.2-1b.json
CHANGED
|
@@ -199,6 +199,13 @@
|
|
| 199 |
"task": "multiple_choice",
|
| 200 |
"acc": 0.21676300578034682,
|
| 201 |
"acc_norm": 0.21676300578034682
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
}
|
|
|
|
| 203 |
]
|
| 204 |
}
|
|
|
|
| 199 |
"task": "multiple_choice",
|
| 200 |
"acc": 0.21676300578034682,
|
| 201 |
"acc_norm": 0.21676300578034682
|
| 202 |
+
},
|
| 203 |
+
{
|
| 204 |
+
"name": "turkishmmlu",
|
| 205 |
+
"task": "multiple_choice",
|
| 206 |
+
"acc": 0.18888888888888888,
|
| 207 |
+
"acc_norm": 0.18888888888888888
|
| 208 |
}
|
| 209 |
+
|
| 210 |
]
|
| 211 |
}
|
results/zero-shot/llama-3.2-3b-instruct.json
CHANGED
|
@@ -192,6 +192,13 @@
|
|
| 192 |
"task": "multiple_choice",
|
| 193 |
"acc": 0.010982658959537572,
|
| 194 |
"acc_norm": 0.010982658959537572
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
}
|
|
|
|
| 196 |
]
|
| 197 |
}
|
|
|
|
| 192 |
"task": "multiple_choice",
|
| 193 |
"acc": 0.010982658959537572,
|
| 194 |
"acc_norm": 0.010982658959537572
|
| 195 |
+
},
|
| 196 |
+
{
|
| 197 |
+
"name": "turkishmmlu",
|
| 198 |
+
"task": "multiple_choice",
|
| 199 |
+
"acc": 0.34444444444444444,
|
| 200 |
+
"acc_norm": 0.34444444444444444
|
| 201 |
}
|
| 202 |
+
|
| 203 |
]
|
| 204 |
}
|
results/zero-shot/llama-3.2-3b.json
CHANGED
|
@@ -161,6 +161,13 @@
|
|
| 161 |
"task": "multiple_choice",
|
| 162 |
"acc": 0.1994219653179191,
|
| 163 |
"acc_norm": 0.1994219653179191
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
}
|
|
|
|
| 165 |
]
|
| 166 |
}
|
|
|
|
| 161 |
"task": "multiple_choice",
|
| 162 |
"acc": 0.1994219653179191,
|
| 163 |
"acc_norm": 0.1994219653179191
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"name": "turkishmmlu",
|
| 167 |
+
"task": "multiple_choice",
|
| 168 |
+
"acc": 0.29,
|
| 169 |
+
"acc_norm": 0.29
|
| 170 |
}
|
| 171 |
+
|
| 172 |
]
|
| 173 |
}
|
results/zero-shot/mistral-7b.json
CHANGED
|
@@ -166,6 +166,12 @@
|
|
| 166 |
"task": "multiple_choice",
|
| 167 |
"acc": 0.30809248554913293,
|
| 168 |
"acc_norm": 0.30809248554913293
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
}
|
| 170 |
]
|
| 171 |
}
|
|
|
|
| 166 |
"task": "multiple_choice",
|
| 167 |
"acc": 0.30809248554913293,
|
| 168 |
"acc_norm": 0.30809248554913293
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"name": "turkishmmlu",
|
| 172 |
+
"task": "multiple_choice",
|
| 173 |
+
"acc": 0.20333333333333334,
|
| 174 |
+
"acc_norm": 0.20333333333333334
|
| 175 |
}
|
| 176 |
]
|
| 177 |
}
|
results/zero-shot/trendyol-7b.json
CHANGED
|
@@ -173,6 +173,12 @@
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.0,
|
| 175 |
"acc_norm": 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
}
|
| 177 |
]
|
| 178 |
}
|
|
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.0,
|
| 175 |
"acc_norm": 0.0
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkishmmlu",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.2477777777777778,
|
| 181 |
+
"acc_norm": 0.2477777777777778
|
| 182 |
}
|
| 183 |
]
|
| 184 |
}
|
results/zero-shot/turna.json
CHANGED
|
@@ -173,6 +173,12 @@
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.19248554913294796,
|
| 175 |
"acc_norm": 0.19248554913294796
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
}
|
| 177 |
]
|
| 178 |
}
|
|
|
|
| 173 |
"task": "multiple_choice",
|
| 174 |
"acc": 0.19248554913294796,
|
| 175 |
"acc_norm": 0.19248554913294796
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkishmmlu",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.19333333333333333,
|
| 181 |
+
"acc_norm": 0.19333333333333333
|
| 182 |
}
|
| 183 |
]
|
| 184 |
}
|