Update README.md
Browse files
README.md
CHANGED
@@ -256,7 +256,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
|
|
256 |
</td>
|
257 |
</tr>
|
258 |
<tr>
|
259 |
-
<td>MMLU-Pro
|
260 |
</td>
|
261 |
<td>48.1
|
262 |
</td>
|
@@ -266,7 +266,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
|
|
266 |
</td>
|
267 |
</tr>
|
268 |
<tr>
|
269 |
-
<td>IFEval
|
270 |
</td>
|
271 |
<td>86.4
|
272 |
</td>
|
@@ -276,7 +276,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
|
|
276 |
</td>
|
277 |
</tr>
|
278 |
<tr>
|
279 |
-
<td>BBH
|
280 |
</td>
|
281 |
<td>55.8
|
282 |
</td>
|
@@ -286,7 +286,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
|
|
286 |
</td>
|
287 |
</tr>
|
288 |
<tr>
|
289 |
-
<td>Math
|
290 |
</td>
|
291 |
<td>26.1
|
292 |
</td>
|
@@ -296,7 +296,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
|
|
296 |
</td>
|
297 |
</tr>
|
298 |
<tr>
|
299 |
-
<td>GPQA ()
|
300 |
</td>
|
301 |
<td>15.4
|
302 |
</td>
|
@@ -306,7 +306,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
|
|
306 |
</td>
|
307 |
</tr>
|
308 |
<tr>
|
309 |
-
<td>MuSR (
|
310 |
</td>
|
311 |
<td>18.2
|
312 |
</td>
|
|
|
256 |
</td>
|
257 |
</tr>
|
258 |
<tr>
|
259 |
+
<td>MMLU-Pro (5-shot)
|
260 |
</td>
|
261 |
<td>48.1
|
262 |
</td>
|
|
|
266 |
</td>
|
267 |
</tr>
|
268 |
<tr>
|
269 |
+
<td>IFEval (0-shot)
|
270 |
</td>
|
271 |
<td>86.4
|
272 |
</td>
|
|
|
276 |
</td>
|
277 |
</tr>
|
278 |
<tr>
|
279 |
+
<td>BBH (3-shot)
|
280 |
</td>
|
281 |
<td>55.8
|
282 |
</td>
|
|
|
286 |
</td>
|
287 |
</tr>
|
288 |
<tr>
|
289 |
+
<td>Math-lvl-5 (4-shot)
|
290 |
</td>
|
291 |
<td>26.1
|
292 |
</td>
|
|
|
296 |
</td>
|
297 |
</tr>
|
298 |
<tr>
|
299 |
+
<td>GPQA (0-shot)
|
300 |
</td>
|
301 |
<td>15.4
|
302 |
</td>
|
|
|
306 |
</td>
|
307 |
</tr>
|
308 |
<tr>
|
309 |
+
<td>MuSR (0-shot)
|
310 |
</td>
|
311 |
<td>18.2
|
312 |
</td>
|