alexmarques committed on
Commit
7389db8
·
verified ·
1 Parent(s): 3f1f2d2

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -6
README.md CHANGED
@@ -256,7 +256,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
256
  </td>
257
  </tr>
258
  <tr>
259
- <td>MMLU-Pro
260
  </td>
261
  <td>48.1
262
  </td>
@@ -266,7 +266,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
266
  </td>
267
  </tr>
268
  <tr>
269
- <td>IFEval
270
  </td>
271
  <td>86.4
272
  </td>
@@ -276,7 +276,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
276
  </td>
277
  </tr>
278
  <tr>
279
- <td>BBH
280
  </td>
281
  <td>55.8
282
  </td>
@@ -286,7 +286,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
286
  </td>
287
  </tr>
288
  <tr>
289
- <td>Math lvl 5
290
  </td>
291
  <td>26.1
292
  </td>
@@ -296,7 +296,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
296
  </td>
297
  </tr>
298
  <tr>
299
- <td>GPQA ()
300
  </td>
301
  <td>15.4
302
  </td>
@@ -306,7 +306,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
306
  </td>
307
  </tr>
308
  <tr>
309
- <td>MuSR (5-shot)
310
  </td>
311
  <td>18.2
312
  </td>
 
256
  </td>
257
  </tr>
258
  <tr>
259
+ <td>MMLU-Pro (5-shot)
260
  </td>
261
  <td>48.1
262
  </td>
 
266
  </td>
267
  </tr>
268
  <tr>
269
+ <td>IFEval (0-shot)
270
  </td>
271
  <td>86.4
272
  </td>
 
276
  </td>
277
  </tr>
278
  <tr>
279
+ <td>BBH (3-shot)
280
  </td>
281
  <td>55.8
282
  </td>
 
286
  </td>
287
  </tr>
288
  <tr>
289
+ <td>Math-lvl-5 (4-shot)
290
  </td>
291
  <td>26.1
292
  </td>
 
296
  </td>
297
  </tr>
298
  <tr>
299
+ <td>GPQA (0-shot)
300
  </td>
301
  <td>15.4
302
  </td>
 
306
  </td>
307
  </tr>
308
  <tr>
309
+ <td>MuSR (0-shot)
310
  </td>
311
  <td>18.2
312
  </td>