Spaces:
Running
Running
| [ | |
| { | |
| "model": "Phi-3-mini-128k-instruct (3.8B)", | |
| "Average": 40.00, | |
| "MMLU": 36.97, | |
| "ARC":60.94, | |
| "WinoGrande": 46.88, | |
| "PIQA": 32.04, | |
| "CommonsenseQA": 49.15, | |
| "Race": 37.81, | |
| "MedMCQA": 22.61, | |
| "OpenkookQA": 33.60 | |
| }, | |
| { | |
| "model": "Qwen1.5 (1.8B)", | |
| "Average": 21.68, | |
| "MMLU": 9.99, | |
| "ARC":15.84 , | |
| "WinoGrande": 40.96, | |
| "PIQA": 15.52, | |
| "CommonsenseQA": 31.13, | |
| "Race": 34.91, | |
| "MedMCQA": 4.7, | |
| "OpenkookQA": 20.37 | |
| }, | |
| { | |
| "model": "Gemma (2B)", | |
| "Average": 16.66, | |
| "MMLU": 17.52, | |
| "ARC":23.93, | |
| "WinoGrande": 16.10, | |
| "PIQA": 15.09, | |
| "CommonsenseQA": 27.46, | |
| "Race": 14.32, | |
| "MedMCQA": 4.57, | |
| "OpenkookQA": 14.26 | |
| }, | |
| { | |
| "model": "SlimPajama-DC (1.3B)", | |
| "Average": 9.60, | |
| "MMLU": 9.22, | |
| "ARC":14.95, | |
| "WinoGrande": 14.76, | |
| "PIQA": 5.32, | |
| "CommonsenseQA": 9.01, | |
| "Race": 16.19, | |
| "MedMCQA": 1.68, | |
| "OpenkookQA": 5.70 | |
| }, | |
| { | |
| "model": "RedPajama (1B)", | |
| "Average": 9.00, | |
| "MMLU": 9.21, | |
| "ARC":13.5, | |
| "WinoGrande": 16.97, | |
| "PIQA": 0.86, | |
| "CommonsenseQA": 11.41, | |
| "Race": 14.35, | |
| "MedMCQA": 1.86, | |
| "OpenkookQA": 3.87 | |
| }, | |
| { | |
| "model": "OLMo (1.2B)", | |
| "Average": 8.85, | |
| "MMLU": 8.54, | |
| "ARC":13.18, | |
| "WinoGrande": 6.16, | |
| "PIQA": 8.05, | |
| "CommonsenseQA": 13.10, | |
| "Race": 13.61, | |
| "MedMCQA": 2.07, | |
| "OpenkookQA": 6.11 | |
| }, | |
| { | |
| "model": "Pythia (1.4B)", | |
| "Average": 8.79, | |
| "MMLU": 9.66, | |
| "ARC":14.69, | |
| "WinoGrande": 11.52, | |
| "PIQA": 4.17, | |
| "CommonsenseQA": 9.01, | |
| "Race": 12.76, | |
| "MedMCQA": 3.19, | |
| "OpenkookQA": 5.30 | |
| }, | |
| { | |
| "model": "TinyLLama (1.1B)", | |
| "Average": 8.45, | |
| "MMLU": 8.94, | |
| "ARC":13.31, | |
| "WinoGrande": 12.23, | |
| "PIQA": 3.59, | |
| "CommonsenseQA": 6.06, | |
| "Race": 16.7, | |
| "MedMCQA": 2.07, | |
| "OpenkookQA": 4.68 | |
| }, | |
| { | |
| "model": "OPT (1.3B)", | |
| "Average": 7.89, | |
| "MMLU": 7.40, | |
| "ARC":11.83, | |
| "WinoGrande": 12.47, | |
| "PIQA": 4.48, | |
| "CommonsenseQA": 7.61, | |
| "Race": 13.61, | |
| "MedMCQA": 1.25, | |
| "OpenkookQA": 4.48 | |
| }, | |
| { | |
| "model": "GPT-Neo (1.3B)", | |
| "Average": 7.42, | |
| "MMLU": 6.94, | |
| "ARC": 6.69, | |
| "WinoGrande": 10.81, | |
| "PIQA": 4.31, | |
| "CommonsenseQA": 6.34, | |
| "Race": 13.75, | |
| "MedMCQA": 2.63, | |
| "OpenkookQA": 4.89 | |
| }, | |
| { | |
| "model": "Cerebras-GPT (1.3B)", | |
| "Average": 4.86, | |
| "MMLU": 5.37, | |
| "ARC":4.43, | |
| "WinoGrande": 9.31, | |
| "PIQA": 2.16, | |
| "CommonsenseQA": 6.2, | |
| "Race": 6.9, | |
| "MedMCQA": 1.04, | |
| "OpenkookQA": 3.46 | |
| } | |
| ] |