Tianhua committed
Commit 258a776 · verified · 1 Parent(s): c621df9

Update README.md

Files changed (1): README.md (+15 -12)
README.md CHANGED
@@ -37,9 +37,9 @@ model-index:
       type: openai_humanneval
       name: OpenAI HumanEval
     metrics:
-    - name: pass@1 (t=0.01)
+    - name: pass@1 (t=0.2)
       type: pass@1
-      value: 31.707
+      value: 34.116
     - name: pass@10 (t=0.8)
       type: pass@10
       value: 65.755
@@ -49,9 +49,9 @@ model-index:
       type: mbpp
       name: Mostly Basic Python Problems (mbpp)
     metrics:
-    - name: pass@1 (t=0.01)
+    - name: pass@1 (t=0.1)
       type: pass@1
-      value: 39.4
+      value: 39.112
     - name: pass@10 (t=0.8)
       type: pass@10
       value: 59.895
@@ -61,7 +61,7 @@ model-index:
       type: race
       name: RACE
     metrics:
-    - name: accuracy
+    - name: accuracy (0 shot)
       type: accuracy
       value: 41.148
   - task:
@@ -70,7 +70,10 @@ model-index:
       type: mmlu
       name: Measuring Massive Multitask Language Understanding (MMLU)
     metrics:
-    - name: accuracy
+    - name: accuracy (5 shot)
+      type: accuracy
+      value: 53.215
+    - name: accuracy (0 shot)
       type: accuracy
       value: 52.789
   - task:
@@ -79,7 +82,7 @@ model-index:
       type: truthful_qa
       name: Truthful QA
     metrics:
-    - name: accuracy
+    - name: accuracy (0 shot)
       type: accuracy
       value: 47.29
   - task:
@@ -100,7 +103,7 @@ model-index:
       type: copa
       name: COPA
     metrics:
-    - name: accuracy
+    - name: accuracy (0 shot)
       type: accuracy
       value: 85
   - task:
@@ -109,7 +112,7 @@ model-index:
       type: boolq
       name: Boolq
     metrics:
-    - name: accuracy
+    - name: accuracy (0 shot)
       type: accuracy
       value: 82.783
   - task:
@@ -118,7 +121,7 @@ model-index:
       type: openbookqa
       name: Openbook QA
     metrics:
-    - name: accuracy
+    - name: accuracy (0 shot)
       type: accuracy
       value: 42
   - task:
@@ -139,7 +142,7 @@ model-index:
       type: piqa
       name: PIQA
     metrics:
-    - name: accuracy
+    - name: accuracy (0 shot)
       type: accuracy
       value: 77.856
   - task:
@@ -148,7 +151,7 @@ model-index:
       type: ai2_arc
       name: ARC (Easy)
     metrics:
-    - name: accuracy
+    - name: accuracy (0 shot)
       type: accuracy
       value: 70.328
   - task:
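
For context on the metric names touched by this diff: the pass@1 and pass@10 entries for HumanEval and MBPP are, by the usual convention, unbiased pass@k estimates computed from n sampled completions per problem at the stated sampling temperature t, while the new (0 shot) / (5 shot) suffixes on the accuracy metrics record how many in-context examples were provided. Below is a minimal sketch of the standard pass@k estimator from the HumanEval paper (Chen et al., 2021); the function name and the sample counts in the usage example are illustrative and are not taken from this model's evaluation run.

```python
from math import comb

def pass_at_k(n: int, c: int, k: int) -> float:
    """Unbiased pass@k: probability that at least one of k completions,
    drawn without replacement from n generated samples of which c pass
    the unit tests, is correct."""
    if n - c < k:
        return 1.0  # every size-k draw must contain a passing sample
    return 1.0 - comb(n - c, k) / comb(n, k)

# Illustrative only: 200 completions for one problem, 68 of them pass.
print(round(pass_at_k(n=200, c=68, k=1), 3))   # 0.34  -> contributes to pass@1
print(round(pass_at_k(n=200, c=68, k=10), 3))  # much higher -> pass@10
```

The benchmark score reported in the model-index is then the mean of this per-problem estimate over the whole suite, which is why pass@1 is typically measured at a low temperature and pass@10 at a higher one.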