orbenchllm committed on
Commit
89b700a
·
verified ·
1 Parent(s): 595afd1

Upload 25 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ _site/
2
+ .DS_Store
Gemfile ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ source 'https://rubygems.org'
2
+ gem 'github-pages', group: :jekyll_plugins
Gemfile.lock ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ activesupport (6.1.7.8)
5
+ concurrent-ruby (~> 1.0, >= 1.0.2)
6
+ i18n (>= 1.6, < 2)
7
+ minitest (>= 5.1)
8
+ tzinfo (~> 2.0)
9
+ zeitwerk (~> 2.3)
10
+ addressable (2.8.6)
11
+ public_suffix (>= 2.0.2, < 6.0)
12
+ base64 (0.2.0)
13
+ coffee-script (2.4.1)
14
+ coffee-script-source
15
+ execjs
16
+ coffee-script-source (1.12.2)
17
+ colorator (1.1.0)
18
+ commonmarker (0.23.10)
19
+ concurrent-ruby (1.3.3)
20
+ dnsruby (1.72.1)
21
+ simpleidn (~> 0.2.1)
22
+ em-websocket (0.5.3)
23
+ eventmachine (>= 0.12.9)
24
+ http_parser.rb (~> 0)
25
+ ethon (0.16.0)
26
+ ffi (>= 1.15.0)
27
+ eventmachine (1.2.7)
28
+ execjs (2.9.1)
29
+ faraday (2.8.1)
30
+ base64
31
+ faraday-net_http (>= 2.0, < 3.1)
32
+ ruby2_keywords (>= 0.0.4)
33
+ faraday-net_http (3.0.2)
34
+ ffi (1.17.0-x86_64-darwin)
35
+ forwardable-extended (2.6.0)
36
+ gemoji (4.1.0)
37
+ github-pages (231)
38
+ github-pages-health-check (= 1.18.2)
39
+ jekyll (= 3.9.5)
40
+ jekyll-avatar (= 0.8.0)
41
+ jekyll-coffeescript (= 1.2.2)
42
+ jekyll-commonmark-ghpages (= 0.4.0)
43
+ jekyll-default-layout (= 0.1.5)
44
+ jekyll-feed (= 0.17.0)
45
+ jekyll-gist (= 1.5.0)
46
+ jekyll-github-metadata (= 2.16.1)
47
+ jekyll-include-cache (= 0.2.1)
48
+ jekyll-mentions (= 1.6.0)
49
+ jekyll-optional-front-matter (= 0.3.2)
50
+ jekyll-paginate (= 1.1.0)
51
+ jekyll-readme-index (= 0.3.0)
52
+ jekyll-redirect-from (= 0.16.0)
53
+ jekyll-relative-links (= 0.6.1)
54
+ jekyll-remote-theme (= 0.4.3)
55
+ jekyll-sass-converter (= 1.5.2)
56
+ jekyll-seo-tag (= 2.8.0)
57
+ jekyll-sitemap (= 1.4.0)
58
+ jekyll-swiss (= 1.0.0)
59
+ jekyll-theme-architect (= 0.2.0)
60
+ jekyll-theme-cayman (= 0.2.0)
61
+ jekyll-theme-dinky (= 0.2.0)
62
+ jekyll-theme-hacker (= 0.2.0)
63
+ jekyll-theme-leap-day (= 0.2.0)
64
+ jekyll-theme-merlot (= 0.2.0)
65
+ jekyll-theme-midnight (= 0.2.0)
66
+ jekyll-theme-minimal (= 0.2.0)
67
+ jekyll-theme-modernist (= 0.2.0)
68
+ jekyll-theme-primer (= 0.6.0)
69
+ jekyll-theme-slate (= 0.2.0)
70
+ jekyll-theme-tactile (= 0.2.0)
71
+ jekyll-theme-time-machine (= 0.2.0)
72
+ jekyll-titles-from-headings (= 0.5.3)
73
+ jemoji (= 0.13.0)
74
+ kramdown (= 2.4.0)
75
+ kramdown-parser-gfm (= 1.1.0)
76
+ liquid (= 4.0.4)
77
+ mercenary (~> 0.3)
78
+ minima (= 2.5.1)
79
+ nokogiri (>= 1.13.6, < 2.0)
80
+ rouge (= 3.30.0)
81
+ terminal-table (~> 1.4)
82
+ github-pages-health-check (1.18.2)
83
+ addressable (~> 2.3)
84
+ dnsruby (~> 1.60)
85
+ octokit (>= 4, < 8)
86
+ public_suffix (>= 3.0, < 6.0)
87
+ typhoeus (~> 1.3)
88
+ html-pipeline (2.14.3)
89
+ activesupport (>= 2)
90
+ nokogiri (>= 1.4)
91
+ http_parser.rb (0.8.0)
92
+ i18n (1.14.5)
93
+ concurrent-ruby (~> 1.0)
94
+ jekyll (3.9.5)
95
+ addressable (~> 2.4)
96
+ colorator (~> 1.0)
97
+ em-websocket (~> 0.5)
98
+ i18n (>= 0.7, < 2)
99
+ jekyll-sass-converter (~> 1.0)
100
+ jekyll-watch (~> 2.0)
101
+ kramdown (>= 1.17, < 3)
102
+ liquid (~> 4.0)
103
+ mercenary (~> 0.3.3)
104
+ pathutil (~> 0.9)
105
+ rouge (>= 1.7, < 4)
106
+ safe_yaml (~> 1.0)
107
+ jekyll-avatar (0.8.0)
108
+ jekyll (>= 3.0, < 5.0)
109
+ jekyll-coffeescript (1.2.2)
110
+ coffee-script (~> 2.2)
111
+ coffee-script-source (~> 1.12)
112
+ jekyll-commonmark (1.4.0)
113
+ commonmarker (~> 0.22)
114
+ jekyll-commonmark-ghpages (0.4.0)
115
+ commonmarker (~> 0.23.7)
116
+ jekyll (~> 3.9.0)
117
+ jekyll-commonmark (~> 1.4.0)
118
+ rouge (>= 2.0, < 5.0)
119
+ jekyll-default-layout (0.1.5)
120
+ jekyll (>= 3.0, < 5.0)
121
+ jekyll-feed (0.17.0)
122
+ jekyll (>= 3.7, < 5.0)
123
+ jekyll-gist (1.5.0)
124
+ octokit (~> 4.2)
125
+ jekyll-github-metadata (2.16.1)
126
+ jekyll (>= 3.4, < 5.0)
127
+ octokit (>= 4, < 7, != 4.4.0)
128
+ jekyll-include-cache (0.2.1)
129
+ jekyll (>= 3.7, < 5.0)
130
+ jekyll-mentions (1.6.0)
131
+ html-pipeline (~> 2.3)
132
+ jekyll (>= 3.7, < 5.0)
133
+ jekyll-optional-front-matter (0.3.2)
134
+ jekyll (>= 3.0, < 5.0)
135
+ jekyll-paginate (1.1.0)
136
+ jekyll-readme-index (0.3.0)
137
+ jekyll (>= 3.0, < 5.0)
138
+ jekyll-redirect-from (0.16.0)
139
+ jekyll (>= 3.3, < 5.0)
140
+ jekyll-relative-links (0.6.1)
141
+ jekyll (>= 3.3, < 5.0)
142
+ jekyll-remote-theme (0.4.3)
143
+ addressable (~> 2.0)
144
+ jekyll (>= 3.5, < 5.0)
145
+ jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0)
146
+ rubyzip (>= 1.3.0, < 3.0)
147
+ jekyll-sass-converter (1.5.2)
148
+ sass (~> 3.4)
149
+ jekyll-seo-tag (2.8.0)
150
+ jekyll (>= 3.8, < 5.0)
151
+ jekyll-sitemap (1.4.0)
152
+ jekyll (>= 3.7, < 5.0)
153
+ jekyll-swiss (1.0.0)
154
+ jekyll-theme-architect (0.2.0)
155
+ jekyll (> 3.5, < 5.0)
156
+ jekyll-seo-tag (~> 2.0)
157
+ jekyll-theme-cayman (0.2.0)
158
+ jekyll (> 3.5, < 5.0)
159
+ jekyll-seo-tag (~> 2.0)
160
+ jekyll-theme-dinky (0.2.0)
161
+ jekyll (> 3.5, < 5.0)
162
+ jekyll-seo-tag (~> 2.0)
163
+ jekyll-theme-hacker (0.2.0)
164
+ jekyll (> 3.5, < 5.0)
165
+ jekyll-seo-tag (~> 2.0)
166
+ jekyll-theme-leap-day (0.2.0)
167
+ jekyll (> 3.5, < 5.0)
168
+ jekyll-seo-tag (~> 2.0)
169
+ jekyll-theme-merlot (0.2.0)
170
+ jekyll (> 3.5, < 5.0)
171
+ jekyll-seo-tag (~> 2.0)
172
+ jekyll-theme-midnight (0.2.0)
173
+ jekyll (> 3.5, < 5.0)
174
+ jekyll-seo-tag (~> 2.0)
175
+ jekyll-theme-minimal (0.2.0)
176
+ jekyll (> 3.5, < 5.0)
177
+ jekyll-seo-tag (~> 2.0)
178
+ jekyll-theme-modernist (0.2.0)
179
+ jekyll (> 3.5, < 5.0)
180
+ jekyll-seo-tag (~> 2.0)
181
+ jekyll-theme-primer (0.6.0)
182
+ jekyll (> 3.5, < 5.0)
183
+ jekyll-github-metadata (~> 2.9)
184
+ jekyll-seo-tag (~> 2.0)
185
+ jekyll-theme-slate (0.2.0)
186
+ jekyll (> 3.5, < 5.0)
187
+ jekyll-seo-tag (~> 2.0)
188
+ jekyll-theme-tactile (0.2.0)
189
+ jekyll (> 3.5, < 5.0)
190
+ jekyll-seo-tag (~> 2.0)
191
+ jekyll-theme-time-machine (0.2.0)
192
+ jekyll (> 3.5, < 5.0)
193
+ jekyll-seo-tag (~> 2.0)
194
+ jekyll-titles-from-headings (0.5.3)
195
+ jekyll (>= 3.3, < 5.0)
196
+ jekyll-watch (2.2.1)
197
+ listen (~> 3.0)
198
+ jemoji (0.13.0)
199
+ gemoji (>= 3, < 5)
200
+ html-pipeline (~> 2.2)
201
+ jekyll (>= 3.0, < 5.0)
202
+ kramdown (2.4.0)
203
+ rexml
204
+ kramdown-parser-gfm (1.1.0)
205
+ kramdown (~> 2.0)
206
+ liquid (4.0.4)
207
+ listen (3.9.0)
208
+ rb-fsevent (~> 0.10, >= 0.10.3)
209
+ rb-inotify (~> 0.9, >= 0.9.10)
210
+ mercenary (0.3.6)
211
+ minima (2.5.1)
212
+ jekyll (>= 3.5, < 5.0)
213
+ jekyll-feed (~> 0.9)
214
+ jekyll-seo-tag (~> 2.1)
215
+ minitest (5.24.0)
216
+ nokogiri (1.13.10-x86_64-darwin)
217
+ racc (~> 1.4)
218
+ octokit (4.25.1)
219
+ faraday (>= 1, < 3)
220
+ sawyer (~> 0.9)
221
+ pathutil (0.16.2)
222
+ forwardable-extended (~> 2.6)
223
+ public_suffix (5.1.1)
224
+ racc (1.8.0)
225
+ rb-fsevent (0.11.2)
226
+ rb-inotify (0.11.1)
227
+ ffi (~> 1.0)
228
+ rexml (3.3.0)
229
+ strscan
230
+ rouge (3.30.0)
231
+ ruby2_keywords (0.0.5)
232
+ rubyzip (2.3.2)
233
+ safe_yaml (1.0.5)
234
+ sass (3.7.4)
235
+ sass-listen (~> 4.0.0)
236
+ sass-listen (4.0.0)
237
+ rb-fsevent (~> 0.9, >= 0.9.4)
238
+ rb-inotify (~> 0.9, >= 0.9.7)
239
+ sawyer (0.9.2)
240
+ addressable (>= 2.3.5)
241
+ faraday (>= 0.17.3, < 3)
242
+ simpleidn (0.2.3)
243
+ strscan (3.1.0)
244
+ terminal-table (1.8.0)
245
+ unicode-display_width (~> 1.1, >= 1.1.1)
246
+ typhoeus (1.4.1)
247
+ ethon (>= 0.9.0)
248
+ tzinfo (2.0.6)
249
+ concurrent-ruby (~> 1.0)
250
+ unicode-display_width (1.8.0)
251
+ zeitwerk (2.6.16)
252
+
253
+ PLATFORMS
254
+ ruby
255
+
256
+ DEPENDENCIES
257
+ github-pages
258
+
259
+ BUNDLED WITH
260
+ 1.17.2
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 justincui03
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,11 +1,9 @@
1
  ---
2
- title: Or Bench
3
  emoji: 🏆
4
- colorFrom: indigo
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 4.44.1
8
- app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
  ---
 
1
  ---
2
+ title: OR-Bench Leaderboard
3
  emoji: 🏆
4
+ colorFrom: yellow
5
+ colorTo: purple
6
+ sdk: static
 
 
7
  pinned: false
8
  license: apache-2.0
9
  ---
css/main.css ADDED
@@ -0,0 +1,582 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ * {
2
+ box-sizing: border-box;
3
+ }
4
+
5
+ body {
6
+ padding: 0;
7
+ margin: 0 auto;
8
+ font-family: 'Merriweather', Helvetica, Optima, Palatino, Georgia, "Helvetica Neue";
9
+ font-size: 20px;
10
+ line-height: 20px;
11
+ /* max-width: 950px; */
12
+ /* background-color: rgba(170, 170, 170, 0.21); */
13
+ background-color: white;
14
+ }
15
+
16
+ section,
17
+ p,
18
+ header {
19
+ font-size: inherit;
20
+ line-height: inherit;
21
+ }
22
+ .doublealign {
23
+ text-align: justify;
24
+ color:#555555;
25
+ line-height: 1.2; /* Sets the line height to 1.2 */
26
+ }
27
+
28
+ a {
29
+ text-decoration: none;
30
+ color: #0049a5;
31
+ }
32
+
33
+ .container,
34
+ footer {
35
+ max-width: 1250px;
36
+ margin: 0px auto;
37
+ }
38
+
39
+ .container{
40
+ color:#555555
41
+ }
42
+
43
+ ::-webkit-scrollbar {
44
+ -webkit-appearance: none;
45
+ width: 5px;
46
+ height: 5px;
47
+ }
48
+ ::-webkit-scrollbar-thumb {
49
+ border-radius: 5px;
50
+ background-color: rgba(0, 0, 0, 0.5);
51
+ -webkit-box-shadow: 0 0 1px rgba(255, 255, 255, 0.5);
52
+ }
53
+
54
+ /*--- navigation bar ---*/
55
+ .navbar {
56
+ position: fixed;
57
+ background-color: whitesmoke;
58
+ margin-bottom: 30px;
59
+ background-image: linear-gradient(to bottom, #157fb3, white);
60
+ top: 0;
61
+ width: 100%;
62
+ z-index: 1;
63
+ }
64
+ .nav-link,
65
+ .navbar-brand {
66
+ color: inherit;
67
+ cursor: pointer;
68
+ margin-top: 5px;
69
+ margin-bottom: 5px;
70
+ }
71
+
72
+ .navbar-brand {
73
+ font-size: 1.7em;
74
+ font-weight: bold;
75
+ font-variant: small-caps;
76
+ }
77
+
78
+ .nav-link {
79
+ font-size: 1.3em;
80
+ margin-right: 1.5em !important;
81
+ }
82
+
83
+ .navbar-collapse {
84
+ justify-content: flex-end;
85
+ }
86
+
87
+ .nav-fix-margin {
88
+ margin-bottom: 100px;
89
+ }
90
+
91
+ .majorrule {
92
+ margin: 0px 0px;
93
+ border: 0;
94
+ height: 20px;
95
+ background: #f58025;
96
+ }
97
+
98
+ .header-block {
99
+ display: grid;
100
+ /*grid-template-columns: 0.5fr 1.5fr;*/
101
+ /*max-width: 800px;*/
102
+ text-align: center;
103
+ /*width: 20%;*/
104
+ margin: 90px auto 30px auto;
105
+ /*grid-column-gap: 0px;*/
106
+ grid-row-gap: 10px;
107
+ }
108
+
109
+ .title {
110
+ grid-column: 2;
111
+ grid-row: 1;
112
+ font-size: 4em;
113
+ color: #f58025;
114
+ font-weight: bolder;
115
+ font-variant: small-caps;
116
+ text-align: left;
117
+ margin-top: 5px;
118
+ padding: 20px 0px 0px 25px;
119
+ }
120
+
121
+ .title-logo {
122
+ text-align: right;
123
+ /*margin-right: 10px;*/
124
+ }
125
+ .title-logo img {
126
+ width: 80px;
127
+ }
128
+
129
+ @media (max-width: 768px) {
130
+ .header-block {
131
+ flex-direction: column;
132
+ align-items: center;
133
+ text-align: center;
134
+ }
135
+
136
+ .title {
137
+ font-size: 3em;
138
+ margin: 10px 0;
139
+ }
140
+
141
+ .description {
142
+ font-size: 1em;
143
+ margin: 10px 0;
144
+ }
145
+ }
146
+
147
+ @media (max-width: 480px) {
148
+ .title {
149
+ font-size: 1.5em;
150
+ }
151
+
152
+ .description {
153
+ font-size: 0.9em;
154
+ }
155
+
156
+ .title-logo img {
157
+ width: 50px;
158
+ }
159
+ }
160
+
161
+ .iframe-container {
162
+ display: flex; /* Establishes a flex container */
163
+ justify-content: center; /* Centers content along the main axis (horizontally) */
164
+ align-items: center; /* Centers content along the cross axis (vertically) */
165
+ margin-top:20px;
166
+ padding-left: 0; /* was -300px: negative padding is invalid CSS and the declaration was being discarded */
167
+ padding-right: 0; /* was -300px: negative padding is invalid CSS and the declaration was being discarded */
168
+ }
169
+
170
+
171
+ .description {
172
+ grid-column: 1/3;
173
+ grid-row: 2;
174
+ font-size: 1.3em;
175
+ line-height: 1.5em;
176
+ margin-left: 15px;
177
+ margin-top: -5px;
178
+ color:#555555
179
+ }
180
+
181
+ .flexbox-container {
182
+ display: flex;
183
+ width: 80%;
184
+ margin: 30px auto 20px auto;
185
+ justify-content: space-between;
186
+ }
187
+
188
+ .flexbox-container .element {
189
+ display: block;
190
+ width: 33%;
191
+ text-align: center;
192
+ }
193
+
194
+ .element p {
195
+ margin-left: 20px;
196
+ margin: 15px auto auto auto;
197
+ font-size: 1.5em;
198
+ }
199
+
200
+ .element img {
201
+ width: 120px;
202
+ margin: auto 0px;
203
+ }
204
+ .element .description {
205
+ font-size: 1.1em;
206
+ text-align: left;
207
+ font-style: italic;
208
+ }
209
+
210
+ .title-rule {
211
+ height: 12px;
212
+ border: 0;
213
+ box-shadow: inset 0 12px 12px -12px rgba(0, 0, 0, 0.5);
214
+ }
215
+
216
+ .details {
217
+ display: grid;
218
+ grid-template-columns: 1fr 1fr;
219
+ text-align: center;
220
+ column-gap: 50px;
221
+ width: 100%;
222
+ }
223
+
224
+ .box p {
225
+ font-size: 1.5em;
226
+ margin: 5px 0px 20px 0px;
227
+ }
228
+
229
+ .box img {
230
+ width: 75%;
231
+ }
232
+
233
+ .details .images {
234
+ text-align: center;
235
+ }
236
+
237
+ img.analysis {
238
+ margin: 10px 0px 0px 0px;
239
+ width: 75%;
240
+ }
241
+
242
+ .codeblock {
243
+ text-align: left;
244
+ padding: 0px 0px;
245
+ }
246
+
247
+ pre,
248
+ code {
249
+ font-size: 12px;
250
+ border: 1px solid #e8e8e8;
251
+ border-radius: 3px;
252
+ background-color: #eef;
253
+ }
254
+
255
+ code {
256
+ padding: 1px 5px;
257
+ }
258
+
259
+ pre {
260
+ padding: 5px 10px;
261
+ overflow-x: auto;
262
+ }
263
+ pre > code {
264
+ border: 0;
265
+ padding-right: 0;
266
+ padding-left: 0;
267
+ }
268
+
269
+ .divider hr {
270
+ border: 0;
271
+ height: 2px;
272
+ opacity: 0.8;
273
+ background-image: linear-gradient(
274
+ to right,
275
+ rgba(0, 0, 0, 0),
276
+ rgba(35, 156, 232, 0.75),
277
+ rgba(0, 0, 0, 0)
278
+ );
279
+ margin-top: 30px;
280
+ margin-bottom: 30px;
281
+ }
282
+
283
+ .scroller {
284
+ /*display: flex;*/
285
+ flex-wrap: nowrap;
286
+ overflow-x: auto;
287
+ }
288
+
289
+ .scroller img {
290
+ flex: 0 0 auto;
291
+ height: 220px;
292
+ width: auto;
293
+ margin: 5px 10px 5px 10px;
294
+ }
295
+
296
+ .vspace10 {
297
+ margin-bottom: 10px;
298
+ }
299
+ .vspace30 {
300
+ margin-bottom: 30px;
301
+ }
302
+ .vspace50 {
303
+ margin-bottom: 50px;
304
+ }
305
+ section .heading {
306
+ font-size: 1.6em;
307
+ font-weight: normal;
308
+ margin-top: 20px;
309
+ margin-bottom: 0px;
310
+ text-align: center;
311
+ }
312
+
313
+ .heading-math {
314
+ font-size: 0.9em;
315
+ }
316
+
317
+ .toprule {
318
+ margin: 0px 0px;
319
+ border: 0;
320
+ height: 4px;
321
+ background: #e87722;
322
+ }
323
+
324
+ .datatable {
325
+ overflow-x: scroll;
326
+ font-size: 0.8em;
327
+ text-align: center;
328
+ clear: both;
329
+ border-collapse: collapse;
330
+ table-layout: fixed;
331
+ word-wrap: break-word;
332
+ color:#555555;
333
+ width:100%;
334
+ }
335
+
336
+ .datatable thead,
337
+ .datatable th {
338
+ text-align: center;
339
+ color:#555555
340
+ }
341
+
342
+ .datatable th {
343
+ /* padding-left: 10px; */
344
+ font-family: monospace;
345
+ }
346
+
347
+ .datatable td {
348
+ text-align: center;
349
+ padding-right: 20px;
350
+ }
351
+
352
+ .td-footer {
353
+ font-style: italic;
354
+ font-size: 0.7em;
355
+ }
356
+
357
+ .td-warning {
358
+ font-style: italic;
359
+ font-size: 0.7em;
360
+ }
361
+
362
+ /*Leaderboard buttons*/
363
+ .button-list {
364
+ margin-top: 20px;
365
+ margin-bottom: 20px; /* was a second, redundant margin-top: 20px — NOTE(review): confirm bottom spacing was the intent */
366
+ }
367
+
368
+ .btn {
369
+ background-color: #35aacb;
370
+ border-color: gainsboro;
371
+ color: white;
372
+ font-size: 1.2em;
373
+ margin-right: 20px;
374
+ margin-bottom: 10px;
375
+ border-radius: 0.5em;
376
+ font-weight: bold;
377
+ }
378
+
379
+ .imagenet_btn {
380
+ background-color: #f22e7d;
381
+ border-color: gainsboro;
382
+ color: white;
383
+ font-size: 1.2em;
384
+ margin-right: 20px;
385
+ margin-bottom: 10px;
386
+ border-radius: 0.5em;
387
+ font-weight: bold;
388
+ }
389
+
390
+ .btn:hover {
391
+ background-color: #a9c4f5;
392
+ border-color: #a9c4f5;
393
+ color: #E87722;
394
+ }
395
+
396
+ .button-list .heading {
397
+ text-align: left;
398
+ margin-top: 20px; /* was unitless "20": non-zero lengths need a unit, otherwise the declaration is dropped */
399
+ margin-bottom: 20px;
400
+ font-size: 1.6em;
401
+ }
402
+
403
+ .button-list .heading u {
404
+ text-decoration: none;
405
+ border-bottom: 6px solid #E87722;
406
+ }
407
+
408
+
409
+ /* Leaderboard table css */
410
+ table .arch {
411
+ width: 13%;
412
+ }
413
+
414
+ table .venue {
415
+ width: 10%;
416
+ }
417
+ table .rank {
418
+ width: 5%;
419
+ }
420
+ table .method {
421
+ width: 40%;
422
+ }
423
+ table .extra-data {
424
+ width: 6%;
425
+ }
426
+ table .ca {
427
+ width: 10%;
428
+ }
429
+ table .aa {
430
+ width: 12%;
431
+ }
432
+ table .aa_3d {
433
+ width: 12%;
434
+ }
435
+
436
+ table .aatd,
437
+ .catd,
438
+ .methoddt,
439
+ .aa-extd,
440
+ .aatd_3d {
441
+ font-size: 1.1em;
442
+ }
443
+
444
+ table .datatd, .flagsd-emoji {
445
+ font-size: 2em;
446
+ }
447
+
448
+ table .catd,
449
+ .aatd, .aa-extd, .aatd_3d {
450
+ font-family: monospace;
451
+ }
452
+
453
+ .box2 {
454
+ text-align: left;
455
+ }
456
+
457
+ .box2 p {
458
+ font-size: 1.5em;
459
+ text-align: center;
460
+ }
461
+
462
+ .box2 ul {
463
+ list-style: none;
464
+ font-size: 1.05em;
465
+ line-height: 1.5em;
466
+ }
467
+
468
+ .box2 ul li::before {
469
+ content: "➥";
470
+ display: inline-block;
471
+ margin-right: 1rem;
472
+ }
473
+
474
+ .box2 li i {
475
+ color: black;
476
+ }
477
+
478
+ #contribute {
479
+ margin-top: 20px;
480
+ }
481
+
482
+ hr.bottomrule {
483
+ width: 950px;
484
+ border: 0;
485
+ height: 0;
486
+ margin-bottom: 0px;
487
+ border-top: 1px solid rgba(0, 0, 0, 0.1);
488
+ border-bottom: 1px solid rgba(255, 255, 255, 0.3);
489
+ }
490
+
491
+ footer {
492
+ margin-top: 0px;
493
+ text-align: center;
494
+ }
495
+
496
+ .question {
497
+ font-weight: bold;
498
+ }
499
+ .answer {
500
+ margin-left: 30px;
501
+ }
502
+ .qa-box {
503
+ text-align: justify;
504
+ }
505
+
506
+ @media (min-width: 1200px) {
507
+ .container footer {
508
+ max-width: 1000px;
509
+ }
510
+ }
511
+
512
+ .logo-container {
513
+ display: flex; /* Enables flexible box layout */
514
+ align-items: center; /* Centers items vertically in the container */
515
+ text-align: right;
516
+ }
517
+
518
+ .logo {
519
+ height: 20px; /* Adjust based on your actual logo's size */
520
+ width: auto; /* Maintains the aspect ratio */
521
+ margin-right: 10px; /*Space between the logo and the text*/
522
+ }
523
+
524
+ .logo-text {
525
+ font-size: 20px;
526
+ /* font-weight: bold; */
527
+ }
528
+
529
+ html body .app.svelte-182fdeq.svelte-182fdeq {
530
+ padding: 0 !important;
531
+ }
532
+
533
+ :root {
534
+ --size-4: 0px;
535
+ --size-8: 0px;
536
+ }
537
+
538
+ @media (max-width: 480px) {
539
+ #or-bench-table {
540
+ font-size: 10px; /* Adjust font size for smaller screens */
541
+ }
542
+ .logo-text {
543
+ font-size: 10px;
544
+ }
545
+
546
+ .logo {
547
+ height: 10px; /* Adjust based on your actual logo's size */
548
+ width: auto; /* Maintains the aspect ratio */
549
+ margin-right: 10px; /*Space between the logo and the text*/
550
+ }
551
+ .logo-container {
552
+ min-width: 150px; /* Adjust this value as needed */
553
+ display: flex; /* Enables flexible box layout */
554
+ align-items: center; /* Centers items vertically in the container */
555
+ text-align: right;
556
+ }
557
+
558
+ .model-header {
559
+ min-width: 40%; /* Adjust this value as needed */
560
+ text-align: right; /* Align text to the right */
561
+ }
562
+ .number-header {
563
+ min-width: 20%; /* Adjust this value as needed */
564
+ text-align: right; /* Align text to the right */
565
+ }
566
+
567
+ .datatable thead,
568
+ .datatable th {
569
+ text-align: right;
570
+ color:#555555
571
+ }
572
+
573
+ .datatable th {
574
+ /* padding-left: 10px; */
575
+ font-family: monospace;
576
+ }
577
+
578
+ .datatable td {
579
+ text-align: right;
580
+ padding-right: 10px;
581
+ }
582
+ }
data/or-bench.html ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <table id="or-bench-table" class="datatable" style="width: 100%"><thead>
2
+ <tr>
3
+ <th align="right" class="model-header">model</th>
4
+ <th align="right" class="number-header">over refusal(%)</th>
5
+ <th align="right" class="number-header">toxic acceptance(%)</th>
6
+ <th align="right" class="number-header">average</th>
7
+ </tr></thead>
8
+ <tbody>
9
+ <tr>
10
+ <td align="right"><div class="logo-container">
11
+ <img src="../images/anthropic_small.svg" alt="Anthropic logo" class="logo">
12
+ <span class="logo-text">Claude-2.1</span>
13
+ </div></td>
14
+ <td align="right">99.8</td>
15
+ <td align="right">0</td>
16
+ <td align="right">49.9</td>
17
+ </tr>
18
+ <tr>
19
+ <td align="right"><div class="logo-container">
20
+ <img src="../images/anthropic_small.svg" alt="Anthropic logo" class="logo">
21
+ <span class="logo-text">Claude-3-haiku</span>
22
+ </div></td>
23
+ <td align="right">96.3</td>
24
+ <td align="right">0.3</td>
25
+ <td align="right">48.3</td>
26
+ </tr>
27
+ <tr>
28
+ <td align="right"><div class="logo-container">
29
+ <img src="../images/anthropic_small.svg" alt="Anthropic logo" class="logo">
30
+ <span class="logo-text">Claude-3-sonnet</span>
31
+ </div></td>
32
+ <td align="right">94.5</td>
33
+ <td align="right">0.3</td>
34
+ <td align="right">47.4</td>
35
+ </tr>
36
+ <tr>
37
+ <td align="right"><div class="logo-container">
38
+ <img src="../images/anthropic_small.svg" alt="Anthropic logo" class="logo">
39
+ <span class="logo-text">Claude-3-opus</span>
40
+ </div></td>
41
+ <td align="right">91.0</td>
42
+ <td align="right">1.9</td>
43
+ <td align="right">46.5</td>
44
+ </tr>
45
+ <tr>
46
+ <td align="right"><div class="logo-container">
47
+ <img src="../images/anthropic_small.svg" alt="Anthropic logo" class="logo">
48
+ <span class="logo-text">Claude-3.5-sonnet</span>
49
+ </div></td>
50
+ <td align="right">43.8</td>
51
+ <td align="right">3.4</td>
52
+ <td align="right">23.6</td>
53
+ </tr>
54
+ <tr>
55
+ <td align="right"><div class="logo-container">
56
+ <img src="../images/google_small.svg" alt="Google logo" class="logo">
57
+ <span class="logo-text">Gemma-7b</span>
58
+ </div></td>
59
+ <td align="right">26.3</td>
60
+ <td align="right">14.5</td>
61
+ <td align="right">20.4</td>
62
+ </tr>
63
+ <tr>
64
+ <td align="right"><div class="logo-container">
65
+ <img src="../images/google_small.svg" alt="Google logo" class="logo">
66
+ <span class="logo-text">Gemma-2-9b</span>
67
+ </div></td>
68
+ <td align="right">80.0</td>
69
+ <td align="right">2.0</td>
70
+ <td align="right">41.0</td>
71
+ </tr>
72
+ <tr>
73
+ <td align="right"><div class="logo-container">
74
+ <img src="../images/google_small.svg" alt="Google logo" class="logo">
75
+ <span class="logo-text">Gemma-2-27b</span>
76
+ </div></td>
77
+ <td align="right">62.0</td>
78
+ <td align="right">3.0</td>
79
+ <td align="right">32.5</td>
80
+ </tr>
81
+ <tr>
82
+ <td align="right"><div class="logo-container">
83
+ <img src="../images/google_small.svg" alt="Google logo" class="logo">
84
+ <span class="logo-text">Gemini-1.0-pro</span>
85
+ </div></td>
86
+ <td align="right">9.7</td>
87
+ <td align="right">21.3</td>
88
+ <td align="right">15.5</td>
89
+ </tr>
90
+ <tr>
91
+ <td align="right"><div class="logo-container">
92
+ <img src="../images/google_small.svg" alt="Google logo" class="logo">
93
+ <span class="logo-text">Gemini-1.5-flash-latest</span></div></td>
94
+ <td align="right">84.3</td>
95
+ <td align="right">1.2</td>
96
+ <td align="right">42.7</td>
97
+ </tr>
98
+ <tr>
99
+ <td align="right"><div class="logo-container">
100
+ <img src="../images/google_small.svg" alt="Google logo" class="logo">
101
+ <span class="logo-text">Gemini-1.5-pro-latest</span></div></td>
102
+ <td align="right">88.0</td>
103
+ <td align="right">0.6</td>
104
+ <td align="right">44.3</td>
105
+ </tr>
106
+ <tr>
107
+ <td align="right"><div class="logo-container">
108
+ <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
109
+ <span class="logo-text">GPT-3.5-turbo-0301</span></div></td>
110
+ <td align="right">57.4</td>
111
+ <td align="right">5.3</td>
112
+ <td align="right">31.4</td>
113
+ </tr>
114
+ <tr>
115
+ <td align="right"><div class="logo-container">
116
+ <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
117
+ <span class="logo-text">GPT-3.5-turbo-0613</span></div></td>
118
+ <td align="right">38.4</td>
119
+ <td align="right">7.9</td>
120
+ <td align="right">23.2</td>
121
+ </tr>
122
+ <tr>
123
+ <td align="right"><div class="logo-container">
124
+ <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
125
+ <span class="logo-text">GPT-3.5-turbo-0125</span></div></td>
126
+ <td align="right">12.7</td>
127
+ <td align="right">37.9</td>
128
+ <td align="right">25.3</td>
129
+ </tr>
130
+ <tr>
131
+ <td align="right"><div class="logo-container">
132
+ <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
133
+ <span class="logo-text">GPT-4-0125-preview</span></div></td>
134
+ <td align="right">12.2</td>
135
+ <td align="right">7</td>
136
+ <td align="right">9.6</td>
137
+ </tr>
138
+ <tr>
139
+ <td align="right"><div class="logo-container">
140
+ <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
141
+ <span class="logo-text">GPT-4-turbo-2024-04-09</span></div></td>
142
+ <td align="right">12.8</td>
143
+ <td align="right">3.5</td>
144
+ <td align="right">8.1</td>
145
+ </tr>
146
+ <tr>
147
+ <td align="right"><div class="logo-container">
148
+ <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
149
+ <span class="logo-text">GPT-4o</span></div></td>
150
+ <td align="right">6.8</td>
151
+ <td align="right">15.1</td>
152
+ <td align="right">10.9</td>
153
+ </tr>
154
+ <tr>
155
+ <td align="right"><div class="logo-container">
156
+ <img src="../images/openai_small.svg" alt="OpenAI logo" class="logo">
157
+ <span class="logo-text">GPT-4o-08-06</span></div></td>
158
+ <td align="right">13.0</td>
159
+ <td align="right">14.0</td>
160
+ <td align="right">13.5</td>
161
+ </tr>
162
+ <tr>
163
+ <td align="right"><div class="logo-container">
164
+ <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
165
+ <span class="logo-text">Llama-2-7b</span></div></td>
166
+ <td align="right">87.5</td>
167
+ <td align="right">0.4</td>
168
+ <td align="right">43.9</td>
169
+ </tr>
170
+ <tr>
171
+ <td align="right"><div class="logo-container">
172
+ <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
173
+ <span class="logo-text">Llama-2-13b</span></div></td>
174
+ <td align="right">91.0</td>
175
+ <td align="right">0.3</td>
176
+ <td align="right">45.7</td>
177
+ </tr>
178
+ <tr>
179
+ <td align="right"><div class="logo-container">
180
+ <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
181
+ <span class="logo-text">Llama-2-70b</span></div></td>
182
+ <td align="right">96.1</td>
183
+ <td align="right">0.3</td>
184
+ <td align="right">48.2</td>
185
+ </tr>
186
+ <tr>
187
+ <td align="right"><div class="logo-container">
188
+ <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
189
+ <span class="logo-text">Llama-3-8b</span></div></td>
190
+ <td align="right">69.4</td>
191
+ <td align="right">5</td>
192
+ <td align="right">37.2</td>
193
+ </tr>
194
+ <tr>
195
+ <td align="right"><div class="logo-container">
196
+ <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
197
+ <span class="logo-text">Llama-3-70b</span></div></td>
198
+ <td align="right">37.7</td>
199
+ <td align="right">21.3</td>
200
+ <td align="right">29.5</td>
201
+ </tr>
202
+ <tr>
203
+ <td align="right"><div class="logo-container">
204
+ <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
205
+ <span class="logo-text">Llama-3.1-8B</span></div></td>
206
+ <td align="right">31.0</td>
207
+ <td align="right">9.0</td>
208
+ <td align="right">20.0</td>
209
+ </tr>
210
+ <tr>
211
+ <td align="right"><div class="logo-container">
212
+ <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
213
+ <span class="logo-text">Llama-3.1-70B</span></div></td>
214
+ <td align="right">3.0</td>
215
+ <td align="right">30.0</td>
216
+ <td align="right">16.5</td>
217
+ </tr>
218
+ <tr>
219
+ <td align="right"><div class="logo-container">
220
+ <img src="../images/meta_small.svg" alt="Meta logo" class="logo">
221
+ <span class="logo-text">Llama-3.1-405B</span></div></td>
222
+ <td align="right">6.0</td>
223
+ <td align="right">21.0</td>
224
+ <td align="right">13.5</td>
225
+ </tr>
226
+ <tr>
227
+ <td align="right"><div class="logo-container">
228
+ <img src="../images/mistral_small.png" alt="Mistral AI logo" class="logo">
229
+ <span class="logo-text">Mistral-small-latest</span></div></td>
230
+ <td align="right">13.3</td>
231
+ <td align="right">20.3</td>
232
+ <td align="right">16.8</td>
233
+ </tr>
234
+ <tr>
235
+ <td align="right"><div class="logo-container">
236
+ <img src="../images/mistral_small.png" alt="Mistral AI logo" class="logo">
237
+ <span class="logo-text">Mistral-medium-latest</span></div></td>
238
+ <td align="right">14.0</td>
239
+ <td align="right">22.5</td>
240
+ <td align="right">18.2</td>
241
+ </tr>
242
+ <tr>
243
+ <td align="right"><div class="logo-container">
244
+ <img src="../images/mistral_small.png" alt="Mistral AI logo" class="logo">
245
+ <span class="logo-text">Mistral-large-latest</span></div></td>
246
+ <td align="right">9.8</td>
247
+ <td align="right">27.2</td>
248
+ <td align="right">18.5</td>
249
+ </tr>
250
+ <tr>
251
+ <td align="right"><div class="logo-container">
252
+ <img src="../images/BABA_SMALL.svg" alt="Alibaba logo" class="logo">
253
+ <span class="logo-text">Qwen-1.5-7B</span></div></td>
254
+ <td align="right">39.2</td>
255
+ <td align="right">15</td>
256
+ <td align="right">27.1</td>
257
+ </tr>
258
+ <tr>
259
+ <td align="right"><div class="logo-container">
260
+ <img src="../images/BABA_SMALL.svg" alt="Alibaba logo" class="logo">
261
+ <span class="logo-text">Qwen-1.5-32B</span></div></td>
262
+ <td align="right">50.8</td>
263
+ <td align="right">4.4</td>
264
+ <td align="right">27.6</td>
265
+ </tr>
266
+ <tr>
267
+ <td align="right"><div class="logo-container">
268
+ <img src="../images/BABA_SMALL.svg" alt="Alibaba logo" class="logo">
269
+ <span class="logo-text">Qwen-1.5-72B</span></div></td>
270
+ <td align="right">46.9</td>
271
+ <td align="right">5.6</td>
272
+ <td align="right">26.3</td>
273
+ </tr>
274
+ </tbody></table>
images/80k.png ADDED

Git LFS Details

  • SHA256: 7d38b6b7baa80369e1dc3384010668c6328ec6f6d5401600f5907cf2fec17118
  • Pointer size: 128 Bytes
  • Size of remote file: 130 Bytes
images/BABA_SMALL.svg ADDED
images/anthropic_small.svg ADDED
images/click.png ADDED

Git LFS Details

  • SHA256: 53d236ff28cb0d4cbc1550215508d4b459a8dad3cc23ee46f763148ded805d3c
  • Pointer size: 128 Bytes
  • Size of remote file: 130 Bytes
images/google.svg ADDED
images/google_small.svg ADDED
images/hard.png ADDED

Git LFS Details

  • SHA256: 45ea5d14ff1b4f06f09dc2ebf56aac7ebdf0a3aa0cb681c1a886fec02051c72f
  • Pointer size: 128 Bytes
  • Size of remote file: 130 Bytes
images/logo.png ADDED

Git LFS Details

  • SHA256: 42cd69760edf09ace35098ecea0404af85b510c6f60f70d91a0ecc4a1d9efbcf
  • Pointer size: 129 Bytes
  • Size of remote file: 4.23 kB
images/meta_small.svg ADDED
images/mirror.png ADDED

Git LFS Details

  • SHA256: 0735d4d8948c3cfb093a8f220b61b3df2e55d3560842425ec9b14bcf32eaeaa5
  • Pointer size: 128 Bytes
  • Size of remote file: 130 Bytes
images/mistral_small.png ADDED

Git LFS Details

  • SHA256: d340434a8eb4986b494036e98f8074a228c8668085bfcf2558d7f2483c75a6eb
  • Pointer size: 130 Bytes
  • Size of remote file: 11.8 kB
images/openai.svg ADDED
images/openai_small.svg ADDED
images/overall_x_y_plot.png ADDED

Git LFS Details

  • SHA256: 0b60b754c3b5c649408be3863969ea869733295239fa6bd16adb2264ed83ed5a
  • Pointer size: 132 Bytes
  • Size of remote file: 1.28 MB
images/toxic.png ADDED

Git LFS Details

  • SHA256: 1478ac7b66c2d13c63bcc21a130dba8d844d397b6009070b01a3aa19b608610c
  • Pointer size: 128 Bytes
  • Size of remote file: 130 Bytes
index.html ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+
3
+ <head>
4
+ <!-- Global site tag (gtag.js) - Google Analytics -->
5
+ <script async src="https://www.googletagmanager.com/gtag/js?id=UA-178132094-1"></script>
6
+ <script>
7
+ window.dataLayer = window.dataLayer || [];
8
+ function gtag() {
9
+ dataLayer.push(arguments);
10
+ }
11
+ gtag("js", new Date());
12
+
13
+ gtag("config", "UA-178132094-1");
14
+ </script>
15
+
16
+ <meta charset="UTF-8" />
17
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
18
+ <!-- <meta name="viewport" content="width=1024" /> -->
19
+ <title>OR-Bench: Over Refusal Benchmark</title>
20
+ <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
21
+ <link href="https://fonts.googleapis.com/css2?family=Montserrat:wght@400;700&display=swap" rel="stylesheet">
22
+ <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js"></script>
23
+ <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
24
+ <!-- polyfill.io was compromised in 2024; load the same bundle from the cdnjs mirror instead --><script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
25
+ <script type="text/javascript" async
26
+ src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-MML-AM_CHTML">
27
+ </script>
28
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/foundation/6.4.3/css/foundation.min.css" />
29
+ <link rel="stylesheet" href="https://cdn.rawgit.com/jpswalsh/academicons/master/css/academicons.min.css" />
30
+ <script src="https://kit.fontawesome.com/b939870cfb.js" crossorigin="anonymous"></script>
31
+ <link rel="stylesheet" href="https://cdn.datatables.net/1.10.24/css/dataTables.foundation.min.css">
32
+ <script type="text/javascript" src="https://cdn.datatables.net/1.10.24/js/jquery.dataTables.min.js"></script>
33
+ <link rel="stylesheet" href="./css/main.css" />
34
+ </head>
35
+
36
+
37
+ <body>
38
+ <nav class="navbar navbar-expand-md">
39
+ <div class="container">
40
+ <a class="navbar-brand" href="./index.html"
41
+ >OR-Bench</a>
42
+ <button
43
+ class="navbar-toggler navbar-light"
44
+ type="button"
45
+ data-toggle="collapse"
46
+ data-target="#main-navigation"
47
+ >
48
+ <span class="navbar-toggler-icon"></span>
49
+ </button>
50
+ <div class="collapse navbar-collapse" id="main-navigation">
51
+ <ul class="navbar-nav">
52
+ <li class="nav-item">
53
+ <a class="nav-link" href="#leaderboard">Leaderboards</a>
54
+ </li>
55
+ <li>
56
+ <a class="nav-link" href="https://huggingface.co/datasets/orbench-llm/or-bench" target="_blank">Datasets</a>
57
+ </li>
58
+ <li>
59
+ <a class="nav-link" href="https://huggingface.co/spaces/orbench-llm/or-bench-demo" target="_blank">Demo</a>
60
+ </li>
61
+ <li>
62
+ <a class="nav-link text-nowrap" href="https://github.com/orbench/or-bench"
63
+ target="_blank">Github</a>
64
+ </li>
65
+ </ul>
66
+ </div>
67
+ </div>
68
+ </nav>
69
+
70
+
71
+ <!-- <hr class="toprule" /> -->
72
+ <header>
73
+ <div class="header-block container">
74
+ <div class="title-logo"><img src="./images/logo.png" alt="logo" /></div>
75
+ <div class="title">OR-BENCH</div>
76
+ <div class="description">
77
+ An over-refusal benchmark for large language models
78
+ </div>
79
+ </div>
80
+ </header>
81
+ <!-- <hr class="toprule" /> -->
82
+
83
+ <div class="container">
84
+ <section id="introduction">
85
+ <div class="overview">
86
+ <p class="doublealign">
87
+ <b>Large Language Models (LLMs) </b> require careful safety alignment to prevent malicious outputs. While significant research focuses on mitigating harmful content generation,
88
+ the enhanced safety often comes with the side effect of over-refusal, where LLMs may reject innocuous prompts and become less helpful.
89
+ Although the issue of over-refusal has been empirically observed, a systematic measurement is challenging
90
+ due to the difficulty of crafting prompts that appear harmful but are benign.<br><br>
91
+
92
+ We introduce OR-Bench, the <b>first large-scale over-refusal benchmark</b>. OR-Bench comprises 80,000 over-refusal prompts across 10 common rejection categories, a subset of around 1,000 hard prompts that are challenging even for state-of-the-art LLMs, and an additional 600 toxic prompts to prevent indiscriminate responses.<br><br>
93
+
94
+ We plot the evaluation results in the following figure. The x-axis is the over-refusal rate and the y-axis is the rejection rate on real toxic prompts. In the ideal case, the model should be in the top-left corner, where it rejects the most toxic prompts and the fewest safe prompts.
95
+ </p>
96
+ <div style="margin-top:20px"><img src="./images/overall_x_y_plot.png" style="width: 100%;"/></div>
97
+ </div>
98
+ </section>
99
+
100
+ <div class="divider"><hr /></div>
101
+
102
+ <section class="container" id="div_cifar10_ipc1_heading">
103
+ <div id="div_or_bench" class="display responsive nowrap" style="width:100%"></div>
104
+ </section>
105
+
106
+ <div class="divider"><hr /></div>
107
+ <!-- <script
108
+ type="module"
109
+ src="https://gradio.s3-us-west-2.amazonaws.com/4.31.0/gradio.js"
110
+ ></script> -->
111
+
112
+ <div><b>Please try out our demos below 🚀</b></div>
113
+ <div class="iframe-container">
114
+ <iframe
115
+ src="https://orbench-llm-or-bench-demo.hf.space"
116
+ frameborder="0"
117
+ width="2160"
118
+ height="450"
119
+ ></iframe>
120
+ </div>
121
+ <div class="vspace50"></div>
122
+ </div>
123
+
124
+ <hr class="bottomrule" />
125
+
126
+ <footer>
127
+ <small>&copy; 2024, OR-Bench</small>
128
+ </footer>
129
+
130
+ <script>
131
+ // Sticky navbar: the <nav> element has no id, so look it up by tag and class.
132
+ var navbar = document.querySelector("nav.navbar");
133
+ // Offset of the navbar from the top of the page; 0 when no navbar exists.
134
+ var sticky = navbar ? navbar.offsetTop : 0;
135
+ // Add the "sticky" class once the page is scrolled past the navbar; remove it otherwise.
136
+ function myFunction() {
137
+ if (!navbar) {
138
+ return; // nothing to pin; avoids a TypeError on every scroll event
139
+ }
140
+ if (window.pageYOffset >= sticky) {
141
+ navbar.classList.add("sticky");
142
+ } else {
143
+ navbar.classList.remove("sticky");
144
+ }
145
+ }
146
+ window.onscroll = myFunction; // installed last, after navbar/sticky are resolved
147
+ </script>
148
+ <script>
149
+ $("#div_or_bench").load("./data/or-bench.html", function() {
150
+ $('#or-bench-table').DataTable({
151
+ "pageLength": 25, // Set the initial number of entries
152
+ "lengthMenu": [[10, 25, 50, -1], [10, 25, 50, "All"]], // Set options for lengthMenu
153
+ "order": [[3, "asc"]], // Sort by the fourth column (index 3) in ascending order
154
+ "paging": false, // Disables pagination
155
+ "responsive": true // Enable responsive feature
156
+ });
157
+ });
158
+ </script>
159
+ </body>
style.css ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ body {
2
+ padding: 2rem;
3
+ font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
+ }
5
+
6
+ h1 {
7
+ font-size: 16px;
8
+ margin-top: 0;
9
+ }
10
+
11
+ p {
12
+ color: rgb(107, 114, 128);
13
+ font-size: 15px;
14
+ margin-bottom: 10px;
15
+ margin-top: 5px;
16
+ }
17
+
18
+ .card {
19
+ max-width: 620px;
20
+ margin: 0 auto;
21
+ padding: 16px;
22
+ border: 1px solid lightgray;
23
+ border-radius: 16px;
24
+ }
25
+
26
+ .card p:last-child {
27
+ margin-bottom: 0;
28
+ }