Yilun Jin committed on
Commit
b0464d1
·
1 Parent(s): 5459fa4

update overall leaderboard

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
ShoppingMMLU_overall.json ADDED
@@ -0,0 +1,653 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "time": "241031154353",
3
+ "results": {
4
+ "Claude3-Sonnet": {
5
+ "META": {
6
+ "Method": [
7
+ "Claude3-Sonnet",
8
+ "https://aws.amazon.com/bedrock/claude/"
9
+ ],
10
+ "Parameters": "",
11
+ "Org": "Anthropic",
12
+ "OpenSource": "No",
13
+ "Verified": "Yes"
14
+ },
15
+ "Shopping Concept Understanding": {
16
+ "Overall": 80.75
17
+ },
18
+ "Shopping Knowledge Reasoning": {
19
+ "Overall": 71.63
20
+ },
21
+ "User Behavior Alignment": {
22
+ "Overall": 70.17
23
+ },
24
+ "Multi-lingual Abilities": {
25
+ "Overall": 67.76
26
+ }
27
+ },
28
+ "Claude2": {
29
+ "META": {
30
+ "Method": [
31
+ "Claude2",
32
+ "https://aws.amazon.com/bedrock/claude/"
33
+ ],
34
+ "Parameters": "",
35
+ "Org": "Anthropic",
36
+ "OpenSource": "No",
37
+ "Verified": "Yes"
38
+ },
39
+ "Shopping Concept Understanding": {
40
+ "Overall": 75.46
41
+ },
42
+ "Shopping Knowledge Reasoning": {
43
+ "Overall": 65.5
44
+ },
45
+ "User Behavior Alignment": {
46
+ "Overall": 63.53
47
+ },
48
+ "Multi-lingual Abilities": {
49
+ "Overall": 65.24
50
+ }
51
+ },
52
+ "ChatGPT": {
53
+ "META": {
54
+ "Method": [
55
+ "ChatGPT",
56
+ "https://platform.openai.com/docs/models#gpt-3-5-turbo"
57
+ ],
58
+ "Parameters": "",
59
+ "Org": "OpenAI",
60
+ "OpenSource": "No",
61
+ "Verified": "Yes"
62
+ },
63
+ "Shopping Concept Understanding": {
64
+ "Overall": 75.63
65
+ },
66
+ "Shopping Knowledge Reasoning": {
67
+ "Overall": 64.97
68
+ },
69
+ "User Behavior Alignment": {
70
+ "Overall": 59.79
71
+ },
72
+ "Multi-lingual Abilities": {
73
+ "Overall": 60.81
74
+ }
75
+ },
76
+ "LLaMA3-70B-Instruct": {
77
+ "META": {
78
+ "Method": [
79
+ "LLaMA3-70B-Instruct",
80
+ "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct"
81
+ ],
82
+ "Parameters": "70B",
83
+ "Org": "Meta",
84
+ "OpenSource": "Yes",
85
+ "Verified": "Yes"
86
+ },
87
+ "Shopping Concept Understanding": {
88
+ "Overall": 75.24
89
+ },
90
+ "Shopping Knowledge Reasoning": {
91
+ "Overall": 69.29
92
+ },
93
+ "User Behavior Alignment": {
94
+ "Overall": 67.67
95
+ },
96
+ "Multi-lingual Abilities": {
97
+ "Overall": 62.0
98
+ }
99
+ },
100
+ "QWen1.5-72B": {
101
+ "META": {
102
+ "Method": [
103
+ "QWen1.5-72B",
104
+ "https://huggingface.co/Qwen/Qwen1.5-72B"
105
+ ],
106
+ "Parameters": "72B",
107
+ "Org": "Alibaba",
108
+ "OpenSource": "Yes",
109
+ "Verified": "Yes"
110
+ },
111
+ "Shopping Concept Understanding": {
112
+ "Overall": 71.67
113
+ },
114
+ "Shopping Knowledge Reasoning": {
115
+ "Overall": 68.92
116
+ },
117
+ "User Behavior Alignment": {
118
+ "Overall": 64.12
119
+ },
120
+ "Multi-lingual Abilities": {
121
+ "Overall": 64.84
122
+ }
123
+ },
124
+ "LLaMA3-70B": {
125
+ "META": {
126
+ "Method": [
127
+ "LLaMA3-70B",
128
+ "https://huggingface.co/meta-llama/Meta-Llama-3-70B"
129
+ ],
130
+ "Parameters": "70B",
131
+ "Org": "Meta",
132
+ "OpenSource": "Yes",
133
+ "Verified": "Yes"
134
+ },
135
+ "Shopping Concept Understanding": {
136
+ "Overall": 69.59
137
+ },
138
+ "Shopping Knowledge Reasoning": {
139
+ "Overall": 63.56
140
+ },
141
+ "User Behavior Alignment": {
142
+ "Overall": 55.77
143
+ },
144
+ "Multi-lingual Abilities": {
145
+ "Overall": 58.95
146
+ }
147
+ },
148
+ "LLaMA2-70B-Chat": {
149
+ "META": {
150
+ "Method": [
151
+ "LLaMA2-70B-Chat",
152
+ "https://huggingface.co/meta-llama/Llama-2-70b-chat-hf"
153
+ ],
154
+ "Parameters": "70B",
155
+ "Org": "Meta",
156
+ "OpenSource": "Yes",
157
+ "Verified": "Yes"
158
+ },
159
+ "Shopping Concept Understanding": {
160
+ "Overall": 61.84
161
+ },
162
+ "Shopping Knowledge Reasoning": {
163
+ "Overall": 40.73
164
+ },
165
+ "User Behavior Alignment": {
166
+ "Overall": 44.2
167
+ },
168
+ "Multi-lingual Abilities": {
169
+ "Overall": 47.04
170
+ }
171
+ },
172
+ "LLaMA2-70B": {
173
+ "META": {
174
+ "Method": [
175
+ "LLaMA2-70B",
176
+ "https://huggingface.co/meta-llama/Llama-2-70b-hf"
177
+ ],
178
+ "Parameters": "70B",
179
+ "Org": "Meta",
180
+ "OpenSource": "Yes",
181
+ "Verified": "Yes"
182
+ },
183
+ "Shopping Concept Understanding": {
184
+ "Overall": 61.05
185
+ },
186
+ "Shopping Knowledge Reasoning": {
187
+ "Overall": 55.87
188
+ },
189
+ "User Behavior Alignment": {
190
+ "Overall": 43.24
191
+ },
192
+ "Multi-lingual Abilities": {
193
+ "Overall": 47.85
194
+ }
195
+ },
196
+ "Mixtral-8x7B": {
197
+ "META": {
198
+ "Method": [
199
+ "Mixtral-8x7B",
200
+ "https://huggingface.co/mistralai/Mixtral-8x7B-v0.1"
201
+ ],
202
+ "Parameters": "46.7B",
203
+ "Org": "MistralAI",
204
+ "OpenSource": "Yes",
205
+ "Verified": "Yes"
206
+ },
207
+ "Shopping Concept Understanding": {
208
+ "Overall": 59.43
209
+ },
210
+ "Shopping Knowledge Reasoning": {
211
+ "Overall": 54.32
212
+ },
213
+ "User Behavior Alignment": {
214
+ "Overall": 55.31
215
+ },
216
+ "Multi-lingual Abilities": {
217
+ "Overall": 44.69
218
+ }
219
+ },
220
+ "QWen1.5-14B": {
221
+ "META": {
222
+ "Method": [
223
+ "QWen1.5-14B",
224
+ "https://huggingface.co/Qwen/Qwen1.5-14B"
225
+ ],
226
+ "Parameters": "14B",
227
+ "Org": "Alibaba",
228
+ "OpenSource": "Yes",
229
+ "Verified": "Yes"
230
+ },
231
+ "Shopping Concept Understanding": {
232
+ "Overall": 67.22
233
+ },
234
+ "Shopping Knowledge Reasoning": {
235
+ "Overall": 60.92
236
+ },
237
+ "User Behavior Alignment": {
238
+ "Overall": 54.92
239
+ },
240
+ "Multi-lingual Abilities": {
241
+ "Overall": 55.21
242
+ }
243
+ },
244
+ "eCeLLM-L": {
245
+ "META": {
246
+ "Method": [
247
+ "eCeLLM-L",
248
+ "https://huggingface.co/NingLab/eCeLLM-L"
249
+ ],
250
+ "Parameters": "13B",
251
+ "Org": "OSU NingLab",
252
+ "OpenSource": "Yes",
253
+ "Verified": "Yes"
254
+ },
255
+ "Shopping Concept Understanding": {
256
+ "Overall": 61.54
257
+ },
258
+ "Shopping Knowledge Reasoning": {
259
+ "Overall": 54.84
260
+ },
261
+ "User Behavior Alignment": {
262
+ "Overall": 54.55
263
+ },
264
+ "Multi-lingual Abilities": {
265
+ "Overall": 59.64
266
+ }
267
+ },
268
+ "Vicuna-13B-v1.5": {
269
+ "META": {
270
+ "Method": [
271
+ "Vicuna-13B-v1.5",
272
+ "https://huggingface.co/lmsys/vicuna-13b-v1.5"
273
+ ],
274
+ "Parameters": "13B",
275
+ "Org": "LMSys",
276
+ "OpenSource": "Yes",
277
+ "Verified": "Yes"
278
+ },
279
+ "Shopping Concept Understanding": {
280
+ "Overall": 59.64
281
+ },
282
+ "Shopping Knowledge Reasoning": {
283
+ "Overall": 52.63
284
+ },
285
+ "User Behavior Alignment": {
286
+ "Overall": 49.81
287
+ },
288
+ "Multi-lingual Abilities": {
289
+ "Overall": 49.64
290
+ }
291
+ },
292
+ "LLaMA2-13B-Chat": {
293
+ "META": {
294
+ "Method": [
295
+ "LLaMA2-13B-Chat",
296
+ "https://huggingface.co/meta-llama/Llama-2-13b-chat-hf"
297
+ ],
298
+ "Parameters": "13B",
299
+ "Org": "Meta",
300
+ "OpenSource": "Yes",
301
+ "Verified": "Yes"
302
+ },
303
+ "Shopping Concept Understanding": {
304
+ "Overall": 51.79
305
+ },
306
+ "Shopping Knowledge Reasoning": {
307
+ "Overall": 45.01
308
+ },
309
+ "User Behavior Alignment": {
310
+ "Overall": 39.95
311
+ },
312
+ "Multi-lingual Abilities": {
313
+ "Overall": 42.99
314
+ }
315
+ },
316
+ "LLaMA2-13B": {
317
+ "META": {
318
+ "Method": [
319
+ "LLaMA2-13B",
320
+ "https://huggingface.co/meta-llama/Llama-2-13b-hf"
321
+ ],
322
+ "Parameters": "13B",
323
+ "Org": "Meta",
324
+ "OpenSource": "Yes",
325
+ "Verified": "Yes"
326
+ },
327
+ "Shopping Concept Understanding": {
328
+ "Overall": 45.86
329
+ },
330
+ "Shopping Knowledge Reasoning": {
331
+ "Overall": 39.47
332
+ },
333
+ "User Behavior Alignment": {
334
+ "Overall": 39.43
335
+ },
336
+ "Multi-lingual Abilities": {
337
+ "Overall": 44.23
338
+ }
339
+ },
340
+ "LLaMA3-8B-Instruct": {
341
+ "META": {
342
+ "Method": [
343
+ "LLaMA3-8B-Instruct",
344
+ "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct"
345
+ ],
346
+ "Parameters": "8B",
347
+ "Org": "Meta",
348
+ "OpenSource": "Yes",
349
+ "Verified": "Yes"
350
+ },
351
+ "Shopping Concept Understanding": {
352
+ "Overall": 65.26
353
+ },
354
+ "Shopping Knowledge Reasoning": {
355
+ "Overall": 56.84
356
+ },
357
+ "User Behavior Alignment": {
358
+ "Overall": 54.88
359
+ },
360
+ "Multi-lingual Abilities": {
361
+ "Overall": 55.37
362
+ }
363
+ },
364
+ "LLaMA3-8B": {
365
+ "META": {
366
+ "Method": [
367
+ "LLaMA3-8B",
368
+ "https://huggingface.co/meta-llama/Meta-Llama-3-8B"
369
+ ],
370
+ "Parameters": "8B",
371
+ "Org": "Meta",
372
+ "OpenSource": "Yes",
373
+ "Verified": "Yes"
374
+ },
375
+ "Shopping Concept Understanding": {
376
+ "Overall": 58.02
377
+ },
378
+ "Shopping Knowledge Reasoning": {
379
+ "Overall": 49.74
380
+ },
381
+ "User Behavior Alignment": {
382
+ "Overall": 44.16
383
+ },
384
+ "Multi-lingual Abilities": {
385
+ "Overall": 51.03
386
+ }
387
+ },
388
+ "QWen1.5-7B": {
389
+ "META": {
390
+ "Method": [
391
+ "QWen1.5-7B",
392
+ "https://huggingface.co/Qwen/Qwen1.5-7B"
393
+ ],
394
+ "Parameters": "7B",
395
+ "Org": "Alibaba",
396
+ "OpenSource": "Yes",
397
+ "Verified": "Yes"
398
+ },
399
+ "Shopping Concept Understanding": {
400
+ "Overall": 58.89
401
+ },
402
+ "Shopping Knowledge Reasoning": {
403
+ "Overall": 52.34
404
+ },
405
+ "User Behavior Alignment": {
406
+ "Overall": 49.81
407
+ },
408
+ "Multi-lingual Abilities": {
409
+ "Overall": 50.14
410
+ }
411
+ },
412
+ "eCeLLM-M": {
413
+ "META": {
414
+ "Method": [
415
+ "eCeLLM-M",
416
+ "https://huggingface.co/NingLab/eCeLLM-M"
417
+ ],
418
+ "Parameters": "7B",
419
+ "Org": "OSU NingLab",
420
+ "OpenSource": "Yes",
421
+ "Verified": "Yes"
422
+ },
423
+ "Shopping Concept Understanding": {
424
+ "Overall": 63.29
425
+ },
426
+ "Shopping Knowledge Reasoning": {
427
+ "Overall": 48.94
428
+ },
429
+ "User Behavior Alignment": {
430
+ "Overall": 53.78
431
+ },
432
+ "Multi-lingual Abilities": {
433
+ "Overall": 56.08
434
+ }
435
+ },
436
+ "Zephyr-Beta": {
437
+ "META": {
438
+ "Method": [
439
+ "Zephyr-Beta",
440
+ "https://huggingface.co/HuggingFaceH4/zephyr-7b-beta"
441
+ ],
442
+ "Parameters": "7B",
443
+ "Org": "HuggingFace H4",
444
+ "OpenSource": "Yes",
445
+ "Verified": "Yes"
446
+ },
447
+ "Shopping Concept Understanding": {
448
+ "Overall": 61.65
449
+ },
450
+ "Shopping Knowledge Reasoning": {
451
+ "Overall": 52.57
452
+ },
453
+ "User Behavior Alignment": {
454
+ "Overall": 44.73
455
+ },
456
+ "Multi-lingual Abilities": {
457
+ "Overall": 45.35
458
+ }
459
+ },
460
+ "Mistral-7B-Instruct": {
461
+ "META": {
462
+ "Method": [
463
+ "Mistral-7B-Instruct",
464
+ "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2"
465
+ ],
466
+ "Parameters": "7B",
467
+ "Org": "MistralAI",
468
+ "OpenSource": "Yes",
469
+ "Verified": "Yes"
470
+ },
471
+ "Shopping Concept Understanding": {
472
+ "Overall": 62.03
473
+ },
474
+ "Shopping Knowledge Reasoning": {
475
+ "Overall": 46.36
476
+ },
477
+ "User Behavior Alignment": {
478
+ "Overall": 42.21
479
+ },
480
+ "Multi-lingual Abilities": {
481
+ "Overall": 43.32
482
+ }
483
+ },
484
+ "Mistral-7B": {
485
+ "META": {
486
+ "Method": [
487
+ "Mistral-7B",
488
+ "https://huggingface.co/mistralai/Mistral-7B-v0.1"
489
+ ],
490
+ "Parameters": "7B",
491
+ "Org": "MistralAI",
492
+ "OpenSource": "Yes",
493
+ "Verified": "Yes"
494
+ },
495
+ "Shopping Concept Understanding": {
496
+ "Overall": 55.82
497
+ },
498
+ "Shopping Knowledge Reasoning": {
499
+ "Overall": 46.69
500
+ },
501
+ "User Behavior Alignment": {
502
+ "Overall": 46.27
503
+ },
504
+ "Multi-lingual Abilities": {
505
+ "Overall": 41.47
506
+ }
507
+ },
508
+ "Vicuna-7B-v1.5": {
509
+ "META": {
510
+ "Method": [
511
+ "Vicuna-7B-v1.5",
512
+ "https://huggingface.co/lmsys/vicuna-7b-v1.5"
513
+ ],
514
+ "Parameters": "7B",
515
+ "Org": "LMSys",
516
+ "OpenSource": "Yes",
517
+ "Verified": "Yes"
518
+ },
519
+ "Shopping Concept Understanding": {
520
+ "Overall": 53.46
521
+ },
522
+ "Shopping Knowledge Reasoning": {
523
+ "Overall": 45.06
524
+ },
525
+ "User Behavior Alignment": {
526
+ "Overall": 41.11
527
+ },
528
+ "Multi-lingual Abilities": {
529
+ "Overall": 43.82
530
+ }
531
+ },
532
+ "LLaMA2-7B-Chat": {
533
+ "META": {
534
+ "Method": [
535
+ "LLaMA2-7B-Chat",
536
+ "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf"
537
+ ],
538
+ "Parameters": "7B",
539
+ "Org": "Meta",
540
+ "OpenSource": "Yes",
541
+ "Verified": "Yes"
542
+ },
543
+ "Shopping Concept Understanding": {
544
+ "Overall": 51.67
545
+ },
546
+ "Shopping Knowledge Reasoning": {
547
+ "Overall": 43.48
548
+ },
549
+ "User Behavior Alignment": {
550
+ "Overall": 41.42
551
+ },
552
+ "Multi-lingual Abilities": {
553
+ "Overall": 40.43
554
+ }
555
+ },
556
+ "LLaMA2-7B": {
557
+ "META": {
558
+ "Method": [
559
+ "LLaMA2-7B",
560
+ "https://huggingface.co/meta-llama/Llama-2-7b-hf"
561
+ ],
562
+ "Parameters": "7B",
563
+ "Org": "Meta",
564
+ "OpenSource": "Yes",
565
+ "Verified": "Yes"
566
+ },
567
+ "Shopping Concept Understanding": {
568
+ "Overall": 38.22
569
+ },
570
+ "Shopping Knowledge Reasoning": {
571
+ "Overall": 32.81
572
+ },
573
+ "User Behavior Alignment": {
574
+ "Overall": 32.56
575
+ },
576
+ "Multi-lingual Abilities": {
577
+ "Overall": 27.71
578
+ }
579
+ },
580
+ "QWen1.5-4B": {
581
+ "META": {
582
+ "Method": [
583
+ "QWen1.5-4B",
584
+ "https://huggingface.co/Qwen/Qwen1.5-4B"
585
+ ],
586
+ "Parameters": "4B",
587
+ "Org": "Alibaba",
588
+ "OpenSource": "Yes",
589
+ "Verified": "Yes"
590
+ },
591
+ "Shopping Concept Understanding": {
592
+ "Overall": 57.21
593
+ },
594
+ "Shopping Knowledge Reasoning": {
595
+ "Overall": 52.56
596
+ },
597
+ "User Behavior Alignment": {
598
+ "Overall": 42.74
599
+ },
600
+ "Multi-lingual Abilities": {
601
+ "Overall": 49.78
602
+ }
603
+ },
604
+ "Phi-2": {
605
+ "META": {
606
+ "Method": [
607
+ "Phi-2",
608
+ "https://huggingface.co/microsoft/phi-2"
609
+ ],
610
+ "Parameters": "2.8B",
611
+ "Org": "Microsoft",
612
+ "OpenSource": "Yes",
613
+ "Verified": "Yes"
614
+ },
615
+ "Shopping Concept Understanding": {
616
+ "Overall": 49.34
617
+ },
618
+ "Shopping Knowledge Reasoning": {
619
+ "Overall": 42.83
620
+ },
621
+ "User Behavior Alignment": {
622
+ "Overall": 36.38
623
+ },
624
+ "Multi-lingual Abilities": {
625
+ "Overall": 32.91
626
+ }
627
+ },
628
+ "eCeLLM-S": {
629
+ "META": {
630
+ "Method": [
631
+ "eCeLLM-S",
632
+ "https://huggingface.co/NingLab/eCeLLM-S"
633
+ ],
634
+ "Parameters": "2.8B",
635
+ "Org": "OSU NingLab",
636
+ "OpenSource": "Yes",
637
+ "Verified": "Yes"
638
+ },
639
+ "Shopping Concept Understanding": {
640
+ "Overall": 49.4
641
+ },
642
+ "Shopping Knowledge Reasoning": {
643
+ "Overall": 39.06
644
+ },
645
+ "User Behavior Alignment": {
646
+ "Overall": 36.33
647
+ },
648
+ "Multi-lingual Abilities": {
649
+ "Overall": 32.79
650
+ }
651
+ }
652
+ }
653
+ }
__pycache__/meta_data.cpython-38.pyc CHANGED
Binary files a/__pycache__/meta_data.cpython-38.pyc and b/__pycache__/meta_data.cpython-38.pyc differ
 
app.py CHANGED
@@ -11,8 +11,8 @@ with gr.Blocks() as demo:
11
  EVAL_TIME = format_timestamp(timestamp)
12
  results = struct['results']
13
  N_MODEL = len(results)
14
- N_DATA = len(results['GPT-4o (0513, detail-high)']) - 1
15
- DATASETS = list(results['GPT-4o (0513, detail-high)'])
16
  DATASETS.remove('META')
17
  print(DATASETS)
18
 
 
11
  EVAL_TIME = format_timestamp(timestamp)
12
  results = struct['results']
13
  N_MODEL = len(results)
14
+ N_DATA = len(results['Claude3-Sonnet']) - 1
15
+ DATASETS = list(results['Claude3-Sonnet'])
16
  DATASETS.remove('META')
17
  print(DATASETS)
18
 
meta_data.py CHANGED
@@ -1,6 +1,6 @@
1
  # CONSTANTS-URL
2
  URL = "http://opencompass.openxlab.space/assets/OpenVLM.json"
3
- RESULTS = 'ShoppingMMLU.json'
4
  SHOPPINGMMLU_README = 'https://raw.githubusercontent.com/KL4805/ShoppingMMLU/refs/heads/main/README.md'
5
  # CONSTANTS-CITATION
6
  CITATION_BUTTON_TEXT = r"""@article{jin2024shopping,
 
1
  # CONSTANTS-URL
2
  URL = "http://opencompass.openxlab.space/assets/OpenVLM.json"
3
+ RESULTS = 'ShoppingMMLU_overall.json'
4
  SHOPPINGMMLU_README = 'https://raw.githubusercontent.com/KL4805/ShoppingMMLU/refs/heads/main/README.md'
5
  # CONSTANTS-CITATION
6
  CITATION_BUTTON_TEXT = r"""@article{jin2024shopping,