carbonnnnn committed on
Commit
b7fabea
·
1 Parent(s): d8f251a

first sample

Browse files
app.py CHANGED
@@ -1,11 +1,8 @@
1
  import pandas as pd
2
  import gradio as gr
3
- import asyncio # Ensure asyncio is imported
4
 
5
- from src.pricing import fetch_prices
6
-
7
-
8
- text_leaderboard = fetch_prices()
9
  llm_calc_app = gr.Blocks()
10
  with llm_calc_app:
11
 
 
1
  import pandas as pd
2
  import gradio as gr
3
+ import os
4
 
5
+ text_leaderboard = pd.read_csv(os.path.join('src', 'main_df.csv'))
 
 
 
6
  llm_calc_app = gr.Blocks()
7
  with llm_calc_app:
8
 
src/combined_data.json ADDED
@@ -0,0 +1,565 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "model_name": "Meta-Llama-3-70B-Instruct-hf",
4
+ "input_price": "0",
5
+ "output_price": "0",
6
+ "multimodality": {
7
+ "image": false,
8
+ "multiple_image": false,
9
+ "audio": false,
10
+ "video": false
11
+ },
12
+ "source": "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct",
13
+ "licence": {
14
+ "name": "Meta Llama 3 License",
15
+ "url": "https://www.llama.com/llama3/license/"
16
+ },
17
+ "languages": ["eng"],
18
+ "release_date": "2024-04-18",
19
+ "parameters":{
20
+ "estimated": false,
21
+ "actual": "70B"
22
+ },
23
+ "open_weight": true,
24
+ "context": "8k"
25
+ },
26
+ {
27
+ "model_name": "Meta-Llama-3-8B-Instruct-hf",
28
+ "input_price": "0",
29
+ "output_price": "0",
30
+ "multimodality": {
31
+ "image": false,
32
+ "multiple_image": false,
33
+ "audio": false,
34
+ "video": false
35
+ },
36
+ "source": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct",
37
+ "licence": {
38
+ "name": "Meta Llama 3 License",
39
+ "url": "https://www.llama.com/llama3/license/"
40
+ },
41
+ "languages": ["eng"],
42
+ "release_date": "2024-04-18",
43
+ "parameters":{
44
+ "estimated": false,
45
+ "actual": "8B"
46
+ },
47
+ "open_weight": true,
48
+ "context": "8k"
49
+ },
50
+ {
51
+ "model_name": "Meta-Llama-3.1-405B-Instruct-Turbo",
52
+ "input_price": "0",
53
+ "output_price": "0",
54
+ "multimodality": {
55
+ "image": false,
56
+ "multiple_image": false,
57
+ "audio": false,
58
+ "video": false
59
+ },
60
+ "source": "https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct",
61
+ "licence": {
62
+ "name": "Llama 3.1 Community License",
63
+ "url": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE"
64
+ },
65
+ "languages": ["eng", "deu", "fra", "ita", "hin", "por", "spa", "tha"],
66
+ "release_date": "2024-07-23",
67
+ "parameters":{
68
+ "estimated": false,
69
+ "actual": "405B"
70
+ },
71
+ "open_weight": true,
72
+ "context": "128k"
73
+ },
74
+ {
75
+ "model_name": "Meta-Llama-3.1-70B-Instruct",
76
+ "input_price": "0",
77
+ "output_price": "0",
78
+ "multimodality": {
79
+ "image": false,
80
+ "multiple_image": false,
81
+ "audio": false,
82
+ "video": false
83
+ },
84
+ "source": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
85
+ "licence": {
86
+ "name": "Llama 3.1 Community License",
87
+ "url": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE"
88
+ },
89
+ "languages": ["eng", "deu", "fra", "ita", "hin", "por", "spa", "tha"],
90
+ "release_date": "2024-07-23",
91
+ "parameters": {
92
+ "estimated": false,
93
+ "actual": "70B"
94
+ },
95
+ "open_weight": true,
96
+ "context": "128k"
97
+ },
98
+ {
99
+ "model_name": "Meta-Llama-3.1-8B-Instruct",
100
+ "input_price": "0",
101
+ "output_price": "0",
102
+ "multimodality": {
103
+ "image": false,
104
+ "multiple_image": false,
105
+ "audio": false,
106
+ "video": false
107
+ },
108
+ "source": "https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct",
109
+ "licence": {
110
+ "name": "Llama 3.1 Community License",
111
+ "url": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE"
112
+ },
113
+ "languages": ["eng", "deu", "fra", "ita", "hin", "por", "spa", "tha"],
114
+ "release_date": "2024-07-23",
115
+ "parameters": {
116
+ "estimated": false,
117
+ "actual": "8B"
118
+ },
119
+ "open_weight": true,
120
+ "context": "128k"
121
+ },
122
+ {
123
+ "model_name": "InternVL2-40B",
124
+ "input_price": "0",
125
+ "output_price": "0",
126
+ "multimodality": {
127
+ "image": true,
128
+ "multiple_image": true,
129
+ "audio": false,
130
+ "video": false
131
+ },
132
+ "source": "https://huggingface.co/OpenGVLab/InternVL2-40B",
133
+ "licence": {
134
+ "name": "MIT",
135
+ "url": "https://choosealicense.com/licenses/mit/"
136
+ },
137
+ "languages": ["chi","eng","fre","spa","por","deu","ita","rus","jpn","kor","vie","tha","ara"],
138
+ "release_date": "2024-07-15",
139
+ "parameters": {
140
+ "estimated": false,
141
+ "actual": "40B"
142
+ },
143
+ "open_weight": true,
144
+ "context": "8k"
145
+ },
146
+ {
147
+ "model_name": "InternVL2-8B",
148
+ "input_price": "0",
149
+ "output_price": "0",
150
+ "multimodality": {
151
+ "image": true,
152
+ "multiple_image": true,
153
+ "audio": false,
154
+ "video": false
155
+ },
156
+ "source": "https://huggingface.co/OpenGVLab/InternVL2-8B",
157
+ "licence": {
158
+ "name": "MIT",
159
+ "url": "https://choosealicense.com/licenses/mit/"
160
+ },
161
+ "languages": ["chi","eng","fre","spa","por","deu","ita","rus","jpn","kor","vie","tha","ara"],
162
+ "release_date": "2024-07-15",
163
+ "parameters": {
164
+ "estimated": false,
165
+ "actual": "8B"
166
+ },
167
+ "open_weight": true,
168
+ "context": "8k"
169
+ },
170
+ {
171
+ "model_name": "InternVL2-Llama3-76B",
172
+ "input_price": "0",
173
+ "output_price": "0",
174
+ "multimodality": {
175
+ "image": true,
176
+ "multiple_image": true,
177
+ "audio": false,
178
+ "video": false
179
+ },
180
+ "source": "https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B",
181
+ "licence": {
182
+ "name": "MIT",
183
+ "url": "https://choosealicense.com/licenses/mit/"
184
+ },
185
+ "languages": ["chi","eng","fre","spa","por","deu","ita","rus","jpn","kor","vie","tha","ara"],
186
+ "release_date": "2024-07-15",
187
+ "parameters": {
188
+ "estimated": false,
189
+ "actual": "76B"
190
+ },
191
+ "open_weight": true,
192
+ "context": "8k"
193
+ },
194
+ {
195
+ "model_name": "InternVL2-26B",
196
+ "input_price": "0",
197
+ "output_price": "0",
198
+ "multimodality": {
199
+ "image": true,
200
+ "multiple_image": true,
201
+ "audio": false,
202
+ "video": false
203
+ },
204
+ "source": "https://huggingface.co/OpenGVLab/InternVL2-26B",
205
+ "licence": {
206
+ "name": "MIT",
207
+ "url": "https://choosealicense.com/licenses/mit/"
208
+ },
209
+ "languages": ["chi","eng","fre","spa","por","deu","ita","rus","jpn","kor","vie","tha","ara"],
210
+ "release_date": "2024-07-15",
211
+ "parameters": {
212
+ "estimated": false,
213
+ "actual": "26B"
214
+ },
215
+ "open_weight": true,
216
+ "context": "8k"
217
+ },
218
+
219
+ {
220
+ "model_name": "InternVL2-26B",
221
+ "input_price": "0",
222
+ "output_price": "0",
223
+ "multimodality": {
224
+ "image": true,
225
+ "multiple_image": true,
226
+ "audio": false,
227
+ "video": false
228
+ },
229
+ "source": "https://huggingface.co/OpenGVLab/InternVL2-26B",
230
+ "licence": {
231
+ "name": "MIT",
232
+ "url": "https://choosealicense.com/licenses/mit/"
233
+ },
234
+ "languages": ["chi","eng","fre","spa","por","deu","ita","rus","jpn","kor","vie","tha","ara"],
235
+ "release_date": "2024-07-15",
236
+ "parameters": {
237
+ "estimated": false,
238
+ "actual": "26B"
239
+ },
240
+ "open_weight": true,
241
+ "context": "8k"
242
+ },
243
+ {
244
+ "model_name": "Mistral-Large-Instruct-2407",
245
+ "input_price": "0",
246
+ "output_price": "0",
247
+ "multimodality": {
248
+ "image": false,
249
+ "multiple_image": false,
250
+ "audio": false,
251
+ "video": false
252
+ },
253
+ "source": "https://huggingface.co/mistralai/Mistral-Large-Instruct-2407",
254
+ "licence": {
255
+ "name": "Apache 2.0",
256
+ "url": "https://www.apache.org/licenses/LICENSE-2.0"
257
+ },
258
+ "languages": ["eng", "fra", "spa", "deu", "ita", "rus", "chi", "jpn", "kor"],
259
+ "release_date": "2024-06-12",
260
+ "parameters": {
261
+ "estimated": false,
262
+ "actual": "70B"
263
+ },
264
+ "open_weight": true,
265
+ "context": "8k"
266
+ },
267
+ {
268
+ "model_name": "Mixtral-8x22B-Instruct-v0.1",
269
+ "input_price": "0",
270
+ "output_price": "0",
271
+ "multimodality": {
272
+ "image": false,
273
+ "multiple_image": false,
274
+ "audio": false,
275
+ "video": false
276
+ },
277
+ "source": "https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1",
278
+ "licence": {
279
+ "name": "Apache 2.0",
280
+ "url": "https://www.apache.org/licenses/LICENSE-2.0"
281
+ },
282
+ "languages": ["eng", "fra", "spa", "deu", "ita", "rus"],
283
+ "release_date": "2024-04-17",
284
+ "parameters": {
285
+ "estimated": false,
286
+ "actual": "141B"
287
+ },
288
+ "open_weight": true,
289
+ "context": "8k"
290
+ },
291
+ {
292
+ "model_name": "Mistral-7B-Instruct-v0.2",
293
+ "input_price": "0",
294
+ "output_price": "0",
295
+ "multimodality": {
296
+ "image": false,
297
+ "multiple_image": false,
298
+ "audio": false,
299
+ "video": false
300
+ },
301
+ "source": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
302
+ "licence": {
303
+ "name": "Apache 2.0",
304
+ "url": "https://www.apache.org/licenses/LICENSE-2.0"
305
+ },
306
+ "languages": ["eng", "fra", "spa", "deu", "ita", "rus", "chi"],
307
+ "release_date": "2024-01-15",
308
+ "parameters": {
309
+ "estimated": false,
310
+ "actual": "7B"
311
+ },
312
+ "open_weight": true,
313
+ "context": "8k"
314
+ },
315
+ {
316
+ "model_name": "Mistral-7B-Instruct-v0.1",
317
+ "input_price": "0",
318
+ "output_price": "0",
319
+ "multimodality": {
320
+ "image": false,
321
+ "multiple_image": false,
322
+ "audio": false,
323
+ "video": false
324
+ },
325
+ "source": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1",
326
+ "licence": {
327
+ "name": "Apache 2.0",
328
+ "url": "https://www.apache.org/licenses/LICENSE-2.0"
329
+ },
330
+ "languages": ["eng", "fra", "spa", "deu", "ita", "rus", "chi"],
331
+ "release_date": "2023-12-11",
332
+ "parameters": {
333
+ "estimated": false,
334
+ "actual": "7B"
335
+ },
336
+ "open_weight": true,
337
+ "context": "8k"
338
+ },
339
+ {
340
+ "model_name": "Mixtral-8x7B-Instruct-v0.1",
341
+ "input_price": "0",
342
+ "output_price": "0",
343
+ "multimodality": {
344
+ "image": false,
345
+ "multiple_image": false,
346
+ "audio": false,
347
+ "video": false
348
+ },
349
+ "source": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
350
+ "licence": {
351
+ "name": "Apache 2.0",
352
+ "url": "https://www.apache.org/licenses/LICENSE-2.0"
353
+ },
354
+ "languages": ["eng", "fra", "spa", "deu", "ita", "rus"],
355
+ "release_date": "2023-12-11",
356
+ "parameters": {
357
+ "estimated": false,
358
+ "actual": "46.7B"
359
+ },
360
+ "open_weight": true,
361
+ "context": "8k"
362
+ },
363
+ {
364
+ "model_name": "openchat-3.5-0106",
365
+ "input_price": "0",
366
+ "output_price": "0",
367
+ "multimodality": {
368
+ "image": false,
369
+ "multiple_image": false,
370
+ "audio": false,
371
+ "video": false
372
+ },
373
+ "source": "https://huggingface.co/openchat/openchat-3.5-0106",
374
+ "licence": {
375
+ "name": "Apache 2.0",
376
+ "url": "https://www.apache.org/licenses/LICENSE-2.0"
377
+ },
378
+ "languages": ["eng"],
379
+ "release_date": "2024-01-06",
380
+ "parameters": {
381
+ "estimated": false,
382
+ "actual": "7B"
383
+ },
384
+ "open_weight": true,
385
+ "context": "8k"
386
+ },
387
+ {
388
+ "model_name": "openchat-3.5-1210",
389
+ "input_price": "0",
390
+ "output_price": "0",
391
+ "multimodality": {
392
+ "image": false,
393
+ "multiple_image": false,
394
+ "audio": false,
395
+ "video": false
396
+ },
397
+ "source": "https://huggingface.co/openchat/openchat-3.5-1210",
398
+ "licence": {
399
+ "name": "Apache 2.0",
400
+ "url": "https://www.apache.org/licenses/LICENSE-2.0"
401
+ },
402
+ "languages": ["eng"],
403
+ "release_date": "2023-12-10",
404
+ "parameters": {
405
+ "estimated": false,
406
+ "actual": "7B"
407
+ },
408
+ "open_weight": true,
409
+ "context": "8k"
410
+ },
411
+ {
412
+ "model_name": "openchat_3.5",
413
+ "input_price": "0",
414
+ "output_price": "0",
415
+ "multimodality": {
416
+ "image": false,
417
+ "multiple_image": false,
418
+ "audio": false,
419
+ "video": false
420
+ },
421
+ "source": "https://huggingface.co/openchat/openchat_3.5",
422
+ "licence": {
423
+ "name": "Apache 2.0",
424
+ "url": "https://www.apache.org/licenses/LICENSE-2.0"
425
+ },
426
+ "languages": ["eng"],
427
+ "release_date": "2023-10-30",
428
+ "parameters": {
429
+ "estimated": false,
430
+ "actual": "7B"
431
+ },
432
+ "open_weight": true,
433
+ "context": "8k"
434
+ },
435
+ {
436
+ "model_name": "gpt-4o-mini-2024-07-18",
437
+ "input_price": "$0.150",
438
+ "output_price": "$0.600",
439
+ "multimodality": {
440
+ "image": true,
441
+ "multiple_image": true,
442
+ "audio": false,
443
+ "video": false
444
+ },
445
+ "source": "https://openai.com/api/pricing/",
446
+ "licence": {
447
+ "name": "Commercial License",
448
+ "url": "https://openai.com/policies/terms-of-use"
449
+ },
450
+ "languages": ["eng", "spa", "fra", "deu", "zho", "zht", "jpn", "kor", "ita", "por", "nld", "rus", "ara", "hin", "tur", "vie", "pol", "tha", "swe", "dan", "nor", "fin", "hun", "ces", "slk", "rom", "bul", "ukr", "lit", "lav", "est", "slv", "msa", "ind", "fil", "swz", "amh"],
451
+ "release_date": "2024-07-18",
452
+ "parameters": {
453
+ "estimated": true,
454
+ "actual": "8B"
455
+ },
456
+ "open_weight": false,
457
+ "context": "128k"
458
+ },
459
+ {
460
+ "model_name": "gpt-4o-2024-08-06",
461
+ "input_price": "$2.50",
462
+ "output_price": "$10.00",
463
+ "multimodality": {
464
+ "image": true,
465
+ "multiple_image": true,
466
+ "audio": false,
467
+ "video": false
468
+ },
469
+ "source": "https://openai.com/api/pricing/",
470
+ "licence": {
471
+ "name": "Commercial License",
472
+ "url": "https://openai.com/policies/terms-of-use"
473
+ },
474
+ "languages": ["eng", "spa", "fra", "deu", "zho", "zht", "jpn", "kor", "ita", "por", "nld", "rus", "ara", "hin", "tur", "vie", "pol", "tha", "swe", "dan", "nor", "fin", "hun", "ces", "slk", "rom", "bul", "ukr", "lit", "lav", "est", "slv", "msa", "ind", "fil", "swz", "amh"],
475
+ "release_date": "2024-08-06",
476
+ "parameters": {
477
+ "estimated": true,
478
+ "actual": "200B"
479
+ },
480
+ "open_weight": false,
481
+ "context": "128k"
482
+ },
483
+ {
484
+ "model_name": "gpt-4o-2024-05-13",
485
+ "input_price": "$2.50",
486
+ "output_price": "$10.00",
487
+ "multimodality": {
488
+ "image": true,
489
+ "multiple_image": true,
490
+ "audio": false,
491
+ "video": false
492
+ },
493
+ "source": "https://openai.com/api/pricing/",
494
+ "licence": {
495
+ "name": "Commercial License",
496
+ "url": "https://openai.com/policies/terms-of-use"
497
+ },
498
+ "languages": ["eng", "spa", "fra", "deu", "zho", "zht", "jpn", "kor", "ita", "por", "nld", "rus", "ara", "hin", "tur", "vie", "pol", "tha", "swe", "dan", "nor", "fin", "hun", "ces", "slk", "rom", "bul", "ukr", "lit", "lav", "est", "slv", "msa", "ind", "fil", "swz", "amh"],
499
+ "release_date": "2024-05-13",
500
+ "parameters": {
501
+ "estimated": true,
502
+ "actual": "200B"
503
+ },
504
+ "open_weight": false,
505
+ "context": "128k"
506
+ },
507
+ {
508
+ "model_name": "gpt-4-1106-vision-preview",
509
+ "input_price": "$10.00",
510
+ "output_price": "$30.00",
511
+ "multimodality": {
512
+ "image": true,
513
+ "multiple_image": true,
514
+ "audio": false,
515
+ "video": false
516
+ },
517
+ "source": "https://openai.com/api/pricing/",
518
+ "licence": {
519
+ "name": "Commercial License",
520
+ "url": "https://openai.com/policies/terms-of-use"
521
+ },
522
+ "languages": ["eng", "spa", "fra", "deu", "zho", "zht", "jpn", "kor", "ita", "por", "nld", "rus", "ara", "hin", "tur", "vie", "pol", "tha", "swe", "dan", "nor", "fin", "hun", "ces", "slk", "rom", "bul", "ukr", "lit", "lav", "est", "slv", "msa", "ind", "fil", "swz", "amh"],
523
+ "release_date": "2023-11-06",
524
+ "parameters": {
525
+ "estimated": "1.76T",
526
+ "actual": false
527
+ },
528
+ "open_weight": false,
529
+ "context": "128k"
530
+ },
531
+ {
532
+ "model_name": "gemini-1.5-flash-latest",
533
+ "input_price": "$0.075",
534
+ "output_price": "$0.30",
535
+ "additional_prices": {
536
+ "context_caching": "$0.01875",
537
+ "context_storage": "$1.00",
538
+ "image_input": "$0.02",
539
+ "image_output": "$0.04",
540
+ "video_input": "$0.02",
541
+ "video_output": "$0.04",
542
+ "audio_input": "$0.02",
543
+ "audio_output": "$0.04"
544
+ },
545
+ "multimodality": {
546
+ "image": true,
547
+ "multiple_image": true,
548
+ "audio": true,
549
+ "video": true
550
+ },
551
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
552
+ "licence": {
553
+ "name": "Commercial License",
554
+ "url": ""
555
+ },
556
+ "languages": ["lit", "nor", "pol", "por", "ron", "rus", "srp", "slk", "slv", "spa", "swa", "swe", "tha", "tur", "ukr", "vie", "zho", "hrv", "ces", "dan", "nld", "eng", "est", "fin", "fra", "deu", "ell", "heb", "hin", "hun", "ind", "ita", "jpn", "kor", "lav", "ara", "ben", "bul"],
557
+ "release_date": "2024-05-24",
558
+ "parameters": {
559
+ "estimated": false,
560
+ "actual": false
561
+ },
562
+ "open_weight": false,
563
+ "context": ">128k"
564
+ }
565
+ ]
src/main_df.csv ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name,input_price,output_price,multimodality_image,multimodality_multiple_image,multimodality_audio,multimodality_video,source,licence_name,licence_url,languages,release_date,parameters_estimated,parameters_actual,open_weight,context,additional_prices_context_caching,additional_prices_context_storage,additional_prices_image_input,additional_prices_image_output,additional_prices_video_input,additional_prices_video_output,additional_prices_audio_input,additional_prices_audio_output,clemscore_v1.6.5_multimodal,clemscore_v1.6.5_ascii,clemscore_v1.6
2
+ Meta-Llama-3-70B-Instruct-hf,0,0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct,Meta Llama 3 License,https://www.llama.com/llama3/license/,eng,2024-04-18,False,70B,True,8k,,,,,,,,,0.0,0.0,35.11
3
+ Meta-Llama-3-8B-Instruct-hf,0,0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct,Meta Llama 3 License,https://www.llama.com/llama3/license/,eng,2024-04-18,False,8B,True,8k,,,,,,,,,0.0,0.0,19.99
4
+ Meta-Llama-3.1-405B-Instruct-Turbo,0,0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"eng, deu, fra, ita, hin, por, spa, tha",2024-07-23,False,405B,True,128k,,,,,,,,,0.0,0.0,52.11
5
+ Meta-Llama-3.1-70B-Instruct,0,0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"eng, deu, fra, ita, hin, por, spa, tha",2024-07-23,False,70B,True,128k,,,,,,,,,0.0,0.0,38.83
6
+ Meta-Llama-3.1-8B-Instruct,0,0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"eng, deu, fra, ita, hin, por, spa, tha",2024-07-23,False,8B,True,128k,,,,,,,,,0.0,0.0,18.36
7
+ InternVL2-40B,0,0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-40B,MIT,https://choosealicense.com/licenses/mit/,"chi, eng, fre, spa, por, deu, ita, rus, jpn, kor, vie, tha, ara",2024-07-15,False,40B,True,8k,,,,,,,,,32.23,33.2,0.0
8
+ InternVL2-8B,0,0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-8B,MIT,https://choosealicense.com/licenses/mit/,"chi, eng, fre, spa, por, deu, ita, rus, jpn, kor, vie, tha, ara",2024-07-15,False,8B,True,8k,,,,,,,,,23.17,36.05,0.0
9
+ InternVL2-Llama3-76B,0,0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B,MIT,https://choosealicense.com/licenses/mit/,"chi, eng, fre, spa, por, deu, ita, rus, jpn, kor, vie, tha, ara",2024-07-15,False,76B,True,8k,,,,,,,,,33.84,43.29,0.0
10
+ InternVL2-26B,0,0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,https://choosealicense.com/licenses/mit/,"chi, eng, fre, spa, por, deu, ita, rus, jpn, kor, vie, tha, ara",2024-07-15,False,26B,True,8k,,,,,,,,,37.45,32.27,0.0
11
+ InternVL2-26B,0,0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,https://choosealicense.com/licenses/mit/,"chi, eng, fre, spa, por, deu, ita, rus, jpn, kor, vie, tha, ara",2024-07-15,False,26B,True,8k,,,,,,,,,37.45,32.27,0.0
12
+ Mistral-Large-Instruct-2407,0,0,False,False,False,False,https://huggingface.co/mistralai/Mistral-Large-Instruct-2407,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"eng, fra, spa, deu, ita, rus, chi, jpn, kor",2024-06-12,False,70B,True,8k,,,,,,,,,0.0,0.0,45.39
13
+ Mixtral-8x22B-Instruct-v0.1,0,0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"eng, fra, spa, deu, ita, rus",2024-04-17,False,141B,True,8k,,,,,,,,,0.0,0.0,12.69
14
+ Mistral-7B-Instruct-v0.2,0,0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"eng, fra, spa, deu, ita, rus, chi",2024-01-15,False,7B,True,8k,,,,,,,,,0.0,0.0,9.75
15
+ Mistral-7B-Instruct-v0.1,0,0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"eng, fra, spa, deu, ita, rus, chi",2023-12-11,False,7B,True,8k,,,,,,,,,0.0,0.0,8.01
16
+ Mixtral-8x7B-Instruct-v0.1,0,0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"eng, fra, spa, deu, ita, rus",2023-12-11,False,46.7B,True,8k,,,,,,,,,0.0,0.0,8.17
17
+ openchat-3.5-0106,0,0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-0106,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,eng,2024-01-06,False,7B,True,8k,,,,,,,,,0.0,0.0,17.1
18
+ openchat-3.5-1210,0,0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-1210,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,eng,2023-12-10,False,7B,True,8k,,,,,,,,,0.0,0.0,18.22
19
+ openchat_3.5,0,0,False,False,False,False,https://huggingface.co/openchat/openchat_3.5,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,eng,2023-10-30,False,7B,True,8k,,,,,,,,,0.0,0.0,23.64
20
+ gpt-4o-mini-2024-07-18,$0.150,$0.600,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"eng, spa, fra, deu, zho, zht, jpn, kor, ita, por, nld, rus, ara, hin, tur, vie, pol, tha, swe, dan, nor, fin, hun, ces, slk, rom, bul, ukr, lit, lav, est, slv, msa, ind, fil, swz, amh",2024-07-18,True,8B,False,128k,,,,,,,,,58.46,63.87,34.64
21
+ gpt-4o-2024-08-06,$2.50,$10.00,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"eng, spa, fra, deu, zho, zht, jpn, kor, ita, por, nld, rus, ara, hin, tur, vie, pol, tha, swe, dan, nor, fin, hun, ces, slk, rom, bul, ukr, lit, lav, est, slv, msa, ind, fil, swz, amh",2024-08-06,True,200B,False,128k,,,,,,,,,80.04,80.96,47.71
22
+ gpt-4o-2024-05-13,$2.50,$10.00,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"eng, spa, fra, deu, zho, zht, jpn, kor, ita, por, nld, rus, ara, hin, tur, vie, pol, tha, swe, dan, nor, fin, hun, ces, slk, rom, bul, ukr, lit, lav, est, slv, msa, ind, fil, swz, amh",2024-05-13,True,200B,False,128k,,,,,,,,,69.56,82.72,48.34
23
+ gpt-4-1106-vision-preview,$10.00,$30.00,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"eng, spa, fra, deu, zho, zht, jpn, kor, ita, por, nld, rus, ara, hin, tur, vie, pol, tha, swe, dan, nor, fin, hun, ces, slk, rom, bul, ukr, lit, lav, est, slv, msa, ind, fil, swz, amh",2023-11-06,1.76T,False,False,128k,,,,,,,,,73.55,68.14,0.0
24
+ gemini-1.5-flash-latest,$0.075,$0.30,True,True,True,True,https://cloud.google.com/vertex-ai/generative-ai/pricing,Commercial License,,"lit, nor, pol, por, ron, rus, srp, slk, slv, spa, swa, swe, tha, tur, ukr, vie, zho, hrv, ces, dan, nld, eng, est, fin, fra, deu, ell, heb, hin, hun, ind, ita, jpn, kor, lav, ara, ben, bul",2024-05-24,False,False,False,>128k,$0.01875,$1.00,$0.02,$0.04,$0.02,$0.04,$0.02,$0.04,47.73,47.88,32.0
src/pricing.py CHANGED
@@ -1,5 +1,6 @@
1
  import pandas as pd
2
  import requests
 
3
 
4
  def fetch_prices():
5
  # Fetch the JSON data from the URL
@@ -13,18 +14,23 @@ def fetch_prices():
13
  extracted_data = []
14
  for entry in data:
15
  extracted_info = {
16
- "output_tokens": entry["fields"]["output_tokens"],
17
- "provider": entry["fields"]["provider"],
18
  "model_name": entry["fields"]["model_name"],
19
- "url": entry["fields"]["url"],
20
  "input_tokens": entry["fields"]["input_tokens"],
 
 
21
  "update_time": entry["fields"]["update_time"]
22
  }
23
  extracted_data.append(extracted_info)
24
 
25
  # Create a DataFrame from the extracted data
26
  df = pd.DataFrame(extracted_data)
27
- return df
 
 
28
  else:
29
  print(f"Failed to retrieve data: {response.status_code}")
30
- return None
 
 
 
 
1
  import pandas as pd
2
  import requests
3
+ import os
4
 
5
  def fetch_prices():
6
  # Fetch the JSON data from the URL
 
14
  extracted_data = []
15
  for entry in data:
16
  extracted_info = {
 
 
17
  "model_name": entry["fields"]["model_name"],
18
+ "provider": entry["fields"]["provider"],
19
  "input_tokens": entry["fields"]["input_tokens"],
20
+ "output_tokens": entry["fields"]["output_tokens"],
21
+ "url": entry["fields"]["url"],
22
  "update_time": entry["fields"]["update_time"]
23
  }
24
  extracted_data.append(extracted_info)
25
 
26
  # Create a DataFrame from the extracted data
27
  df = pd.DataFrame(extracted_data)
28
+ save_path = os.path.join('src', 'prices.csv')
29
+ df.to_csv(save_path, index=False) # Save the DataFrame as a CSV file
30
+ print(f"Saved the Prices as a CSV under {save_path}")
31
  else:
32
  print(f"Failed to retrieve data: {response.status_code}")
33
+ return None
34
+
35
+ if __name__ == '__main__':
36
+ fetch_prices()
src/process_data.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Build src/main_df.csv from src/combined_data.json and the clemscore results CSVs
2
+ import pandas as pd
3
+ import json
4
+
5
+ # Load the JSON data
6
+ with open('src/combined_data.json') as f:
7
+ data = json.load(f)
8
+
9
+ # Flatten the data
10
+ flattened_data = []
11
+ for entry in data:
12
+ flattened_entry = {
13
+ "model_name": entry["model_name"],
14
+ "input_price": entry["input_price"],
15
+ "output_price": entry["output_price"],
16
+ "multimodality_image": entry["multimodality"]["image"],
17
+ "multimodality_multiple_image": entry["multimodality"]["multiple_image"],
18
+ "multimodality_audio": entry["multimodality"]["audio"],
19
+ "multimodality_video": entry["multimodality"]["video"],
20
+ "source": entry["source"],
21
+ "licence_name": entry["licence"]["name"],
22
+ "licence_url": entry["licence"]["url"],
23
+ "languages": ", ".join(entry["languages"]),
24
+ "release_date": entry["release_date"],
25
+ "parameters_estimated": entry["parameters"]["estimated"],
26
+ "parameters_actual": entry["parameters"]["actual"],
27
+ "open_weight": entry["open_weight"],
28
+ "context": entry["context"],
29
+ # Optional additional prices; each value is None when the entry does not provide it
30
+ "additional_prices_context_caching": entry.get("additional_prices", {}).get("context_caching", None),
31
+ "additional_prices_context_storage": entry.get("additional_prices", {}).get("context_storage", None),
32
+ "additional_prices_image_input": entry.get("additional_prices", {}).get("image_input", None),
33
+ "additional_prices_image_output": entry.get("additional_prices", {}).get("image_output", None),
34
+ "additional_prices_video_input": entry.get("additional_prices", {}).get("video_input", None),
35
+ "additional_prices_video_output": entry.get("additional_prices", {}).get("video_output", None),
36
+ "additional_prices_audio_input": entry.get("additional_prices", {}).get("audio_input", None),
37
+ "additional_prices_audio_output": entry.get("additional_prices", {}).get("audio_output", None),
38
+ }
39
+ flattened_data.append(flattened_entry)
40
+
41
+ # Create a DataFrame
42
+ df = pd.DataFrame(flattened_data)
43
+
44
+ # Load the results CSV files
45
+ results_1_6_5_multimodal = pd.read_csv('src/results_1.6.5_multimodal.csv', header=None)
46
+ results_1_6_5_ascii = pd.read_csv('src/results_1.6.5_ascii.csv', header=None)
47
+ results_1_6 = pd.read_csv('src/results_1.6.csv', header=None)
48
+
49
+ # Split model names by '-t0.0' and use the first part
50
+ results_1_6_5_multimodal[0] = results_1_6_5_multimodal[0].str.split('-t0.0').str[0]
51
+ results_1_6_5_ascii[0] = results_1_6_5_ascii[0].str.split('-t0.0').str[0]
52
+ results_1_6[0] = results_1_6[0].str.split('-t0.0').str[0]
53
+
54
+
55
+ # Create a mapping for clemscore values
56
+ clemscore_map_1_6_5_multimodal = dict(zip(results_1_6_5_multimodal[0], results_1_6_5_multimodal[1]))
57
+ clemscore_map_1_6_5_ascii = dict(zip(results_1_6_5_ascii[0], results_1_6_5_ascii[1]))
58
+ clemscore_map_1_6 = dict(zip(results_1_6[0], results_1_6[1]))
59
+
60
+ print(clemscore_map_1_6)
61
+
62
+ # Add clemscore columns to the main DataFrame
63
+ df['clemscore_v1.6.5_multimodal'] = df['model_name'].map(clemscore_map_1_6_5_multimodal).fillna(0).astype(float)
64
+ df['clemscore_v1.6.5_ascii'] = df['model_name'].map(clemscore_map_1_6_5_ascii).fillna(0).astype(float)
65
+ df['clemscore_v1.6'] = df['model_name'].map(clemscore_map_1_6).fillna(0).astype(float)
66
+
67
+ # Save to CSV
68
+ df.to_csv('src/main_df.csv', index=False)
src/results_1.6.5_ascii.csv ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,"-, clemscore","all, Average % Played","all, Average Quality Score","matchit_ascii, % Played","matchit_ascii, Quality Score","matchit_ascii, Quality Score (std)","referencegame, % Played","referencegame, Quality Score","referencegame, Quality Score (std)","textmapworld, % Played","textmapworld, Quality Score","textmapworld, Quality Score (std)","textmapworld_graphreasoning, % Played","textmapworld_graphreasoning, Quality Score","textmapworld_graphreasoning, Quality Score (std)","textmapworld_specificroom, % Played","textmapworld_specificroom, Quality Score","textmapworld_specificroom, Quality Score (std)"
2
+ Idefics3-8B-Llama3-t0.0--Idefics3-8B-Llama3-t0.0,22.56,40.0,56.39,100.0,70.0,46.41,100.0,42.78,49.61,0.0,,,0.0,,,0.0,,
3
+ InternVL2-26B-t0.0--InternVL2-26B-t0.0,32.27,51.2,63.03,100.0,65.0,48.3,100.0,43.89,49.76,16.0,51.55,16.62,0.0,,,40.0,91.67,28.87
4
+ InternVL2-40B-t0.0--InternVL2-40B-t0.0,33.2,50.67,65.52,100.0,70.0,46.41,93.33,48.21,50.12,10.0,70.55,19.72,0.0,,,50.0,73.33,45.77
5
+ InternVL2-8B-t0.0--InternVL2-8B-t0.0,36.05,48.67,74.07,100.0,70.0,46.41,100.0,52.22,50.09,0.0,,,0.0,,,43.33,100.0,0.0
6
+ InternVL2-Llama3-76B-t0.0--InternVL2-Llama3-76B-t0.0,43.29,60.27,71.82,100.0,55.0,50.38,100.0,61.11,48.89,28.0,71.17,7.4,0.0,,,73.33,100.0,0.0
7
+ Phi-3-vision-128k-instruct-t0.0--Phi-3-vision-128k-instruct-t0.0,22.07,29.61,74.53,67.5,81.48,39.58,73.89,42.11,49.56,0.0,,,0.0,,,6.67,100.0,0.0
8
+ Phi-3.5-vision-instruct-t0.0--Phi-3.5-vision-instruct-t0.0,26.95,41.73,64.58,100.0,55.0,50.38,83.33,42.67,49.62,12.0,60.63,21.21,0.0,,,13.33,100.0,0.0
9
+ Pixtral-12B-2409-t0.0--Pixtral-12B-2409-t0.0,37.57,59.63,63.01,97.5,69.23,46.76,100.0,41.11,49.34,34.0,53.61,17.0,6.67,56.67,4.71,60.0,94.44,23.57
10
+ claude-3-5-sonnet-20240620-t0.0--claude-3-5-sonnet-20240620-t0.0,90.56,100.0,90.56,100.0,92.5,26.67,100.0,91.11,28.54,100.0,86.26,12.12,100.0,82.91,10.88,100.0,100.0,0.0
11
+ claude-3-opus-20240229-t0.0--claude-3-opus-20240229-t0.0,74.99,100.0,74.99,100.0,85.0,36.16,100.0,29.44,45.71,100.0,83.83,14.64,100.0,76.69,12.8,100.0,100.0,0.0
12
+ gemini-1.5-flash-latest-t0.0--gemini-1.5-flash-latest-t0.0,47.88,62.97,76.03,97.5,76.92,42.68,100.0,61.11,48.89,64.0,66.08,16.46,0.0,,,53.33,100.0,0.0
13
+ gpt-4-1106-vision-preview-t0.0--gpt-4-1106-vision-preview-t0.0,68.14,99.33,68.6,100.0,72.5,45.22,100.0,29.44,45.71,100.0,73.62,14.33,100.0,67.46,15.11,96.67,100.0,0.0
14
+ gpt-4o-2024-05-13-t0.0--gpt-4o-2024-05-13-t0.0,82.72,96.67,85.57,100.0,97.5,15.81,100.0,90.0,30.08,90.0,74.25,12.12,96.67,66.12,12.83,96.67,100.0,0.0
15
+ gpt-4o-2024-08-06-t0.0--gpt-4o-2024-08-06-t0.0,80.96,98.67,82.05,100.0,82.5,38.48,100.0,87.78,32.85,100.0,72.84,10.76,100.0,67.15,12.41,93.33,100.0,0.0
16
+ gpt-4o-mini-2024-07-18-t0.0--gpt-4o-mini-2024-07-18-t0.0,63.87,85.76,74.48,100.0,87.5,33.49,99.44,73.74,44.13,66.0,63.7,16.72,96.67,47.46,15.37,66.67,100.0,0.0
17
+ idefics-80b-instruct-t0.0--idefics-80b-instruct-t0.0,19.73,46.5,42.44,80.0,37.5,49.19,100.0,31.11,46.42,6.0,58.71,10.65,,,,0.0,,
18
+ idefics-9b-instruct-t0.0--idefics-9b-instruct-t0.0,7.66,22.56,33.97,100.0,37.5,49.03,12.78,30.43,47.05,0.0,,,0.0,,,0.0,,
19
+ internlm-xcomposer2d5-7b-t0.0--internlm-xcomposer2d5-7b-t0.0,19.69,25.47,77.32,100.0,62.5,49.03,0.0,,,4.0,69.47,13.4,0.0,,,23.33,100.0,0.0
src/results_1.6.5_multimodal.csv ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,"-, clemscore","all, Average % Played","all, Average Quality Score","matchit, % Played","matchit, Quality Score","matchit, Quality Score (std)","mm_mapworld, % Played","mm_mapworld, Quality Score","mm_mapworld, Quality Score (std)","mm_mapworld_graphs, % Played","mm_mapworld_graphs, Quality Score","mm_mapworld_graphs, Quality Score (std)","mm_mapworld_specificroom, % Played","mm_mapworld_specificroom, Quality Score","mm_mapworld_specificroom, Quality Score (std)","multimodal_referencegame, % Played","multimodal_referencegame, Quality Score","multimodal_referencegame, Quality Score (std)"
2
+ Idefics3-8B-Llama3-t0.0--Idefics3-8B-Llama3-t0.0,17.52,32.59,53.76,40.0,79.17,41.49,14.0,4.76,12.6,0.0,,,10.0,100.0,0.0,98.97,31.09,46.35
3
+ InternVL2-26B-t0.0--InternVL2-26B-t0.0,37.45,66.76,56.09,100.0,93.33,25.15,52.0,58.47,20.73,16.67,69.33,16.91,80.0,25.0,44.23,85.13,34.34,47.56
4
+ InternVL2-40B-t0.0--InternVL2-40B-t0.0,32.23,56.27,57.28,96.67,79.31,40.86,28.0,23.29,35.09,33.33,76.2,20.56,23.33,71.43,48.8,100.0,36.15,48.11
5
+ InternVL2-8B-t0.0--InternVL2-8B-t0.0,23.17,46.61,49.7,100.0,68.33,46.91,0.0,,,3.33,85.71,,43.33,7.69,27.74,86.41,37.09,48.38
6
+ InternVL2-Llama3-76B-t0.0--InternVL2-Llama3-76B-t0.0,33.84,54.8,61.76,100.0,90.0,30.25,34.0,57.15,18.59,3.33,54.55,,36.67,72.73,46.71,100.0,34.36,47.55
7
+ Phi-3-vision-128k-instruct-t0.0--Phi-3-vision-128k-instruct-t0.0,3.34,5.06,65.98,0.0,,,4.0,45.0,7.07,3.33,52.94,,0.0,,,17.95,100.0,0.0
8
+ Phi-3.5-vision-instruct-t0.0--Phi-3.5-vision-instruct-t0.0,15.64,40.67,38.46,100.0,0.0,0.0,0.0,,,3.33,100.0,,0.0,,,100.0,15.38,36.13
9
+ Pixtral-12B-2409-t0.0--Pixtral-12B-2409-t0.0,28.64,49.98,57.3,100.0,63.33,48.6,24.0,58.01,29.16,3.33,66.67,,43.33,53.85,51.89,79.23,44.66,49.79
10
+ claude-3-5-sonnet-20240620-t0.0--claude-3-5-sonnet-20240620-t0.0,80.77,95.33,84.73,100.0,85.0,36.01,100.0,82.41,11.7,76.67,85.23,15.36,100.0,90.0,30.51,100.0,81.03,39.26
11
+ claude-3-opus-20240229-t0.0--claude-3-opus-20240229-t0.0,68.16,99.33,68.62,100.0,81.67,39.02,100.0,75.79,14.43,96.67,85.12,13.27,100.0,53.33,50.74,100.0,47.18,49.98
12
+ dolphin-vision-72b-t0.0--dolphin-vision-72b-t0.0,4.65,7.88,58.95,16.67,90.0,31.62,4.0,60.61,8.57,3.33,0.0,,6.67,100.0,0.0,8.72,44.12,50.4
13
+ gemini-1.5-flash-latest-t0.0--gemini-1.5-flash-latest-t0.0,47.73,85.0,56.15,85.0,84.31,36.73,100.0,60.05,20.46,46.67,62.72,13.21,93.33,32.14,47.56,100.0,41.54,49.34
14
+ gpt-4-1106-vision-preview-t0.0--gpt-4-1106-vision-preview-t0.0,73.55,97.79,75.21,100.0,80.0,40.34,100.0,73.74,13.24,90.0,77.25,10.74,100.0,76.67,43.02,98.97,68.39,46.55
15
+ gpt-4o-2024-05-13-t0.0--gpt-4o-2024-05-13-t0.0,69.56,87.73,79.29,100.0,78.33,41.55,52.0,73.58,12.43,90.0,76.06,16.67,96.67,93.1,25.79,100.0,75.38,43.13
16
+ gpt-4o-2024-08-06-t0.0--gpt-4o-2024-08-06-t0.0,80.04,96.93,82.57,93.33,80.36,40.09,98.0,81.59,12.0,96.67,82.93,11.51,96.67,93.1,25.79,100.0,74.87,43.43
17
+ gpt-4o-mini-2024-07-18-t0.0--gpt-4o-mini-2024-07-18-t0.0,58.46,90.04,64.93,100.0,86.67,34.28,92.0,64.65,16.71,73.33,59.93,16.17,86.67,65.38,48.52,98.21,48.04,50.03
18
+ idefics-80b-instruct-t0.0--idefics-80b-instruct-t0.0,29.55,58.29,50.7,88.14,55.77,50.15,20.0,32.78,29.72,50.0,81.36,,33.33,50.0,52.7,100.0,33.59,47.29
19
+ idefics-9b-instruct-t0.0--idefics-9b-instruct-t0.0,12.29,38.0,32.34,100.0,33.33,47.54,0.0,,,0.0,,,0.0,,,90.0,31.34,46.45
20
+ internlm-xcomposer2d5-7b-t0.0--internlm-xcomposer2d5-7b-t0.0,16.95,20.18,83.98,98.33,77.97,41.8,0.0,,,0.0,,,0.0,,,2.56,90.0,31.62
src/results_1.6.csv ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,"-, clemscore","all, Average % Played","all, Average Quality Score","imagegame, % Played","imagegame, Quality Score","imagegame, Quality Score (std)","privateshared, % Played","privateshared, Quality Score","privateshared, Quality Score (std)","referencegame, % Played","referencegame, Quality Score","referencegame, Quality Score (std)","taboo, % Played","taboo, Quality Score","taboo, Quality Score (std)","wordle, % Played","wordle, Quality Score","wordle, Quality Score (std)","wordle_withclue, % Played","wordle_withclue, Quality Score","wordle_withclue, Quality Score (std)","wordle_withcritic, % Played","wordle_withcritic, Quality Score","wordle_withcritic, Quality Score (std)"
2
+ CodeLlama-34b-Instruct-hf-t0.0--CodeLlama-34b-Instruct-hf-t0.0,14.35,33.57,42.76,0.0,,,0.0,,,100.0,94.44,22.97,51.67,51.61,50.8,56.67,0.0,0.0,26.67,25.0,46.29,0.0,,
3
+ Meta-Llama-3-70B-Instruct-hf-t0.0--Meta-Llama-3-70B-Instruct-hf-t0.0,35.11,80.72,43.5,0.0,,,100.0,84.37,13.69,100.0,64.44,48.0,91.67,70.3,39.37,90.0,1.85,6.81,96.67,14.37,32.34,86.67,25.64,39.08
4
+ Meta-Llama-3-8B-Instruct-hf-t0.0--Meta-Llama-3-8B-Instruct-hf-t0.0,19.99,76.1,26.27,0.0,,,96.0,58.91,30.05,100.0,46.11,49.99,100.0,37.78,45.08,86.67,0.0,0.0,83.33,14.0,33.91,66.67,0.83,3.73
5
+ Meta-Llama-3.1-405B-Instruct-Turbo-t0.0--Meta-Llama-3.1-405B-Instruct-Turbo-t0.0,52.11,90.12,57.82,62.5,94.12,12.55,100.0,84.24,29.65,100.0,80.0,40.11,95.0,76.61,36.03,93.33,7.14,12.72,93.33,29.7,34.87,86.67,32.95,35.63
6
+ Meta-Llama-3.1-70B-Instruct-t0.0--Meta-Llama-3.1-70B-Instruct-t0.0,38.83,82.14,47.27,0.0,,,100.0,85.69,13.94,100.0,72.78,44.63,88.33,75.16,34.69,93.33,1.43,5.25,93.33,19.29,33.55,100.0,29.28,36.5
7
+ Meta-Llama-3.1-8B-Instruct-t0.0--Meta-Llama-3.1-8B-Instruct-t0.0,18.36,72.91,25.18,0.0,,,82.0,29.31,25.61,100.0,47.78,50.09,88.33,50.0,47.7,96.67,0.0,0.0,76.67,12.32,29.82,66.67,11.67,31.11
8
+ Mistral-7B-Instruct-v0.1-t0.0--Mistral-7B-Instruct-v0.1-t0.0,8.01,37.14,21.58,0.0,,,20.0,1.21,2.58,100.0,55.0,49.89,100.0,31.67,45.07,0.0,,,23.33,0.0,0.0,16.67,20.0,44.72
9
+ Mistral-7B-Instruct-v0.2-t0.0--Mistral-7B-Instruct-v0.2-t0.0,9.75,36.91,26.42,0.0,,,0.0,,,100.0,38.33,48.76,65.0,0.0,0.0,50.0,0.0,0.0,26.67,43.75,49.55,16.67,50.0,50.0
10
+ Mistral-Large-Instruct-2407-t0.0--Mistral-Large-Instruct-2407-t0.0,45.39,82.21,55.21,7.5,100.0,0.0,78.0,81.39,13.12,100.0,81.11,39.25,96.67,71.84,36.57,100.0,0.67,3.65,93.33,21.73,32.97,100.0,29.72,41.79
11
+ Mixtral-8x22B-Instruct-v0.1-t0.0--Mixtral-8x22B-Instruct-v0.1-t0.0,12.69,52.14,24.33,0.0,,,0.0,,,100.0,36.67,48.32,58.33,40.0,49.71,96.67,0.0,0.0,60.0,15.0,33.3,50.0,30.0,41.4
12
+ Mixtral-8x7B-Instruct-v0.1-t0.0--Mixtral-8x7B-Instruct-v0.1-t0.0,8.17,47.62,17.15,0.0,,,0.0,,,61.67,41.44,49.49,51.67,9.68,30.05,96.67,0.0,0.0,76.67,19.13,35.28,46.67,15.48,36.08
13
+ Nous-Hermes-2-Mixtral-8x7B-SFT-t0.0--Nous-Hermes-2-Mixtral-8x7B-SFT-t0.0,11.95,39.68,30.12,0.0,,,0.0,,,97.78,36.93,48.4,93.33,47.92,47.36,0.0,,,53.33,15.62,30.1,33.33,20.0,42.16
14
+ Phi-3-mini-128k-instruct-t0.0--Phi-3-mini-128k-instruct-t0.0,6.33,34.52,18.34,0.0,,,0.0,,,100.0,36.67,48.32,98.33,0.0,0.0,0.0,,,33.33,20.0,42.16,10.0,16.67,28.87
15
+ Qwen1.5-0.5B-Chat-t0.0--Qwen1.5-0.5B-Chat-t0.0,0.12,25.72,0.48,0.0,,,0.0,,,0.0,,,86.67,1.92,13.87,46.67,0.0,0.0,40.0,0.0,0.0,6.67,0.0,0.0
16
+ Qwen1.5-1.8B-Chat-t0.0--Qwen1.5-1.8B-Chat-t0.0,0.0,15.24,0.0,0.0,,,0.0,,,0.0,,,93.33,0.0,0.0,0.0,,,10.0,0.0,0.0,3.33,0.0,
17
+ Qwen1.5-14B-Chat-t0.0--Qwen1.5-14B-Chat-t0.0,16.8,40.95,41.02,30.0,20.58,14.69,0.0,,,100.0,44.44,49.83,46.67,41.07,47.25,90.0,0.0,0.0,16.67,40.0,54.77,3.33,100.0,
18
+ Qwen1.5-32B-Chat-t0.0--Qwen1.5-32B-Chat-t0.0,15.41,63.69,24.19,67.5,42.15,29.29,20.0,35.52,9.63,100.0,12.78,33.48,61.67,42.79,47.39,93.33,0.0,0.0,60.0,16.85,33.34,43.33,19.23,38.4
19
+ Qwen1.5-72B-Chat-t0.0--Qwen1.5-72B-Chat-t0.0,30.37,80.05,37.94,65.0,50.0,25.53,92.0,52.87,20.39,100.0,37.22,48.47,73.33,73.11,43.02,96.67,0.69,3.71,90.0,20.93,39.03,43.33,30.77,48.04
20
+ Qwen1.5-7B-Chat-t0.0--Qwen1.5-7B-Chat-t0.0,2.58,30.24,8.53,0.0,,,0.0,,,100.0,20.56,40.52,98.33,13.56,33.26,0.0,,,10.0,0.0,0.0,3.33,0.0,
21
+ Qwen2-72B-Instruct-t0.0--Qwen2-72B-Instruct-t0.0,30.03,74.52,40.3,0.0,,,80.0,65.69,22.85,100.0,67.22,47.07,91.67,70.61,40.31,100.0,2.67,10.48,86.67,12.82,29.56,63.33,22.81,33.31
22
+ Qwen2-7B-Instruct-t0.0--Qwen2-7B-Instruct-t0.0,6.18,35.32,17.51,5.0,23.0,1.41,0.0,,,98.89,41.01,49.32,86.67,41.03,46.14,26.67,0.0,0.0,26.67,0.0,0.0,3.33,0.0,
23
+ SUS-Chat-34B-t0.0--SUS-Chat-34B-t0.0,14.11,54.4,25.93,2.5,29.0,,20.0,0.0,0.0,100.0,70.0,45.95,98.33,52.26,45.64,93.33,0.0,0.0,43.33,23.08,43.85,23.33,7.14,18.9
24
+ Starling-LM-7B-beta-t0.0--Starling-LM-7B-beta-t0.0,6.56,30.89,21.25,0.0,,,4.0,97.12,4.08,62.22,30.36,46.19,46.67,0.0,0.0,66.67,0.0,0.0,33.33,0.0,0.0,3.33,0.0,
25
+ WizardLM-13b-v1.2-t0.0--WizardLM-13b-v1.2-t0.0,11.48,39.57,29.0,0.0,,,42.0,10.11,21.96,100.0,71.11,45.45,35.0,64.29,45.12,26.67,0.0,0.0,53.33,6.25,25.0,20.0,22.22,40.37
26
+ WizardLM-70b-v1.0-t0.0--WizardLM-70b-v1.0-t0.0,17.4,46.19,37.66,0.0,,,0.0,,,100.0,81.67,38.8,56.67,70.59,44.58,73.33,0.0,0.0,56.67,17.84,34.09,36.67,18.18,40.45
27
+ Yi-1.5-34B-Chat-t0.0--Yi-1.5-34B-Chat-t0.0,7.67,52.38,14.65,0.0,,,0.0,,,100.0,43.33,49.69,66.67,0.0,0.0,96.67,0.0,0.0,70.0,18.25,36.48,33.33,11.67,31.48
28
+ Yi-1.5-6B-Chat-t0.0--Yi-1.5-6B-Chat-t0.0,6.73,41.43,16.25,0.0,,,0.0,,,88.33,34.59,47.72,65.0,0.0,0.0,86.67,0.0,0.0,33.33,20.0,42.16,16.67,26.67,43.46
29
+ Yi-1.5-9B-Chat-t0.0--Yi-1.5-9B-Chat-t0.0,4.37,38.1,11.48,0.0,,,0.0,,,51.67,41.94,49.61,41.67,0.0,0.0,86.67,0.0,0.0,46.67,7.14,26.73,40.0,8.33,28.87
30
+ Yi-34B-Chat-t0.0--Yi-34B-Chat-t0.0,8.27,40.86,20.25,35.0,9.07,10.84,26.0,8.02,17.17,3.33,33.33,51.64,68.33,41.46,49.88,83.33,0.0,0.0,43.33,26.92,43.85,26.67,22.92,36.66
31
+ aya-23-35B-t0.0--aya-23-35B-t0.0,13.35,47.9,27.88,0.0,,,82.0,31.48,15.69,100.0,42.78,49.61,90.0,40.43,45.63,0.0,,,33.33,19.17,31.93,30.0,5.56,16.67
32
+ aya-23-8B-t0.0--aya-23-8B-t0.0,11.72,45.24,25.9,0.0,,,50.0,35.71,33.91,100.0,35.0,47.83,100.0,22.22,40.44,40.0,0.0,0.0,13.33,50.0,57.74,13.33,12.5,25.0
33
+ claude-2.1-t0.0--claude-2.1-t0.0,32.5,82.14,39.57,0.0,,,100.0,74.92,26.26,100.0,50.56,50.14,95.0,64.91,45.93,96.67,7.59,21.16,86.67,21.6,39.58,96.67,17.82,35.34
34
+ claude-3-5-sonnet-20240620-t0.0--claude-3-5-sonnet-20240620-t0.0,57.08,89.64,63.68,97.5,97.1,10.08,100.0,89.57,10.67,100.0,91.11,28.54,33.33,72.5,28.75,100.0,15.0,23.45,100.0,41.44,34.92,96.67,39.02,34.81
35
+ claude-3-haiku-20240307-t0.0--claude-3-haiku-20240307-t0.0,22.49,79.52,28.28,0.0,,,100.0,50.46,34.83,100.0,17.22,37.86,63.33,78.95,32.11,100.0,0.0,0.0,100.0,8.44,21.27,93.33,14.58,31.64
36
+ claude-3-opus-20240229-t0.0--claude-3-opus-20240229-t0.0,42.42,83.1,51.05,0.0,,,100.0,95.32,6.4,100.0,29.44,45.71,88.33,83.65,32.11,100.0,20.0,28.65,96.67,46.09,38.59,96.67,31.78,35.15
37
+ claude-3-sonnet-20240229-t0.0--claude-3-sonnet-20240229-t0.0,30.53,85.24,35.82,0.0,,,100.0,60.81,25.28,100.0,27.22,44.63,100.0,73.61,36.73,100.0,10.67,23.33,100.0,20.5,33.65,96.67,22.13,33.35
38
+ codegemma-7b-it-t0.0--codegemma-7b-it-t0.0,15.3,51.95,29.45,0.0,,,42.0,0.0,0.0,81.67,96.6,18.19,83.33,26.0,44.31,96.67,0.0,0.0,43.33,14.1,30.31,16.67,40.0,54.77
39
+ command-r-plus-t0.0--command-r-plus-t0.0,24.94,74.9,33.3,0.0,,,,,,99.44,47.49,50.08,63.33,67.11,45.44,100.0,7.33,19.82,93.33,26.79,37.91,93.33,17.8,32.58
40
+ command-r-t0.0--command-r-t0.0,14.15,61.67,22.95,0.0,,,,,,100.0,23.33,42.41,63.33,44.74,47.63,93.33,0.0,0.0,66.67,30.0,44.13,46.67,16.67,36.4
41
+ dolphin-2.5-mixtral-8x7b-t0.0--dolphin-2.5-mixtral-8x7b-t0.0,15.1,46.38,32.55,0.0,,,48.0,58.95,25.96,100.0,35.0,47.83,100.0,41.11,46.79,0.0,,,43.33,7.69,27.74,33.33,20.0,42.16
42
+ gemini-1.0-pro-t0.0--gemini-1.0-pro-t0.0,26.95,80.14,33.63,30.0,49.08,26.5,76.0,63.7,19.97,100.0,46.11,49.99,85.0,55.23,44.53,90.0,0.74,3.85,86.67,12.82,32.76,93.33,7.74,21.98
43
+ gemini-1.5-flash-latest-t0.0--gemini-1.5-flash-latest-t0.0,32.0,76.14,42.03,0.0,,,98.0,78.18,20.17,100.0,61.11,48.89,91.67,57.88,43.61,96.67,0.69,3.71,66.67,33.33,38.9,80.0,20.97,31.07
44
+ gemini-1.5-pro-latest-t0.0--gemini-1.5-pro-latest-t0.0,41.9,81.29,51.55,0.0,,,94.0,88.7,10.41,100.0,65.0,47.83,85.0,70.59,35.84,100.0,10.67,22.43,93.33,41.37,39.25,96.67,32.99,35.32
45
+ gemma-1.1-2b-it-t0.0--gemma-1.1-2b-it-t0.0,2.91,22.62,12.87,0.0,,,0.0,,,100.0,20.0,40.11,45.0,14.81,36.2,0.0,,,6.67,0.0,0.0,6.67,16.66,23.57
46
+ gemma-1.1-7b-it-t0.0--gemma-1.1-7b-it-t0.0,14.14,49.67,28.46,0.0,,,6.0,10.83,10.1,100.0,92.22,26.86,35.0,52.38,51.18,73.33,0.0,0.0,76.67,6.52,22.88,56.67,8.82,26.43
47
+ gemma-2-27b-it-t0.0--gemma-2-27b-it-t0.0,3.51,11.9,29.51,0.0,,,0.0,,,75.0,38.52,48.85,5.0,0.0,0.0,0.0,,,0.0,,,3.33,50.0,
48
+ gemma-2-2b-it-t0.0--gemma-2-2b-it-t0.0,2.67,38.33,6.96,0.0,,,0.0,,,0.0,,,98.33,0.0,0.0,100.0,0.0,0.0,46.67,10.71,28.95,23.33,17.14,37.29
49
+ gemma-2-9b-it-t0.0--gemma-2-9b-it-t0.0,27.34,75.48,36.22,0.0,,,70.0,53.52,40.57,100.0,42.22,49.53,78.33,77.66,38.27,100.0,1.67,9.13,93.33,17.26,33.48,86.67,25.0,43.01
50
+ gemma-7b-it-t0.0--gemma-7b-it-t0.0,1.82,17.78,10.23,0.0,,,0.0,,,97.78,40.91,49.31,0.0,,,3.33,0.0,,3.33,0.0,,20.0,0.0,0.0
51
+ gpt-3.5-turbo-0125-t0.0--gpt-3.5-turbo-0125-t0.0,27.22,89.67,30.36,70.0,64.18,29.33,96.0,36.7,31.04,100.0,3.33,18.0,68.33,73.17,41.98,100.0,0.0,0.0,96.67,24.25,40.95,96.67,10.92,27.56
52
+ gpt-4-0125-preview-t0.0--gpt-4-0125-preview-t0.0,52.5,94.92,55.31,100.0,99.6,1.53,100.0,90.22,6.92,99.44,31.84,46.72,75.0,93.33,20.23,100.0,20.67,27.66,100.0,33.17,42.87,90.0,18.33,32.69
53
+ gpt-4-0613-t0.0--gpt-4-0613-t0.0,51.09,94.88,53.85,77.5,98.19,10.06,100.0,97.33,4.12,100.0,35.56,48.0,86.67,79.81,33.22,100.0,9.0,25.78,100.0,36.78,40.4,100.0,20.28,29.17
54
+ gpt-4-1106-preview-t0.0--gpt-4-1106-preview-t0.0,51.99,98.1,53.0,95.0,94.34,10.24,100.0,87.08,10.69,100.0,29.44,45.71,91.67,83.94,29.57,100.0,13.0,27.56,100.0,29.0,39.53,100.0,34.22,39.55
55
+ gpt-4-turbo-2024-04-09-t0.0--gpt-4-turbo-2024-04-09-t0.0,58.3,94.88,61.45,82.5,99.79,1.22,100.0,92.68,9.45,100.0,88.89,31.51,85.0,82.35,30.81,100.0,16.33,31.35,100.0,29.89,39.18,96.67,20.23,28.21
56
+ gpt-4o-2024-05-13-t0.0--gpt-4o-2024-05-13-t0.0,48.34,85.71,56.4,0.0,,,100.0,94.66,5.56,100.0,90.0,30.08,100.0,75.28,35.19,100.0,19.33,28.52,100.0,28.0,36.38,100.0,31.11,33.14
57
+ gpt-4o-2024-08-06-t0.0--gpt-4o-2024-08-06-t0.0,47.71,85.71,55.66,0.0,,,100.0,90.36,8.32,100.0,87.78,32.85,100.0,85.28,25.51,100.0,23.0,30.53,100.0,23.94,34.28,100.0,23.61,30.72
58
+ gpt-4o-mini-2024-07-18-t0.0--gpt-4o-mini-2024-07-18-t0.0,34.64,85.06,40.73,0.0,,,96.0,59.27,19.82,99.44,73.74,44.13,100.0,69.72,40.18,100.0,10.33,23.56,100.0,15.67,31.78,100.0,15.67,31.17
59
+ llama-2-70b-chat-hf-t0.0--llama-2-70b-chat-hf-t0.0,0.81,7.14,11.31,0.0,,,0.0,,,46.67,22.62,42.09,0.0,,,0.0,,,3.33,0.0,,0.0,,
60
+ mistral-large-2402-t0.0--mistral-large-2402-t0.0,28.17,66.86,42.14,0.0,,,98.0,77.07,27.28,100.0,25.0,43.42,60.0,88.89,31.87,63.33,5.26,22.94,83.33,26.8,36.21,63.33,29.82,40.57
61
+ mistral-medium-2312-t0.0--mistral-medium-2312-t0.0,16.43,49.25,33.36,0.0,,,22.0,15.28,24.12,76.11,48.91,50.17,30.0,88.89,32.34,80.0,0.0,0.0,83.33,26.8,42.67,53.33,20.31,40.02
62
+ openchat-3.5-0106-t0.0--openchat-3.5-0106-t0.0,17.1,52.57,32.52,35.0,0.86,3.21,98.0,56.86,23.59,100.0,93.33,25.01,65.0,64.1,48.6,10.0,0.0,0.0,40.0,12.5,31.08,20.0,0.0,0.0
63
+ openchat-3.5-1210-t0.0--openchat-3.5-1210-t0.0,18.22,51.19,35.6,15.0,3.17,7.76,80.0,60.82,24.23,100.0,90.56,29.33,40.0,66.67,48.15,46.67,0.0,0.0,53.33,20.83,40.14,23.33,7.14,18.9
64
+ openchat_3.5-t0.0--openchat_3.5-t0.0,23.64,63.52,37.22,50.0,8.7,11.65,38.0,73.36,22.12,100.0,73.89,44.05,100.0,45.0,49.32,90.0,0.0,0.0,36.67,15.15,31.14,30.0,44.44,52.7
65
+ sheep-duck-llama-2-13b-t0.0--sheep-duck-llama-2-13b-t0.0,5.39,31.9,16.9,0.0,,,0.0,,,96.67,1.72,13.05,83.33,4.0,19.79,0.0,,,23.33,28.57,48.8,20.0,33.33,51.64
66
+ sheep-duck-llama-2-70b-v1.1-t0.0--sheep-duck-llama-2-70b-v1.1-t0.0,21.5,41.19,52.2,0.0,,,0.0,,,100.0,83.33,37.37,55.0,90.91,29.19,60.0,0.0,0.0,43.33,42.31,44.94,30.0,44.44,46.4
67
+ tulu-2-dpo-70b-t0.0--tulu-2-dpo-70b-t0.0,12.62,49.76,25.37,0.0,,,0.0,,,100.0,16.67,37.37,68.33,68.29,47.11,80.0,0.0,0.0,53.33,16.88,29.83,46.67,25.0,42.74
68
+ vicuna-13b-v1.5-t0.0--vicuna-13b-v1.5-t0.0,7.01,39.52,17.73,0.0,,,20.0,20.27,8.84,100.0,0.0,0.0,46.67,60.71,49.73,53.33,0.0,0.0,36.67,21.21,40.2,20.0,4.17,10.21
69
+ vicuna-33b-v1.3-t0.0--vicuna-33b-v1.3-t0.0,11.27,23.81,47.32,0.0,,,0.0,,,100.0,0.0,0.0,46.67,89.29,31.5,0.0,,,10.0,16.67,28.87,10.0,83.33,28.87