Commit
·
b7fabea
1
Parent(s):
d8f251a
first sample
Browse files
- app.py +2 -5
- src/combined_data.json +565 -0
- src/main_df.csv +24 -0
- src/pricing.py +11 -5
- src/process_data.py +68 -0
- src/results_1.6.5_ascii.csv +19 -0
- src/results_1.6.5_multimodal.csv +20 -0
- src/results_1.6.csv +69 -0
app.py
CHANGED
@@ -1,11 +1,8 @@
|
|
1 |
import pandas as pd
|
2 |
import gradio as gr
|
3 |
-
import
|
4 |
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
text_leaderboard = fetch_prices()
|
9 |
llm_calc_app = gr.Blocks()
|
10 |
with llm_calc_app:
|
11 |
|
|
|
1 |
import pandas as pd
|
2 |
import gradio as gr
|
3 |
+
import os
|
4 |
|
5 |
+
text_leaderboard = pd.read_csv(os.path.join('src', 'main_df.csv'))
|
|
|
|
|
|
|
6 |
llm_calc_app = gr.Blocks()
|
7 |
with llm_calc_app:
|
8 |
|
src/combined_data.json
ADDED
@@ -0,0 +1,565 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"model_name": "Meta-Llama-3-70B-Instruct-hf",
|
4 |
+
"input_price": "0",
|
5 |
+
"output_price": "0",
|
6 |
+
"multimodality": {
|
7 |
+
"image": false,
|
8 |
+
"multiple_image": false,
|
9 |
+
"audio": false,
|
10 |
+
"video": false
|
11 |
+
},
|
12 |
+
"source": "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct",
|
13 |
+
"licence": {
|
14 |
+
"name": "Meta Llama 3 License",
|
15 |
+
"url": "https://www.llama.com/llama3/license/"
|
16 |
+
},
|
17 |
+
"languages": ["eng"],
|
18 |
+
"release_date": "2024-04-18",
|
19 |
+
"parameters":{
|
20 |
+
"estimated": false,
|
21 |
+
"actual": "70B"
|
22 |
+
},
|
23 |
+
"open_weight": true,
|
24 |
+
"context": "8k"
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"model_name": "Meta-Llama-3-8B-Instruct-hf",
|
28 |
+
"input_price": "0",
|
29 |
+
"output_price": "0",
|
30 |
+
"multimodality": {
|
31 |
+
"image": false,
|
32 |
+
"multiple_image": false,
|
33 |
+
"audio": false,
|
34 |
+
"video": false
|
35 |
+
},
|
36 |
+
"source": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct",
|
37 |
+
"licence": {
|
38 |
+
"name": "Meta Llama 3 License",
|
39 |
+
"url": "https://www.llama.com/llama3/license/"
|
40 |
+
},
|
41 |
+
"languages": ["eng"],
|
42 |
+
"release_date": "2024-04-18",
|
43 |
+
"parameters":{
|
44 |
+
"estimated": false,
|
45 |
+
"actual": "8B"
|
46 |
+
},
|
47 |
+
"open_weight": true,
|
48 |
+
"context": "8k"
|
49 |
+
},
|
50 |
+
{
|
51 |
+
"model_name": "Meta-Llama-3.1-405B-Instruct-Turbo",
|
52 |
+
"input_price": "0",
|
53 |
+
"output_price": "0",
|
54 |
+
"multimodality": {
|
55 |
+
"image": false,
|
56 |
+
"multiple_image": false,
|
57 |
+
"audio": false,
|
58 |
+
"video": false
|
59 |
+
},
|
60 |
+
"source": "https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct",
|
61 |
+
"licence": {
|
62 |
+
"name": "Llama 3.1 Community License",
|
63 |
+
"url": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE"
|
64 |
+
},
|
65 |
+
"languages": ["eng", "deu", "fra", "ita", "hin", "por", "spa", "tha"],
|
66 |
+
"release_date": "2024-07-23",
|
67 |
+
"parameters":{
|
68 |
+
"estimated": false,
|
69 |
+
"actual": "405B"
|
70 |
+
},
|
71 |
+
"open_weight": true,
|
72 |
+
"context": "128k"
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"model_name": "Meta-Llama-3.1-70B-Instruct",
|
76 |
+
"input_price": "0",
|
77 |
+
"output_price": "0",
|
78 |
+
"multimodality": {
|
79 |
+
"image": false,
|
80 |
+
"multiple_image": false,
|
81 |
+
"audio": false,
|
82 |
+
"video": false
|
83 |
+
},
|
84 |
+
"source": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
|
85 |
+
"licence": {
|
86 |
+
"name": "Llama 3.1 Community License",
|
87 |
+
"url": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE"
|
88 |
+
},
|
89 |
+
"languages": ["eng", "deu", "fra", "ita", "hin", "por", "spa", "tha"],
|
90 |
+
"release_date": "2024-07-23",
|
91 |
+
"parameters": {
|
92 |
+
"estimated": false,
|
93 |
+
"actual": "70B"
|
94 |
+
},
|
95 |
+
"open_weight": true,
|
96 |
+
"context": "128k"
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"model_name": "Meta-Llama-3.1-8B-Instruct",
|
100 |
+
"input_price": "0",
|
101 |
+
"output_price": "0",
|
102 |
+
"multimodality": {
|
103 |
+
"image": false,
|
104 |
+
"multiple_image": false,
|
105 |
+
"audio": false,
|
106 |
+
"video": false
|
107 |
+
},
|
108 |
+
"source": "https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct",
|
109 |
+
"licence": {
|
110 |
+
"name": "Llama 3.1 Community License",
|
111 |
+
"url": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE"
|
112 |
+
},
|
113 |
+
"languages": ["eng", "deu", "fra", "ita", "hin", "por", "spa", "tha"],
|
114 |
+
"release_date": "2024-07-23",
|
115 |
+
"parameters": {
|
116 |
+
"estimated": false,
|
117 |
+
"actual": "8B"
|
118 |
+
},
|
119 |
+
"open_weight": true,
|
120 |
+
"context": "128k"
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"model_name": "InternVL2-40B",
|
124 |
+
"input_price": "0",
|
125 |
+
"output_price": "0",
|
126 |
+
"multimodality": {
|
127 |
+
"image": true,
|
128 |
+
"multiple_image": true,
|
129 |
+
"audio": false,
|
130 |
+
"video": false
|
131 |
+
},
|
132 |
+
"source": "https://huggingface.co/OpenGVLab/InternVL2-40B",
|
133 |
+
"licence": {
|
134 |
+
"name": "MIT",
|
135 |
+
"url": "https://choosealicense.com/licenses/mit/"
|
136 |
+
},
|
137 |
+
"languages": ["chi","eng","fre","spa","por","deu","ita","rus","jpn","kor","vie","tha","ara"],
|
138 |
+
"release_date": "2024-07-15",
|
139 |
+
"parameters": {
|
140 |
+
"estimated": false,
|
141 |
+
"actual": "40B"
|
142 |
+
},
|
143 |
+
"open_weight": true,
|
144 |
+
"context": "8k"
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"model_name": "InternVL2-8B",
|
148 |
+
"input_price": "0",
|
149 |
+
"output_price": "0",
|
150 |
+
"multimodality": {
|
151 |
+
"image": true,
|
152 |
+
"multiple_image": true,
|
153 |
+
"audio": false,
|
154 |
+
"video": false
|
155 |
+
},
|
156 |
+
"source": "https://huggingface.co/OpenGVLab/InternVL2-8B",
|
157 |
+
"licence": {
|
158 |
+
"name": "MIT",
|
159 |
+
"url": "https://choosealicense.com/licenses/mit/"
|
160 |
+
},
|
161 |
+
"languages": ["chi","eng","fre","spa","por","deu","ita","rus","jpn","kor","vie","tha","ara"],
|
162 |
+
"release_date": "2024-07-15",
|
163 |
+
"parameters": {
|
164 |
+
"estimated": false,
|
165 |
+
"actual": "8B"
|
166 |
+
},
|
167 |
+
"open_weight": true,
|
168 |
+
"context": "8k"
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"model_name": "InternVL2-Llama3-76B",
|
172 |
+
"input_price": "0",
|
173 |
+
"output_price": "0",
|
174 |
+
"multimodality": {
|
175 |
+
"image": true,
|
176 |
+
"multiple_image": true,
|
177 |
+
"audio": false,
|
178 |
+
"video": false
|
179 |
+
},
|
180 |
+
"source": "https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B",
|
181 |
+
"licence": {
|
182 |
+
"name": "MIT",
|
183 |
+
"url": "https://choosealicense.com/licenses/mit/"
|
184 |
+
},
|
185 |
+
"languages": ["chi","eng","fre","spa","por","deu","ita","rus","jpn","kor","vie","tha","ara"],
|
186 |
+
"release_date": "2024-07-15",
|
187 |
+
"parameters": {
|
188 |
+
"estimated": false,
|
189 |
+
"actual": "76B"
|
190 |
+
},
|
191 |
+
"open_weight": true,
|
192 |
+
"context": "8k"
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"model_name": "InternVL2-26B",
|
196 |
+
"input_price": "0",
|
197 |
+
"output_price": "0",
|
198 |
+
"multimodality": {
|
199 |
+
"image": true,
|
200 |
+
"multiple_image": true,
|
201 |
+
"audio": false,
|
202 |
+
"video": false
|
203 |
+
},
|
204 |
+
"source": "https://huggingface.co/OpenGVLab/InternVL2-26B",
|
205 |
+
"licence": {
|
206 |
+
"name": "MIT",
|
207 |
+
"url": "https://choosealicense.com/licenses/mit/"
|
208 |
+
},
|
209 |
+
"languages": ["chi","eng","fre","spa","por","deu","ita","rus","jpn","kor","vie","tha","ara"],
|
210 |
+
"release_date": "2024-07-15",
|
211 |
+
"parameters": {
|
212 |
+
"estimated": false,
|
213 |
+
"actual": "26B"
|
214 |
+
},
|
215 |
+
"open_weight": true,
|
216 |
+
"context": "8k"
|
217 |
+
},
|
218 |
+
|
219 |
+
{
|
220 |
+
"model_name": "InternVL2-26B",
|
221 |
+
"input_price": "0",
|
222 |
+
"output_price": "0",
|
223 |
+
"multimodality": {
|
224 |
+
"image": true,
|
225 |
+
"multiple_image": true,
|
226 |
+
"audio": false,
|
227 |
+
"video": false
|
228 |
+
},
|
229 |
+
"source": "https://huggingface.co/OpenGVLab/InternVL2-26B",
|
230 |
+
"licence": {
|
231 |
+
"name": "MIT",
|
232 |
+
"url": "https://choosealicense.com/licenses/mit/"
|
233 |
+
},
|
234 |
+
"languages": ["chi","eng","fre","spa","por","deu","ita","rus","jpn","kor","vie","tha","ara"],
|
235 |
+
"release_date": "2024-07-15",
|
236 |
+
"parameters": {
|
237 |
+
"estimated": false,
|
238 |
+
"actual": "26B"
|
239 |
+
},
|
240 |
+
"open_weight": true,
|
241 |
+
"context": "8k"
|
242 |
+
},
|
243 |
+
{
|
244 |
+
"model_name": "Mistral-Large-Instruct-2407",
|
245 |
+
"input_price": "0",
|
246 |
+
"output_price": "0",
|
247 |
+
"multimodality": {
|
248 |
+
"image": false,
|
249 |
+
"multiple_image": false,
|
250 |
+
"audio": false,
|
251 |
+
"video": false
|
252 |
+
},
|
253 |
+
"source": "https://huggingface.co/mistralai/Mistral-Large-Instruct-2407",
|
254 |
+
"licence": {
|
255 |
+
"name": "Apache 2.0",
|
256 |
+
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
257 |
+
},
|
258 |
+
"languages": ["eng", "fra", "spa", "deu", "ita", "rus", "chi", "jpn", "kor"],
|
259 |
+
"release_date": "2024-06-12",
|
260 |
+
"parameters": {
|
261 |
+
"estimated": false,
|
262 |
+
"actual": "70B"
|
263 |
+
},
|
264 |
+
"open_weight": true,
|
265 |
+
"context": "8k"
|
266 |
+
},
|
267 |
+
{
|
268 |
+
"model_name": "Mixtral-8x22B-Instruct-v0.1",
|
269 |
+
"input_price": "0",
|
270 |
+
"output_price": "0",
|
271 |
+
"multimodality": {
|
272 |
+
"image": false,
|
273 |
+
"multiple_image": false,
|
274 |
+
"audio": false,
|
275 |
+
"video": false
|
276 |
+
},
|
277 |
+
"source": "https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1",
|
278 |
+
"licence": {
|
279 |
+
"name": "Apache 2.0",
|
280 |
+
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
281 |
+
},
|
282 |
+
"languages": ["eng", "fra", "spa", "deu", "ita", "rus"],
|
283 |
+
"release_date": "2024-04-17",
|
284 |
+
"parameters": {
|
285 |
+
"estimated": false,
|
286 |
+
"actual": "141B"
|
287 |
+
},
|
288 |
+
"open_weight": true,
|
289 |
+
"context": "8k"
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"model_name": "Mistral-7B-Instruct-v0.2",
|
293 |
+
"input_price": "0",
|
294 |
+
"output_price": "0",
|
295 |
+
"multimodality": {
|
296 |
+
"image": false,
|
297 |
+
"multiple_image": false,
|
298 |
+
"audio": false,
|
299 |
+
"video": false
|
300 |
+
},
|
301 |
+
"source": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
|
302 |
+
"licence": {
|
303 |
+
"name": "Apache 2.0",
|
304 |
+
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
305 |
+
},
|
306 |
+
"languages": ["eng", "fra", "spa", "deu", "ita", "rus", "chi"],
|
307 |
+
"release_date": "2024-01-15",
|
308 |
+
"parameters": {
|
309 |
+
"estimated": false,
|
310 |
+
"actual": "7B"
|
311 |
+
},
|
312 |
+
"open_weight": true,
|
313 |
+
"context": "8k"
|
314 |
+
},
|
315 |
+
{
|
316 |
+
"model_name": "Mistral-7B-Instruct-v0.1",
|
317 |
+
"input_price": "0",
|
318 |
+
"output_price": "0",
|
319 |
+
"multimodality": {
|
320 |
+
"image": false,
|
321 |
+
"multiple_image": false,
|
322 |
+
"audio": false,
|
323 |
+
"video": false
|
324 |
+
},
|
325 |
+
"source": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1",
|
326 |
+
"licence": {
|
327 |
+
"name": "Apache 2.0",
|
328 |
+
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
329 |
+
},
|
330 |
+
"languages": ["eng", "fra", "spa", "deu", "ita", "rus", "chi"],
|
331 |
+
"release_date": "2023-12-11",
|
332 |
+
"parameters": {
|
333 |
+
"estimated": false,
|
334 |
+
"actual": "7B"
|
335 |
+
},
|
336 |
+
"open_weight": true,
|
337 |
+
"context": "8k"
|
338 |
+
},
|
339 |
+
{
|
340 |
+
"model_name": "Mixtral-8x7B-Instruct-v0.1",
|
341 |
+
"input_price": "0",
|
342 |
+
"output_price": "0",
|
343 |
+
"multimodality": {
|
344 |
+
"image": false,
|
345 |
+
"multiple_image": false,
|
346 |
+
"audio": false,
|
347 |
+
"video": false
|
348 |
+
},
|
349 |
+
"source": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
|
350 |
+
"licence": {
|
351 |
+
"name": "Apache 2.0",
|
352 |
+
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
353 |
+
},
|
354 |
+
"languages": ["eng", "fra", "spa", "deu", "ita", "rus"],
|
355 |
+
"release_date": "2023-12-11",
|
356 |
+
"parameters": {
|
357 |
+
"estimated": false,
|
358 |
+
"actual": "46.7B"
|
359 |
+
},
|
360 |
+
"open_weight": true,
|
361 |
+
"context": "8k"
|
362 |
+
},
|
363 |
+
{
|
364 |
+
"model_name": "openchat-3.5-0106",
|
365 |
+
"input_price": "0",
|
366 |
+
"output_price": "0",
|
367 |
+
"multimodality": {
|
368 |
+
"image": false,
|
369 |
+
"multiple_image": false,
|
370 |
+
"audio": false,
|
371 |
+
"video": false
|
372 |
+
},
|
373 |
+
"source": "https://huggingface.co/openchat/openchat-3.5-0106",
|
374 |
+
"licence": {
|
375 |
+
"name": "Apache 2.0",
|
376 |
+
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
377 |
+
},
|
378 |
+
"languages": ["eng"],
|
379 |
+
"release_date": "2024-01-06",
|
380 |
+
"parameters": {
|
381 |
+
"estimated": false,
|
382 |
+
"actual": "7B"
|
383 |
+
},
|
384 |
+
"open_weight": true,
|
385 |
+
"context": "8k"
|
386 |
+
},
|
387 |
+
{
|
388 |
+
"model_name": "openchat-3.5-1210",
|
389 |
+
"input_price": "0",
|
390 |
+
"output_price": "0",
|
391 |
+
"multimodality": {
|
392 |
+
"image": false,
|
393 |
+
"multiple_image": false,
|
394 |
+
"audio": false,
|
395 |
+
"video": false
|
396 |
+
},
|
397 |
+
"source": "https://huggingface.co/openchat/openchat-3.5-1210",
|
398 |
+
"licence": {
|
399 |
+
"name": "Apache 2.0",
|
400 |
+
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
401 |
+
},
|
402 |
+
"languages": ["eng"],
|
403 |
+
"release_date": "2023-12-10",
|
404 |
+
"parameters": {
|
405 |
+
"estimated": false,
|
406 |
+
"actual": "7B"
|
407 |
+
},
|
408 |
+
"open_weight": true,
|
409 |
+
"context": "8k"
|
410 |
+
},
|
411 |
+
{
|
412 |
+
"model_name": "openchat_3.5",
|
413 |
+
"input_price": "0",
|
414 |
+
"output_price": "0",
|
415 |
+
"multimodality": {
|
416 |
+
"image": false,
|
417 |
+
"multiple_image": false,
|
418 |
+
"audio": false,
|
419 |
+
"video": false
|
420 |
+
},
|
421 |
+
"source": "https://huggingface.co/openchat/openchat_3.5",
|
422 |
+
"licence": {
|
423 |
+
"name": "Apache 2.0",
|
424 |
+
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
425 |
+
},
|
426 |
+
"languages": ["eng"],
|
427 |
+
"release_date": "2023-10-30",
|
428 |
+
"parameters": {
|
429 |
+
"estimated": false,
|
430 |
+
"actual": "7B"
|
431 |
+
},
|
432 |
+
"open_weight": true,
|
433 |
+
"context": "8k"
|
434 |
+
},
|
435 |
+
{
|
436 |
+
"model_name": "gpt-4o-mini-2024-07-18",
|
437 |
+
"input_price": "$0.150",
|
438 |
+
"output_price": "$0.600",
|
439 |
+
"multimodality": {
|
440 |
+
"image": true,
|
441 |
+
"multiple_image": true,
|
442 |
+
"audio": false,
|
443 |
+
"video": false
|
444 |
+
},
|
445 |
+
"source": "https://openai.com/api/pricing/",
|
446 |
+
"licence": {
|
447 |
+
"name": "Commercial License",
|
448 |
+
"url": "https://openai.com/policies/terms-of-use"
|
449 |
+
},
|
450 |
+
"languages": ["eng", "spa", "fra", "deu", "zho", "zht", "jpn", "kor", "ita", "por", "nld", "rus", "ara", "hin", "tur", "vie", "pol", "tha", "swe", "dan", "nor", "fin", "hun", "ces", "slk", "rom", "bul", "ukr", "lit", "lav", "est", "slv", "msa", "ind", "fil", "swz", "amh"],
|
451 |
+
"release_date": "2024-07-18",
|
452 |
+
"parameters": {
|
453 |
+
"estimated": true,
|
454 |
+
"actual": "8B"
|
455 |
+
},
|
456 |
+
"open_weight": false,
|
457 |
+
"context": "128k"
|
458 |
+
},
|
459 |
+
{
|
460 |
+
"model_name": "gpt-4o-2024-08-06",
|
461 |
+
"input_price": "$2.50",
|
462 |
+
"output_price": "$10.00",
|
463 |
+
"multimodality": {
|
464 |
+
"image": true,
|
465 |
+
"multiple_image": true,
|
466 |
+
"audio": false,
|
467 |
+
"video": false
|
468 |
+
},
|
469 |
+
"source": "https://openai.com/api/pricing/",
|
470 |
+
"licence": {
|
471 |
+
"name": "Commercial License",
|
472 |
+
"url": "https://openai.com/policies/terms-of-use"
|
473 |
+
},
|
474 |
+
"languages": ["eng", "spa", "fra", "deu", "zho", "zht", "jpn", "kor", "ita", "por", "nld", "rus", "ara", "hin", "tur", "vie", "pol", "tha", "swe", "dan", "nor", "fin", "hun", "ces", "slk", "rom", "bul", "ukr", "lit", "lav", "est", "slv", "msa", "ind", "fil", "swz", "amh"],
|
475 |
+
"release_date": "2024-08-06",
|
476 |
+
"parameters": {
|
477 |
+
"estimated": true,
|
478 |
+
"actual": "200B"
|
479 |
+
},
|
480 |
+
"open_weight": false,
|
481 |
+
"context": "128k"
|
482 |
+
},
|
483 |
+
{
|
484 |
+
"model_name": "gpt-4o-2024-05-13",
|
485 |
+
"input_price": "$2.50",
|
486 |
+
"output_price": "$10.00",
|
487 |
+
"multimodality": {
|
488 |
+
"image": true,
|
489 |
+
"multiple_image": true,
|
490 |
+
"audio": false,
|
491 |
+
"video": false
|
492 |
+
},
|
493 |
+
"source": "https://openai.com/api/pricing/",
|
494 |
+
"licence": {
|
495 |
+
"name": "Commercial License",
|
496 |
+
"url": "https://openai.com/policies/terms-of-use"
|
497 |
+
},
|
498 |
+
"languages": ["eng", "spa", "fra", "deu", "zho", "zht", "jpn", "kor", "ita", "por", "nld", "rus", "ara", "hin", "tur", "vie", "pol", "tha", "swe", "dan", "nor", "fin", "hun", "ces", "slk", "rom", "bul", "ukr", "lit", "lav", "est", "slv", "msa", "ind", "fil", "swz", "amh"],
|
499 |
+
"release_date": "2024-05-13",
|
500 |
+
"parameters": {
|
501 |
+
"estimated": true,
|
502 |
+
"actual": "200B"
|
503 |
+
},
|
504 |
+
"open_weight": false,
|
505 |
+
"context": "128k"
|
506 |
+
},
|
507 |
+
{
|
508 |
+
"model_name": "gpt-4-1106-vision-preview",
|
509 |
+
"input_price": "$10.00",
|
510 |
+
"output_price": "$30.00",
|
511 |
+
"multimodality": {
|
512 |
+
"image": true,
|
513 |
+
"multiple_image": true,
|
514 |
+
"audio": false,
|
515 |
+
"video": false
|
516 |
+
},
|
517 |
+
"source": "https://openai.com/api/pricing/",
|
518 |
+
"licence": {
|
519 |
+
"name": "Commercial License",
|
520 |
+
"url": "https://openai.com/policies/terms-of-use"
|
521 |
+
},
|
522 |
+
"languages": ["eng", "spa", "fra", "deu", "zho", "zht", "jpn", "kor", "ita", "por", "nld", "rus", "ara", "hin", "tur", "vie", "pol", "tha", "swe", "dan", "nor", "fin", "hun", "ces", "slk", "rom", "bul", "ukr", "lit", "lav", "est", "slv", "msa", "ind", "fil", "swz", "amh"],
|
523 |
+
"release_date": "2023-11-06",
|
524 |
+
"parameters": {
|
525 |
+
"estimated": "1.76T",
|
526 |
+
"actual": false
|
527 |
+
},
|
528 |
+
"open_weight": false,
|
529 |
+
"context": "128k"
|
530 |
+
},
|
531 |
+
{
|
532 |
+
"model_name": "gemini-1.5-flash-latest",
|
533 |
+
"input_price": "$0.075",
|
534 |
+
"output_price": "$0.30",
|
535 |
+
"additional_prices": {
|
536 |
+
"context_caching": "$0.01875",
|
537 |
+
"context_storage": "$1.00",
|
538 |
+
"image_input": "$0.02",
|
539 |
+
"image_output": "$0.04",
|
540 |
+
"video_input": "$0.02",
|
541 |
+
"video_output": "$0.04",
|
542 |
+
"audio_input": "$0.02",
|
543 |
+
"audio_output": "$0.04"
|
544 |
+
},
|
545 |
+
"multimodality": {
|
546 |
+
"image": true,
|
547 |
+
"multiple_image": true,
|
548 |
+
"audio": true,
|
549 |
+
"video": true
|
550 |
+
},
|
551 |
+
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
|
552 |
+
"licence": {
|
553 |
+
"name": "Commercial License",
|
554 |
+
"url": ""
|
555 |
+
},
|
556 |
+
"languages": ["lit", "nor", "pol", "por", "ron", "rus", "srp", "slk", "slv", "spa", "swa", "swe", "tha", "tur", "ukr", "vie", "zho", "hrv", "ces", "dan", "nld", "eng", "est", "fin", "fra", "deu", "ell", "heb", "hin", "hun", "ind", "ita", "jpn", "kor", "lav", "ara", "ben", "bul"],
|
557 |
+
"release_date": "2024-05-24",
|
558 |
+
"parameters": {
|
559 |
+
"estimated": false,
|
560 |
+
"actual": false
|
561 |
+
},
|
562 |
+
"open_weight": false,
|
563 |
+
"context": ">128k"
|
564 |
+
}
|
565 |
+
]
|
src/main_df.csv
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model_name,input_price,output_price,multimodality_image,multimodality_multiple_image,multimodality_audio,multimodality_video,source,licence_name,licence_url,languages,release_date,parameters_estimated,parameters_actual,open_weight,context,additional_prices_context_caching,additional_prices_context_storage,additional_prices_image_input,additional_prices_image_output,additional_prices_video_input,additional_prices_video_output,additional_prices_audio_input,additional_prices_audio_output,clemscore_v1.6.5_multimodal,clemscore_v1.6.5_ascii,clemscore_v1.6
|
2 |
+
Meta-Llama-3-70B-Instruct-hf,0,0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct,Meta Llama 3 License,https://www.llama.com/llama3/license/,eng,2024-04-18,False,70B,True,8k,,,,,,,,,0.0,0.0,35.11
|
3 |
+
Meta-Llama-3-8B-Instruct-hf,0,0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct,Meta Llama 3 License,https://www.llama.com/llama3/license/,eng,2024-04-18,False,8B,True,8k,,,,,,,,,0.0,0.0,19.99
|
4 |
+
Meta-Llama-3.1-405B-Instruct-Turbo,0,0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"eng, deu, fra, ita, hin, por, spa, tha",2024-07-23,False,405B,True,128k,,,,,,,,,0.0,0.0,52.11
|
5 |
+
Meta-Llama-3.1-70B-Instruct,0,0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"eng, deu, fra, ita, hin, por, spa, tha",2024-07-23,False,70B,True,128k,,,,,,,,,0.0,0.0,38.83
|
6 |
+
Meta-Llama-3.1-8B-Instruct,0,0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"eng, deu, fra, ita, hin, por, spa, tha",2024-07-23,False,8B,True,128k,,,,,,,,,0.0,0.0,18.36
|
7 |
+
InternVL2-40B,0,0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-40B,MIT,https://choosealicense.com/licenses/mit/,"chi, eng, fre, spa, por, deu, ita, rus, jpn, kor, vie, tha, ara",2024-07-15,False,40B,True,8k,,,,,,,,,32.23,33.2,0.0
|
8 |
+
InternVL2-8B,0,0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-8B,MIT,https://choosealicense.com/licenses/mit/,"chi, eng, fre, spa, por, deu, ita, rus, jpn, kor, vie, tha, ara",2024-07-15,False,8B,True,8k,,,,,,,,,23.17,36.05,0.0
|
9 |
+
InternVL2-Llama3-76B,0,0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B,MIT,https://choosealicense.com/licenses/mit/,"chi, eng, fre, spa, por, deu, ita, rus, jpn, kor, vie, tha, ara",2024-07-15,False,76B,True,8k,,,,,,,,,33.84,43.29,0.0
|
10 |
+
InternVL2-26B,0,0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,https://choosealicense.com/licenses/mit/,"chi, eng, fre, spa, por, deu, ita, rus, jpn, kor, vie, tha, ara",2024-07-15,False,26B,True,8k,,,,,,,,,37.45,32.27,0.0
|
11 |
+
InternVL2-26B,0,0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,https://choosealicense.com/licenses/mit/,"chi, eng, fre, spa, por, deu, ita, rus, jpn, kor, vie, tha, ara",2024-07-15,False,26B,True,8k,,,,,,,,,37.45,32.27,0.0
|
12 |
+
Mistral-Large-Instruct-2407,0,0,False,False,False,False,https://huggingface.co/mistralai/Mistral-Large-Instruct-2407,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"eng, fra, spa, deu, ita, rus, chi, jpn, kor",2024-06-12,False,70B,True,8k,,,,,,,,,0.0,0.0,45.39
|
13 |
+
Mixtral-8x22B-Instruct-v0.1,0,0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"eng, fra, spa, deu, ita, rus",2024-04-17,False,141B,True,8k,,,,,,,,,0.0,0.0,12.69
|
14 |
+
Mistral-7B-Instruct-v0.2,0,0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"eng, fra, spa, deu, ita, rus, chi",2024-01-15,False,7B,True,8k,,,,,,,,,0.0,0.0,9.75
|
15 |
+
Mistral-7B-Instruct-v0.1,0,0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"eng, fra, spa, deu, ita, rus, chi",2023-12-11,False,7B,True,8k,,,,,,,,,0.0,0.0,8.01
|
16 |
+
Mixtral-8x7B-Instruct-v0.1,0,0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"eng, fra, spa, deu, ita, rus",2023-12-11,False,46.7B,True,8k,,,,,,,,,0.0,0.0,8.17
|
17 |
+
openchat-3.5-0106,0,0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-0106,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,eng,2024-01-06,False,7B,True,8k,,,,,,,,,0.0,0.0,17.1
|
18 |
+
openchat-3.5-1210,0,0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-1210,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,eng,2023-12-10,False,7B,True,8k,,,,,,,,,0.0,0.0,18.22
|
19 |
+
openchat_3.5,0,0,False,False,False,False,https://huggingface.co/openchat/openchat_3.5,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,eng,2023-10-30,False,7B,True,8k,,,,,,,,,0.0,0.0,23.64
|
20 |
+
gpt-4o-mini-2024-07-18,$0.150,$0.600,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"eng, spa, fra, deu, zho, zht, jpn, kor, ita, por, nld, rus, ara, hin, tur, vie, pol, tha, swe, dan, nor, fin, hun, ces, slk, rom, bul, ukr, lit, lav, est, slv, msa, ind, fil, swz, amh",2024-07-18,True,8B,False,128k,,,,,,,,,58.46,63.87,34.64
|
21 |
+
gpt-4o-2024-08-06,$2.50,$10.00,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"eng, spa, fra, deu, zho, zht, jpn, kor, ita, por, nld, rus, ara, hin, tur, vie, pol, tha, swe, dan, nor, fin, hun, ces, slk, rom, bul, ukr, lit, lav, est, slv, msa, ind, fil, swz, amh",2024-08-06,True,200B,False,128k,,,,,,,,,80.04,80.96,47.71
|
22 |
+
gpt-4o-2024-05-13,$2.50,$10.00,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"eng, spa, fra, deu, zho, zht, jpn, kor, ita, por, nld, rus, ara, hin, tur, vie, pol, tha, swe, dan, nor, fin, hun, ces, slk, rom, bul, ukr, lit, lav, est, slv, msa, ind, fil, swz, amh",2024-05-13,True,200B,False,128k,,,,,,,,,69.56,82.72,48.34
|
23 |
+
gpt-4-1106-vision-preview,$10.00,$30.00,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"eng, spa, fra, deu, zho, zht, jpn, kor, ita, por, nld, rus, ara, hin, tur, vie, pol, tha, swe, dan, nor, fin, hun, ces, slk, rom, bul, ukr, lit, lav, est, slv, msa, ind, fil, swz, amh",2023-11-06,1.76T,False,False,128k,,,,,,,,,73.55,68.14,0.0
|
24 |
+
gemini-1.5-flash-latest,$0.075,$0.30,True,True,True,True,https://cloud.google.com/vertex-ai/generative-ai/pricing,Commercial License,,"lit, nor, pol, por, ron, rus, srp, slk, slv, spa, swa, swe, tha, tur, ukr, vie, zho, hrv, ces, dan, nld, eng, est, fin, fra, deu, ell, heb, hin, hun, ind, ita, jpn, kor, lav, ara, ben, bul",2024-05-24,False,False,False,>128k,$0.01875,$1.00,$0.02,$0.04,$0.02,$0.04,$0.02,$0.04,47.73,47.88,32.0
|
src/pricing.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import pandas as pd
|
2 |
import requests
|
|
|
3 |
|
4 |
def fetch_prices():
|
5 |
# Fetch the JSON data from the URL
|
@@ -13,18 +14,23 @@ def fetch_prices():
|
|
13 |
extracted_data = []
|
14 |
for entry in data:
|
15 |
extracted_info = {
|
16 |
-
"output_tokens": entry["fields"]["output_tokens"],
|
17 |
-
"provider": entry["fields"]["provider"],
|
18 |
"model_name": entry["fields"]["model_name"],
|
19 |
-
"
|
20 |
"input_tokens": entry["fields"]["input_tokens"],
|
|
|
|
|
21 |
"update_time": entry["fields"]["update_time"]
|
22 |
}
|
23 |
extracted_data.append(extracted_info)
|
24 |
|
25 |
# Create a DataFrame from the extracted data
|
26 |
df = pd.DataFrame(extracted_data)
|
27 |
-
|
|
|
|
|
28 |
else:
|
29 |
print(f"Failed to retrieve data: {response.status_code}")
|
30 |
-
return None
|
|
|
|
|
|
|
|
1 |
import pandas as pd
|
2 |
import requests
|
3 |
+
import os
|
4 |
|
5 |
def fetch_prices():
|
6 |
# Fetch the JSON data from the URL
|
|
|
14 |
extracted_data = []
|
15 |
for entry in data:
|
16 |
extracted_info = {
|
|
|
|
|
17 |
"model_name": entry["fields"]["model_name"],
|
18 |
+
"provider": entry["fields"]["provider"],
|
19 |
"input_tokens": entry["fields"]["input_tokens"],
|
20 |
+
"output_tokens": entry["fields"]["output_tokens"],
|
21 |
+
"url": entry["fields"]["url"],
|
22 |
"update_time": entry["fields"]["update_time"]
|
23 |
}
|
24 |
extracted_data.append(extracted_info)
|
25 |
|
26 |
# Create a DataFrame from the extracted data
|
27 |
df = pd.DataFrame(extracted_data)
|
28 |
+
save_path = os.path.join('src', 'prices.csv')
|
29 |
+
df.to_csv(save_path, index=False) # Save the DataFrame as a CSV file
|
30 |
+
print(f"Saved the Prices as a CSV under {save_path}")
|
31 |
else:
|
32 |
print(f"Failed to retrieve data: {response.status_code}")
|
33 |
+
return None
|
34 |
+
|
35 |
+
if __name__ == '__main__':
|
36 |
+
fetch_prices()
|
src/process_data.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Build ``src/main_df.csv`` from model metadata and clemscore result tables.

The script flattens every entry of ``src/combined_data.json`` into one row,
attaches one clemscore column per results CSV, and writes the combined table
to ``src/main_df.csv``.
"""
import json

import pandas as pd

# Literal marker that separates the model identifier from the rest of a
# results-CSV row label, e.g. "gpt-4o-t0.0--gpt-4o-t0.0" -> "gpt-4o".
TEMPERATURE_SUFFIX = '-t0.0'

# Optional price kinds; each becomes an "additional_prices_<kind>" column.
ADDITIONAL_PRICE_KEYS = (
    "context_caching",
    "context_storage",
    "image_input",
    "image_output",
    "video_input",
    "video_output",
    "audio_input",
    "audio_output",
)

# Output column name -> results CSV it is read from (insertion order is the
# column order in the written file).
RESULT_FILES = {
    'clemscore_v1.6.5_multimodal': 'src/results_1.6.5_multimodal.csv',
    'clemscore_v1.6.5_ascii': 'src/results_1.6.5_ascii.csv',
    'clemscore_v1.6': 'src/results_1.6.csv',
}


def flatten_entry(entry):
    """Return one flat row (dict) for a single model entry of the JSON file.

    Nested ``multimodality``, ``licence`` and ``parameters`` objects are
    expanded into prefixed keys and ``languages`` is joined with ", ".
    ``additional_prices`` is optional: a missing (or null) object yields
    ``None`` for every ``additional_prices_*`` key.

    Raises:
        KeyError: if any of the mandatory fields is absent.
    """
    # `or {}` also covers an explicit null, which would otherwise break .get()
    extra_prices = entry.get("additional_prices") or {}

    row = {
        "model_name": entry["model_name"],
        "input_price": entry["input_price"],
        "output_price": entry["output_price"],
        "multimodality_image": entry["multimodality"]["image"],
        "multimodality_multiple_image": entry["multimodality"]["multiple_image"],
        "multimodality_audio": entry["multimodality"]["audio"],
        "multimodality_video": entry["multimodality"]["video"],
        "source": entry["source"],
        "licence_name": entry["licence"]["name"],
        "licence_url": entry["licence"]["url"],
        "languages": ", ".join(entry["languages"]),
        "release_date": entry["release_date"],
        "parameters_estimated": entry["parameters"]["estimated"],
        "parameters_actual": entry["parameters"]["actual"],
        "open_weight": entry["open_weight"],
        "context": entry["context"],
    }
    for kind in ADDITIONAL_PRICE_KEYS:
        row[f"additional_prices_{kind}"] = extra_prices.get(kind)
    return row


def load_clemscore_map(path):
    """Return ``{model_name: clemscore}`` read from a results CSV.

    The first CSV line is consumed as the header, so it never ends up in the
    map and the scores keep their numeric dtype. The first column holds the
    row label and the second column the clemscore; labels are cut at the
    first literal occurrence of ``TEMPERATURE_SUFFIX``.
    """
    table = pd.read_csv(path, header=0)
    # partition() matches the separator literally; str.split() would treat a
    # multi-character pattern as a regex, where "." matches any character.
    model_names = table.iloc[:, 0].str.partition(TEMPERATURE_SUFFIX)[0]
    return dict(zip(model_names, table.iloc[:, 1]))


def main():
    """Regenerate ``src/main_df.csv`` from the JSON metadata and results CSVs."""
    with open('src/combined_data.json', encoding='utf-8') as f:
        data = json.load(f)

    df = pd.DataFrame([flatten_entry(entry) for entry in data])

    # Models without a result in a given table get a clemscore of 0.
    for column, csv_path in RESULT_FILES.items():
        scores = load_clemscore_map(csv_path)
        df[column] = df['model_name'].map(scores).fillna(0).astype(float)

    df.to_csv('src/main_df.csv', index=False)


if __name__ == '__main__':
    main()
|
src/results_1.6.5_ascii.csv
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,"-, clemscore","all, Average % Played","all, Average Quality Score","matchit_ascii, % Played","matchit_ascii, Quality Score","matchit_ascii, Quality Score (std)","referencegame, % Played","referencegame, Quality Score","referencegame, Quality Score (std)","textmapworld, % Played","textmapworld, Quality Score","textmapworld, Quality Score (std)","textmapworld_graphreasoning, % Played","textmapworld_graphreasoning, Quality Score","textmapworld_graphreasoning, Quality Score (std)","textmapworld_specificroom, % Played","textmapworld_specificroom, Quality Score","textmapworld_specificroom, Quality Score (std)"
|
2 |
+
Idefics3-8B-Llama3-t0.0--Idefics3-8B-Llama3-t0.0,22.56,40.0,56.39,100.0,70.0,46.41,100.0,42.78,49.61,0.0,,,0.0,,,0.0,,
|
3 |
+
InternVL2-26B-t0.0--InternVL2-26B-t0.0,32.27,51.2,63.03,100.0,65.0,48.3,100.0,43.89,49.76,16.0,51.55,16.62,0.0,,,40.0,91.67,28.87
|
4 |
+
InternVL2-40B-t0.0--InternVL2-40B-t0.0,33.2,50.67,65.52,100.0,70.0,46.41,93.33,48.21,50.12,10.0,70.55,19.72,0.0,,,50.0,73.33,45.77
|
5 |
+
InternVL2-8B-t0.0--InternVL2-8B-t0.0,36.05,48.67,74.07,100.0,70.0,46.41,100.0,52.22,50.09,0.0,,,0.0,,,43.33,100.0,0.0
|
6 |
+
InternVL2-Llama3-76B-t0.0--InternVL2-Llama3-76B-t0.0,43.29,60.27,71.82,100.0,55.0,50.38,100.0,61.11,48.89,28.0,71.17,7.4,0.0,,,73.33,100.0,0.0
|
7 |
+
Phi-3-vision-128k-instruct-t0.0--Phi-3-vision-128k-instruct-t0.0,22.07,29.61,74.53,67.5,81.48,39.58,73.89,42.11,49.56,0.0,,,0.0,,,6.67,100.0,0.0
|
8 |
+
Phi-3.5-vision-instruct-t0.0--Phi-3.5-vision-instruct-t0.0,26.95,41.73,64.58,100.0,55.0,50.38,83.33,42.67,49.62,12.0,60.63,21.21,0.0,,,13.33,100.0,0.0
|
9 |
+
Pixtral-12B-2409-t0.0--Pixtral-12B-2409-t0.0,37.57,59.63,63.01,97.5,69.23,46.76,100.0,41.11,49.34,34.0,53.61,17.0,6.67,56.67,4.71,60.0,94.44,23.57
|
10 |
+
claude-3-5-sonnet-20240620-t0.0--claude-3-5-sonnet-20240620-t0.0,90.56,100.0,90.56,100.0,92.5,26.67,100.0,91.11,28.54,100.0,86.26,12.12,100.0,82.91,10.88,100.0,100.0,0.0
|
11 |
+
claude-3-opus-20240229-t0.0--claude-3-opus-20240229-t0.0,74.99,100.0,74.99,100.0,85.0,36.16,100.0,29.44,45.71,100.0,83.83,14.64,100.0,76.69,12.8,100.0,100.0,0.0
|
12 |
+
gemini-1.5-flash-latest-t0.0--gemini-1.5-flash-latest-t0.0,47.88,62.97,76.03,97.5,76.92,42.68,100.0,61.11,48.89,64.0,66.08,16.46,0.0,,,53.33,100.0,0.0
|
13 |
+
gpt-4-1106-vision-preview-t0.0--gpt-4-1106-vision-preview-t0.0,68.14,99.33,68.6,100.0,72.5,45.22,100.0,29.44,45.71,100.0,73.62,14.33,100.0,67.46,15.11,96.67,100.0,0.0
|
14 |
+
gpt-4o-2024-05-13-t0.0--gpt-4o-2024-05-13-t0.0,82.72,96.67,85.57,100.0,97.5,15.81,100.0,90.0,30.08,90.0,74.25,12.12,96.67,66.12,12.83,96.67,100.0,0.0
|
15 |
+
gpt-4o-2024-08-06-t0.0--gpt-4o-2024-08-06-t0.0,80.96,98.67,82.05,100.0,82.5,38.48,100.0,87.78,32.85,100.0,72.84,10.76,100.0,67.15,12.41,93.33,100.0,0.0
|
16 |
+
gpt-4o-mini-2024-07-18-t0.0--gpt-4o-mini-2024-07-18-t0.0,63.87,85.76,74.48,100.0,87.5,33.49,99.44,73.74,44.13,66.0,63.7,16.72,96.67,47.46,15.37,66.67,100.0,0.0
|
17 |
+
idefics-80b-instruct-t0.0--idefics-80b-instruct-t0.0,19.73,46.5,42.44,80.0,37.5,49.19,100.0,31.11,46.42,6.0,58.71,10.65,,,,0.0,,
|
18 |
+
idefics-9b-instruct-t0.0--idefics-9b-instruct-t0.0,7.66,22.56,33.97,100.0,37.5,49.03,12.78,30.43,47.05,0.0,,,0.0,,,0.0,,
|
19 |
+
internlm-xcomposer2d5-7b-t0.0--internlm-xcomposer2d5-7b-t0.0,19.69,25.47,77.32,100.0,62.5,49.03,0.0,,,4.0,69.47,13.4,0.0,,,23.33,100.0,0.0
|
src/results_1.6.5_multimodal.csv
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,"-, clemscore","all, Average % Played","all, Average Quality Score","matchit, % Played","matchit, Quality Score","matchit, Quality Score (std)","mm_mapworld, % Played","mm_mapworld, Quality Score","mm_mapworld, Quality Score (std)","mm_mapworld_graphs, % Played","mm_mapworld_graphs, Quality Score","mm_mapworld_graphs, Quality Score (std)","mm_mapworld_specificroom, % Played","mm_mapworld_specificroom, Quality Score","mm_mapworld_specificroom, Quality Score (std)","multimodal_referencegame, % Played","multimodal_referencegame, Quality Score","multimodal_referencegame, Quality Score (std)"
|
2 |
+
Idefics3-8B-Llama3-t0.0--Idefics3-8B-Llama3-t0.0,17.52,32.59,53.76,40.0,79.17,41.49,14.0,4.76,12.6,0.0,,,10.0,100.0,0.0,98.97,31.09,46.35
|
3 |
+
InternVL2-26B-t0.0--InternVL2-26B-t0.0,37.45,66.76,56.09,100.0,93.33,25.15,52.0,58.47,20.73,16.67,69.33,16.91,80.0,25.0,44.23,85.13,34.34,47.56
|
4 |
+
InternVL2-40B-t0.0--InternVL2-40B-t0.0,32.23,56.27,57.28,96.67,79.31,40.86,28.0,23.29,35.09,33.33,76.2,20.56,23.33,71.43,48.8,100.0,36.15,48.11
|
5 |
+
InternVL2-8B-t0.0--InternVL2-8B-t0.0,23.17,46.61,49.7,100.0,68.33,46.91,0.0,,,3.33,85.71,,43.33,7.69,27.74,86.41,37.09,48.38
|
6 |
+
InternVL2-Llama3-76B-t0.0--InternVL2-Llama3-76B-t0.0,33.84,54.8,61.76,100.0,90.0,30.25,34.0,57.15,18.59,3.33,54.55,,36.67,72.73,46.71,100.0,34.36,47.55
|
7 |
+
Phi-3-vision-128k-instruct-t0.0--Phi-3-vision-128k-instruct-t0.0,3.34,5.06,65.98,0.0,,,4.0,45.0,7.07,3.33,52.94,,0.0,,,17.95,100.0,0.0
|
8 |
+
Phi-3.5-vision-instruct-t0.0--Phi-3.5-vision-instruct-t0.0,15.64,40.67,38.46,100.0,0.0,0.0,0.0,,,3.33,100.0,,0.0,,,100.0,15.38,36.13
|
9 |
+
Pixtral-12B-2409-t0.0--Pixtral-12B-2409-t0.0,28.64,49.98,57.3,100.0,63.33,48.6,24.0,58.01,29.16,3.33,66.67,,43.33,53.85,51.89,79.23,44.66,49.79
|
10 |
+
claude-3-5-sonnet-20240620-t0.0--claude-3-5-sonnet-20240620-t0.0,80.77,95.33,84.73,100.0,85.0,36.01,100.0,82.41,11.7,76.67,85.23,15.36,100.0,90.0,30.51,100.0,81.03,39.26
|
11 |
+
claude-3-opus-20240229-t0.0--claude-3-opus-20240229-t0.0,68.16,99.33,68.62,100.0,81.67,39.02,100.0,75.79,14.43,96.67,85.12,13.27,100.0,53.33,50.74,100.0,47.18,49.98
|
12 |
+
dolphin-vision-72b-t0.0--dolphin-vision-72b-t0.0,4.65,7.88,58.95,16.67,90.0,31.62,4.0,60.61,8.57,3.33,0.0,,6.67,100.0,0.0,8.72,44.12,50.4
|
13 |
+
gemini-1.5-flash-latest-t0.0--gemini-1.5-flash-latest-t0.0,47.73,85.0,56.15,85.0,84.31,36.73,100.0,60.05,20.46,46.67,62.72,13.21,93.33,32.14,47.56,100.0,41.54,49.34
|
14 |
+
gpt-4-1106-vision-preview-t0.0--gpt-4-1106-vision-preview-t0.0,73.55,97.79,75.21,100.0,80.0,40.34,100.0,73.74,13.24,90.0,77.25,10.74,100.0,76.67,43.02,98.97,68.39,46.55
|
15 |
+
gpt-4o-2024-05-13-t0.0--gpt-4o-2024-05-13-t0.0,69.56,87.73,79.29,100.0,78.33,41.55,52.0,73.58,12.43,90.0,76.06,16.67,96.67,93.1,25.79,100.0,75.38,43.13
|
16 |
+
gpt-4o-2024-08-06-t0.0--gpt-4o-2024-08-06-t0.0,80.04,96.93,82.57,93.33,80.36,40.09,98.0,81.59,12.0,96.67,82.93,11.51,96.67,93.1,25.79,100.0,74.87,43.43
|
17 |
+
gpt-4o-mini-2024-07-18-t0.0--gpt-4o-mini-2024-07-18-t0.0,58.46,90.04,64.93,100.0,86.67,34.28,92.0,64.65,16.71,73.33,59.93,16.17,86.67,65.38,48.52,98.21,48.04,50.03
|
18 |
+
idefics-80b-instruct-t0.0--idefics-80b-instruct-t0.0,29.55,58.29,50.7,88.14,55.77,50.15,20.0,32.78,29.72,50.0,81.36,,33.33,50.0,52.7,100.0,33.59,47.29
|
19 |
+
idefics-9b-instruct-t0.0--idefics-9b-instruct-t0.0,12.29,38.0,32.34,100.0,33.33,47.54,0.0,,,0.0,,,0.0,,,90.0,31.34,46.45
|
20 |
+
internlm-xcomposer2d5-7b-t0.0--internlm-xcomposer2d5-7b-t0.0,16.95,20.18,83.98,98.33,77.97,41.8,0.0,,,0.0,,,0.0,,,2.56,90.0,31.62
|
src/results_1.6.csv
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,"-, clemscore","all, Average % Played","all, Average Quality Score","imagegame, % Played","imagegame, Quality Score","imagegame, Quality Score (std)","privateshared, % Played","privateshared, Quality Score","privateshared, Quality Score (std)","referencegame, % Played","referencegame, Quality Score","referencegame, Quality Score (std)","taboo, % Played","taboo, Quality Score","taboo, Quality Score (std)","wordle, % Played","wordle, Quality Score","wordle, Quality Score (std)","wordle_withclue, % Played","wordle_withclue, Quality Score","wordle_withclue, Quality Score (std)","wordle_withcritic, % Played","wordle_withcritic, Quality Score","wordle_withcritic, Quality Score (std)"
|
2 |
+
CodeLlama-34b-Instruct-hf-t0.0--CodeLlama-34b-Instruct-hf-t0.0,14.35,33.57,42.76,0.0,,,0.0,,,100.0,94.44,22.97,51.67,51.61,50.8,56.67,0.0,0.0,26.67,25.0,46.29,0.0,,
|
3 |
+
Meta-Llama-3-70B-Instruct-hf-t0.0--Meta-Llama-3-70B-Instruct-hf-t0.0,35.11,80.72,43.5,0.0,,,100.0,84.37,13.69,100.0,64.44,48.0,91.67,70.3,39.37,90.0,1.85,6.81,96.67,14.37,32.34,86.67,25.64,39.08
|
4 |
+
Meta-Llama-3-8B-Instruct-hf-t0.0--Meta-Llama-3-8B-Instruct-hf-t0.0,19.99,76.1,26.27,0.0,,,96.0,58.91,30.05,100.0,46.11,49.99,100.0,37.78,45.08,86.67,0.0,0.0,83.33,14.0,33.91,66.67,0.83,3.73
|
5 |
+
Meta-Llama-3.1-405B-Instruct-Turbo-t0.0--Meta-Llama-3.1-405B-Instruct-Turbo-t0.0,52.11,90.12,57.82,62.5,94.12,12.55,100.0,84.24,29.65,100.0,80.0,40.11,95.0,76.61,36.03,93.33,7.14,12.72,93.33,29.7,34.87,86.67,32.95,35.63
|
6 |
+
Meta-Llama-3.1-70B-Instruct-t0.0--Meta-Llama-3.1-70B-Instruct-t0.0,38.83,82.14,47.27,0.0,,,100.0,85.69,13.94,100.0,72.78,44.63,88.33,75.16,34.69,93.33,1.43,5.25,93.33,19.29,33.55,100.0,29.28,36.5
|
7 |
+
Meta-Llama-3.1-8B-Instruct-t0.0--Meta-Llama-3.1-8B-Instruct-t0.0,18.36,72.91,25.18,0.0,,,82.0,29.31,25.61,100.0,47.78,50.09,88.33,50.0,47.7,96.67,0.0,0.0,76.67,12.32,29.82,66.67,11.67,31.11
|
8 |
+
Mistral-7B-Instruct-v0.1-t0.0--Mistral-7B-Instruct-v0.1-t0.0,8.01,37.14,21.58,0.0,,,20.0,1.21,2.58,100.0,55.0,49.89,100.0,31.67,45.07,0.0,,,23.33,0.0,0.0,16.67,20.0,44.72
|
9 |
+
Mistral-7B-Instruct-v0.2-t0.0--Mistral-7B-Instruct-v0.2-t0.0,9.75,36.91,26.42,0.0,,,0.0,,,100.0,38.33,48.76,65.0,0.0,0.0,50.0,0.0,0.0,26.67,43.75,49.55,16.67,50.0,50.0
|
10 |
+
Mistral-Large-Instruct-2407-t0.0--Mistral-Large-Instruct-2407-t0.0,45.39,82.21,55.21,7.5,100.0,0.0,78.0,81.39,13.12,100.0,81.11,39.25,96.67,71.84,36.57,100.0,0.67,3.65,93.33,21.73,32.97,100.0,29.72,41.79
|
11 |
+
Mixtral-8x22B-Instruct-v0.1-t0.0--Mixtral-8x22B-Instruct-v0.1-t0.0,12.69,52.14,24.33,0.0,,,0.0,,,100.0,36.67,48.32,58.33,40.0,49.71,96.67,0.0,0.0,60.0,15.0,33.3,50.0,30.0,41.4
|
12 |
+
Mixtral-8x7B-Instruct-v0.1-t0.0--Mixtral-8x7B-Instruct-v0.1-t0.0,8.17,47.62,17.15,0.0,,,0.0,,,61.67,41.44,49.49,51.67,9.68,30.05,96.67,0.0,0.0,76.67,19.13,35.28,46.67,15.48,36.08
|
13 |
+
Nous-Hermes-2-Mixtral-8x7B-SFT-t0.0--Nous-Hermes-2-Mixtral-8x7B-SFT-t0.0,11.95,39.68,30.12,0.0,,,0.0,,,97.78,36.93,48.4,93.33,47.92,47.36,0.0,,,53.33,15.62,30.1,33.33,20.0,42.16
|
14 |
+
Phi-3-mini-128k-instruct-t0.0--Phi-3-mini-128k-instruct-t0.0,6.33,34.52,18.34,0.0,,,0.0,,,100.0,36.67,48.32,98.33,0.0,0.0,0.0,,,33.33,20.0,42.16,10.0,16.67,28.87
|
15 |
+
Qwen1.5-0.5B-Chat-t0.0--Qwen1.5-0.5B-Chat-t0.0,0.12,25.72,0.48,0.0,,,0.0,,,0.0,,,86.67,1.92,13.87,46.67,0.0,0.0,40.0,0.0,0.0,6.67,0.0,0.0
|
16 |
+
Qwen1.5-1.8B-Chat-t0.0--Qwen1.5-1.8B-Chat-t0.0,0.0,15.24,0.0,0.0,,,0.0,,,0.0,,,93.33,0.0,0.0,0.0,,,10.0,0.0,0.0,3.33,0.0,
|
17 |
+
Qwen1.5-14B-Chat-t0.0--Qwen1.5-14B-Chat-t0.0,16.8,40.95,41.02,30.0,20.58,14.69,0.0,,,100.0,44.44,49.83,46.67,41.07,47.25,90.0,0.0,0.0,16.67,40.0,54.77,3.33,100.0,
|
18 |
+
Qwen1.5-32B-Chat-t0.0--Qwen1.5-32B-Chat-t0.0,15.41,63.69,24.19,67.5,42.15,29.29,20.0,35.52,9.63,100.0,12.78,33.48,61.67,42.79,47.39,93.33,0.0,0.0,60.0,16.85,33.34,43.33,19.23,38.4
|
19 |
+
Qwen1.5-72B-Chat-t0.0--Qwen1.5-72B-Chat-t0.0,30.37,80.05,37.94,65.0,50.0,25.53,92.0,52.87,20.39,100.0,37.22,48.47,73.33,73.11,43.02,96.67,0.69,3.71,90.0,20.93,39.03,43.33,30.77,48.04
|
20 |
+
Qwen1.5-7B-Chat-t0.0--Qwen1.5-7B-Chat-t0.0,2.58,30.24,8.53,0.0,,,0.0,,,100.0,20.56,40.52,98.33,13.56,33.26,0.0,,,10.0,0.0,0.0,3.33,0.0,
|
21 |
+
Qwen2-72B-Instruct-t0.0--Qwen2-72B-Instruct-t0.0,30.03,74.52,40.3,0.0,,,80.0,65.69,22.85,100.0,67.22,47.07,91.67,70.61,40.31,100.0,2.67,10.48,86.67,12.82,29.56,63.33,22.81,33.31
|
22 |
+
Qwen2-7B-Instruct-t0.0--Qwen2-7B-Instruct-t0.0,6.18,35.32,17.51,5.0,23.0,1.41,0.0,,,98.89,41.01,49.32,86.67,41.03,46.14,26.67,0.0,0.0,26.67,0.0,0.0,3.33,0.0,
|
23 |
+
SUS-Chat-34B-t0.0--SUS-Chat-34B-t0.0,14.11,54.4,25.93,2.5,29.0,,20.0,0.0,0.0,100.0,70.0,45.95,98.33,52.26,45.64,93.33,0.0,0.0,43.33,23.08,43.85,23.33,7.14,18.9
|
24 |
+
Starling-LM-7B-beta-t0.0--Starling-LM-7B-beta-t0.0,6.56,30.89,21.25,0.0,,,4.0,97.12,4.08,62.22,30.36,46.19,46.67,0.0,0.0,66.67,0.0,0.0,33.33,0.0,0.0,3.33,0.0,
|
25 |
+
WizardLM-13b-v1.2-t0.0--WizardLM-13b-v1.2-t0.0,11.48,39.57,29.0,0.0,,,42.0,10.11,21.96,100.0,71.11,45.45,35.0,64.29,45.12,26.67,0.0,0.0,53.33,6.25,25.0,20.0,22.22,40.37
|
26 |
+
WizardLM-70b-v1.0-t0.0--WizardLM-70b-v1.0-t0.0,17.4,46.19,37.66,0.0,,,0.0,,,100.0,81.67,38.8,56.67,70.59,44.58,73.33,0.0,0.0,56.67,17.84,34.09,36.67,18.18,40.45
|
27 |
+
Yi-1.5-34B-Chat-t0.0--Yi-1.5-34B-Chat-t0.0,7.67,52.38,14.65,0.0,,,0.0,,,100.0,43.33,49.69,66.67,0.0,0.0,96.67,0.0,0.0,70.0,18.25,36.48,33.33,11.67,31.48
|
28 |
+
Yi-1.5-6B-Chat-t0.0--Yi-1.5-6B-Chat-t0.0,6.73,41.43,16.25,0.0,,,0.0,,,88.33,34.59,47.72,65.0,0.0,0.0,86.67,0.0,0.0,33.33,20.0,42.16,16.67,26.67,43.46
|
29 |
+
Yi-1.5-9B-Chat-t0.0--Yi-1.5-9B-Chat-t0.0,4.37,38.1,11.48,0.0,,,0.0,,,51.67,41.94,49.61,41.67,0.0,0.0,86.67,0.0,0.0,46.67,7.14,26.73,40.0,8.33,28.87
|
30 |
+
Yi-34B-Chat-t0.0--Yi-34B-Chat-t0.0,8.27,40.86,20.25,35.0,9.07,10.84,26.0,8.02,17.17,3.33,33.33,51.64,68.33,41.46,49.88,83.33,0.0,0.0,43.33,26.92,43.85,26.67,22.92,36.66
|
31 |
+
aya-23-35B-t0.0--aya-23-35B-t0.0,13.35,47.9,27.88,0.0,,,82.0,31.48,15.69,100.0,42.78,49.61,90.0,40.43,45.63,0.0,,,33.33,19.17,31.93,30.0,5.56,16.67
|
32 |
+
aya-23-8B-t0.0--aya-23-8B-t0.0,11.72,45.24,25.9,0.0,,,50.0,35.71,33.91,100.0,35.0,47.83,100.0,22.22,40.44,40.0,0.0,0.0,13.33,50.0,57.74,13.33,12.5,25.0
|
33 |
+
claude-2.1-t0.0--claude-2.1-t0.0,32.5,82.14,39.57,0.0,,,100.0,74.92,26.26,100.0,50.56,50.14,95.0,64.91,45.93,96.67,7.59,21.16,86.67,21.6,39.58,96.67,17.82,35.34
|
34 |
+
claude-3-5-sonnet-20240620-t0.0--claude-3-5-sonnet-20240620-t0.0,57.08,89.64,63.68,97.5,97.1,10.08,100.0,89.57,10.67,100.0,91.11,28.54,33.33,72.5,28.75,100.0,15.0,23.45,100.0,41.44,34.92,96.67,39.02,34.81
|
35 |
+
claude-3-haiku-20240307-t0.0--claude-3-haiku-20240307-t0.0,22.49,79.52,28.28,0.0,,,100.0,50.46,34.83,100.0,17.22,37.86,63.33,78.95,32.11,100.0,0.0,0.0,100.0,8.44,21.27,93.33,14.58,31.64
|
36 |
+
claude-3-opus-20240229-t0.0--claude-3-opus-20240229-t0.0,42.42,83.1,51.05,0.0,,,100.0,95.32,6.4,100.0,29.44,45.71,88.33,83.65,32.11,100.0,20.0,28.65,96.67,46.09,38.59,96.67,31.78,35.15
|
37 |
+
claude-3-sonnet-20240229-t0.0--claude-3-sonnet-20240229-t0.0,30.53,85.24,35.82,0.0,,,100.0,60.81,25.28,100.0,27.22,44.63,100.0,73.61,36.73,100.0,10.67,23.33,100.0,20.5,33.65,96.67,22.13,33.35
|
38 |
+
codegemma-7b-it-t0.0--codegemma-7b-it-t0.0,15.3,51.95,29.45,0.0,,,42.0,0.0,0.0,81.67,96.6,18.19,83.33,26.0,44.31,96.67,0.0,0.0,43.33,14.1,30.31,16.67,40.0,54.77
|
39 |
+
command-r-plus-t0.0--command-r-plus-t0.0,24.94,74.9,33.3,0.0,,,,,,99.44,47.49,50.08,63.33,67.11,45.44,100.0,7.33,19.82,93.33,26.79,37.91,93.33,17.8,32.58
|
40 |
+
command-r-t0.0--command-r-t0.0,14.15,61.67,22.95,0.0,,,,,,100.0,23.33,42.41,63.33,44.74,47.63,93.33,0.0,0.0,66.67,30.0,44.13,46.67,16.67,36.4
|
41 |
+
dolphin-2.5-mixtral-8x7b-t0.0--dolphin-2.5-mixtral-8x7b-t0.0,15.1,46.38,32.55,0.0,,,48.0,58.95,25.96,100.0,35.0,47.83,100.0,41.11,46.79,0.0,,,43.33,7.69,27.74,33.33,20.0,42.16
|
42 |
+
gemini-1.0-pro-t0.0--gemini-1.0-pro-t0.0,26.95,80.14,33.63,30.0,49.08,26.5,76.0,63.7,19.97,100.0,46.11,49.99,85.0,55.23,44.53,90.0,0.74,3.85,86.67,12.82,32.76,93.33,7.74,21.98
|
43 |
+
gemini-1.5-flash-latest-t0.0--gemini-1.5-flash-latest-t0.0,32.0,76.14,42.03,0.0,,,98.0,78.18,20.17,100.0,61.11,48.89,91.67,57.88,43.61,96.67,0.69,3.71,66.67,33.33,38.9,80.0,20.97,31.07
|
44 |
+
gemini-1.5-pro-latest-t0.0--gemini-1.5-pro-latest-t0.0,41.9,81.29,51.55,0.0,,,94.0,88.7,10.41,100.0,65.0,47.83,85.0,70.59,35.84,100.0,10.67,22.43,93.33,41.37,39.25,96.67,32.99,35.32
|
45 |
+
gemma-1.1-2b-it-t0.0--gemma-1.1-2b-it-t0.0,2.91,22.62,12.87,0.0,,,0.0,,,100.0,20.0,40.11,45.0,14.81,36.2,0.0,,,6.67,0.0,0.0,6.67,16.66,23.57
|
46 |
+
gemma-1.1-7b-it-t0.0--gemma-1.1-7b-it-t0.0,14.14,49.67,28.46,0.0,,,6.0,10.83,10.1,100.0,92.22,26.86,35.0,52.38,51.18,73.33,0.0,0.0,76.67,6.52,22.88,56.67,8.82,26.43
|
47 |
+
gemma-2-27b-it-t0.0--gemma-2-27b-it-t0.0,3.51,11.9,29.51,0.0,,,0.0,,,75.0,38.52,48.85,5.0,0.0,0.0,0.0,,,0.0,,,3.33,50.0,
|
48 |
+
gemma-2-2b-it-t0.0--gemma-2-2b-it-t0.0,2.67,38.33,6.96,0.0,,,0.0,,,0.0,,,98.33,0.0,0.0,100.0,0.0,0.0,46.67,10.71,28.95,23.33,17.14,37.29
|
49 |
+
gemma-2-9b-it-t0.0--gemma-2-9b-it-t0.0,27.34,75.48,36.22,0.0,,,70.0,53.52,40.57,100.0,42.22,49.53,78.33,77.66,38.27,100.0,1.67,9.13,93.33,17.26,33.48,86.67,25.0,43.01
|
50 |
+
gemma-7b-it-t0.0--gemma-7b-it-t0.0,1.82,17.78,10.23,0.0,,,0.0,,,97.78,40.91,49.31,0.0,,,3.33,0.0,,3.33,0.0,,20.0,0.0,0.0
|
51 |
+
gpt-3.5-turbo-0125-t0.0--gpt-3.5-turbo-0125-t0.0,27.22,89.67,30.36,70.0,64.18,29.33,96.0,36.7,31.04,100.0,3.33,18.0,68.33,73.17,41.98,100.0,0.0,0.0,96.67,24.25,40.95,96.67,10.92,27.56
|
52 |
+
gpt-4-0125-preview-t0.0--gpt-4-0125-preview-t0.0,52.5,94.92,55.31,100.0,99.6,1.53,100.0,90.22,6.92,99.44,31.84,46.72,75.0,93.33,20.23,100.0,20.67,27.66,100.0,33.17,42.87,90.0,18.33,32.69
|
53 |
+
gpt-4-0613-t0.0--gpt-4-0613-t0.0,51.09,94.88,53.85,77.5,98.19,10.06,100.0,97.33,4.12,100.0,35.56,48.0,86.67,79.81,33.22,100.0,9.0,25.78,100.0,36.78,40.4,100.0,20.28,29.17
|
54 |
+
gpt-4-1106-preview-t0.0--gpt-4-1106-preview-t0.0,51.99,98.1,53.0,95.0,94.34,10.24,100.0,87.08,10.69,100.0,29.44,45.71,91.67,83.94,29.57,100.0,13.0,27.56,100.0,29.0,39.53,100.0,34.22,39.55
|
55 |
+
gpt-4-turbo-2024-04-09-t0.0--gpt-4-turbo-2024-04-09-t0.0,58.3,94.88,61.45,82.5,99.79,1.22,100.0,92.68,9.45,100.0,88.89,31.51,85.0,82.35,30.81,100.0,16.33,31.35,100.0,29.89,39.18,96.67,20.23,28.21
|
56 |
+
gpt-4o-2024-05-13-t0.0--gpt-4o-2024-05-13-t0.0,48.34,85.71,56.4,0.0,,,100.0,94.66,5.56,100.0,90.0,30.08,100.0,75.28,35.19,100.0,19.33,28.52,100.0,28.0,36.38,100.0,31.11,33.14
|
57 |
+
gpt-4o-2024-08-06-t0.0--gpt-4o-2024-08-06-t0.0,47.71,85.71,55.66,0.0,,,100.0,90.36,8.32,100.0,87.78,32.85,100.0,85.28,25.51,100.0,23.0,30.53,100.0,23.94,34.28,100.0,23.61,30.72
|
58 |
+
gpt-4o-mini-2024-07-18-t0.0--gpt-4o-mini-2024-07-18-t0.0,34.64,85.06,40.73,0.0,,,96.0,59.27,19.82,99.44,73.74,44.13,100.0,69.72,40.18,100.0,10.33,23.56,100.0,15.67,31.78,100.0,15.67,31.17
|
59 |
+
llama-2-70b-chat-hf-t0.0--llama-2-70b-chat-hf-t0.0,0.81,7.14,11.31,0.0,,,0.0,,,46.67,22.62,42.09,0.0,,,0.0,,,3.33,0.0,,0.0,,
|
60 |
+
mistral-large-2402-t0.0--mistral-large-2402-t0.0,28.17,66.86,42.14,0.0,,,98.0,77.07,27.28,100.0,25.0,43.42,60.0,88.89,31.87,63.33,5.26,22.94,83.33,26.8,36.21,63.33,29.82,40.57
|
61 |
+
mistral-medium-2312-t0.0--mistral-medium-2312-t0.0,16.43,49.25,33.36,0.0,,,22.0,15.28,24.12,76.11,48.91,50.17,30.0,88.89,32.34,80.0,0.0,0.0,83.33,26.8,42.67,53.33,20.31,40.02
|
62 |
+
openchat-3.5-0106-t0.0--openchat-3.5-0106-t0.0,17.1,52.57,32.52,35.0,0.86,3.21,98.0,56.86,23.59,100.0,93.33,25.01,65.0,64.1,48.6,10.0,0.0,0.0,40.0,12.5,31.08,20.0,0.0,0.0
|
63 |
+
openchat-3.5-1210-t0.0--openchat-3.5-1210-t0.0,18.22,51.19,35.6,15.0,3.17,7.76,80.0,60.82,24.23,100.0,90.56,29.33,40.0,66.67,48.15,46.67,0.0,0.0,53.33,20.83,40.14,23.33,7.14,18.9
|
64 |
+
openchat_3.5-t0.0--openchat_3.5-t0.0,23.64,63.52,37.22,50.0,8.7,11.65,38.0,73.36,22.12,100.0,73.89,44.05,100.0,45.0,49.32,90.0,0.0,0.0,36.67,15.15,31.14,30.0,44.44,52.7
|
65 |
+
sheep-duck-llama-2-13b-t0.0--sheep-duck-llama-2-13b-t0.0,5.39,31.9,16.9,0.0,,,0.0,,,96.67,1.72,13.05,83.33,4.0,19.79,0.0,,,23.33,28.57,48.8,20.0,33.33,51.64
|
66 |
+
sheep-duck-llama-2-70b-v1.1-t0.0--sheep-duck-llama-2-70b-v1.1-t0.0,21.5,41.19,52.2,0.0,,,0.0,,,100.0,83.33,37.37,55.0,90.91,29.19,60.0,0.0,0.0,43.33,42.31,44.94,30.0,44.44,46.4
|
67 |
+
tulu-2-dpo-70b-t0.0--tulu-2-dpo-70b-t0.0,12.62,49.76,25.37,0.0,,,0.0,,,100.0,16.67,37.37,68.33,68.29,47.11,80.0,0.0,0.0,53.33,16.88,29.83,46.67,25.0,42.74
|
68 |
+
vicuna-13b-v1.5-t0.0--vicuna-13b-v1.5-t0.0,7.01,39.52,17.73,0.0,,,20.0,20.27,8.84,100.0,0.0,0.0,46.67,60.71,49.73,53.33,0.0,0.0,36.67,21.21,40.2,20.0,4.17,10.21
|
69 |
+
vicuna-33b-v1.3-t0.0--vicuna-33b-v1.3-t0.0,11.27,23.81,47.32,0.0,,,0.0,,,100.0,0.0,0.0,46.67,89.29,31.5,0.0,,,10.0,16.67,28.87,10.0,83.33,28.87
|