piotr-szleg-bards-ai commited on
Commit
fc8c467
·
1 Parent(s): 7768b05

2024-02-23 08:47:54 Publish script update

Browse files
data/general_plots.csv CHANGED
@@ -10,7 +10,7 @@ execution_costs,./html/plots/execution_costs.html,"Figure({
10
  'showlegend': True,
11
  'textposition': 'auto',
12
  'type': 'bar',
13
- 'x': array([17.4942]),
14
  'xaxis': 'x',
15
  'y': array(['gpt-4'], dtype=object),
16
  'yaxis': 'y'},
@@ -24,7 +24,7 @@ execution_costs,./html/plots/execution_costs.html,"Figure({
24
  'showlegend': True,
25
  'textposition': 'auto',
26
  'type': 'bar',
27
- 'x': array([8.7136]),
28
  'xaxis': 'x',
29
  'y': array(['gpt-4-turbo'], dtype=object),
30
  'yaxis': 'y'},
@@ -38,107 +38,107 @@ execution_costs,./html/plots/execution_costs.html,"Figure({
38
  'showlegend': True,
39
  'textposition': 'auto',
40
  'type': 'bar',
41
- 'x': array([0.74798]),
42
  'xaxis': 'x',
43
  'y': array(['gpt-3.5-turbo'], dtype=object),
44
  'yaxis': 'y'},
45
  {'alignmentgroup': 'True',
46
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
47
- 'legendgroup': 'Mixtral-8x7B-Instruct-v0.1',
48
  'marker': {'color': '#ab63fa', 'pattern': {'shape': ''}},
49
- 'name': 'Mixtral-8x7B-Instruct-v0.1',
50
- 'offsetgroup': 'Mixtral-8x7B-Instruct-v0.1',
51
  'orientation': 'h',
52
  'showlegend': True,
53
  'textposition': 'auto',
54
  'type': 'bar',
55
- 'x': array([0.74277]),
56
  'xaxis': 'x',
57
- 'y': array(['Mixtral-8x7B-Instruct-v0.1'], dtype=object),
58
  'yaxis': 'y'},
59
  {'alignmentgroup': 'True',
60
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
61
- 'legendgroup': 'zephyr-7b-beta',
62
  'marker': {'color': '#FFA15A', 'pattern': {'shape': ''}},
63
- 'name': 'zephyr-7b-beta',
64
- 'offsetgroup': 'zephyr-7b-beta',
65
  'orientation': 'h',
66
  'showlegend': True,
67
  'textposition': 'auto',
68
  'type': 'bar',
69
- 'x': array([0.67250116]),
70
  'xaxis': 'x',
71
- 'y': array(['zephyr-7b-beta'], dtype=object),
72
  'yaxis': 'y'},
73
  {'alignmentgroup': 'True',
74
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
75
- 'legendgroup': 'llama-2-70b-chat',
76
  'marker': {'color': '#19d3f3', 'pattern': {'shape': ''}},
77
- 'name': 'llama-2-70b-chat',
78
- 'offsetgroup': 'llama-2-70b-chat',
79
  'orientation': 'h',
80
  'showlegend': True,
81
  'textposition': 'auto',
82
  'type': 'bar',
83
- 'x': array([0.665964]),
84
  'xaxis': 'x',
85
- 'y': array(['llama-2-70b-chat'], dtype=object),
86
  'yaxis': 'y'},
87
  {'alignmentgroup': 'True',
88
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
89
- 'legendgroup': '01-ai Yi Chat (34B)',
90
  'marker': {'color': '#FF6692', 'pattern': {'shape': ''}},
91
- 'name': '01-ai Yi Chat (34B)',
92
- 'offsetgroup': '01-ai Yi Chat (34B)',
93
  'orientation': 'h',
94
  'showlegend': True,
95
  'textposition': 'auto',
96
  'type': 'bar',
97
- 'x': array([0.405168]),
98
  'xaxis': 'x',
99
- 'y': array(['01-ai Yi Chat (34B)'], dtype=object),
100
  'yaxis': 'y'},
101
  {'alignmentgroup': 'True',
102
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
103
- 'legendgroup': 'Mistral-7B-Instruct-v0.2',
104
  'marker': {'color': '#B6E880', 'pattern': {'shape': ''}},
105
- 'name': 'Mistral-7B-Instruct-v0.2',
106
- 'offsetgroup': 'Mistral-7B-Instruct-v0.2',
107
  'orientation': 'h',
108
  'showlegend': True,
109
  'textposition': 'auto',
110
  'type': 'bar',
111
- 'x': array([0.23022898]),
112
  'xaxis': 'x',
113
- 'y': array(['Mistral-7B-Instruct-v0.2'], dtype=object),
114
  'yaxis': 'y'},
115
  {'alignmentgroup': 'True',
116
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
117
- 'legendgroup': 'gemini-pro',
118
  'marker': {'color': '#FF97FF', 'pattern': {'shape': ''}},
119
- 'name': 'gemini-pro',
120
- 'offsetgroup': 'gemini-pro',
121
  'orientation': 'h',
122
  'showlegend': True,
123
  'textposition': 'auto',
124
  'type': 'bar',
125
- 'x': array([0.178845]),
126
  'xaxis': 'x',
127
- 'y': array(['gemini-pro'], dtype=object),
128
  'yaxis': 'y'},
129
  {'alignmentgroup': 'True',
130
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
131
- 'legendgroup': 'Snorkel Mistral PairRM DPO (7B)',
132
  'marker': {'color': '#FECB52', 'pattern': {'shape': ''}},
133
- 'name': 'Snorkel Mistral PairRM DPO (7B)',
134
- 'offsetgroup': 'Snorkel Mistral PairRM DPO (7B)',
135
  'orientation': 'h',
136
  'showlegend': True,
137
  'textposition': 'auto',
138
  'type': 'bar',
139
- 'x': array([0.15948]),
140
  'xaxis': 'x',
141
- 'y': array(['Snorkel Mistral PairRM DPO (7B)'], dtype=object),
142
  'yaxis': 'y'},
143
  {'alignmentgroup': 'True',
144
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
@@ -150,7 +150,7 @@ execution_costs,./html/plots/execution_costs.html,"Figure({
150
  'showlegend': True,
151
  'textposition': 'auto',
152
  'type': 'bar',
153
- 'x': array([0.158515]),
154
  'xaxis': 'x',
155
  'y': array(['chat-bison (PaLM 2)'], dtype=object),
156
  'yaxis': 'y'},
@@ -164,212 +164,165 @@ execution_costs,./html/plots/execution_costs.html,"Figure({
164
  'showlegend': True,
165
  'textposition': 'auto',
166
  'type': 'bar',
167
- 'x': array([0.151035]),
168
  'xaxis': 'x',
169
  'y': array(['chat-bison-32k (PaLM 2 32K)'], dtype=object),
170
  'yaxis': 'y'},
171
  {'alignmentgroup': 'True',
172
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
173
- 'legendgroup': 'Mistral (7B) Instruct v0.2 (Together AI)',
174
  'marker': {'color': '#00cc96', 'pattern': {'shape': ''}},
175
- 'name': 'Mistral (7B) Instruct v0.2 (Together AI)',
176
- 'offsetgroup': 'Mistral (7B) Instruct v0.2 (Together AI)',
177
  'orientation': 'h',
178
  'showlegend': True,
179
  'textposition': 'auto',
180
  'type': 'bar',
181
- 'x': array([0.148248]),
182
  'xaxis': 'x',
183
- 'y': array(['Mistral (7B) Instruct v0.2 (Together AI)'], dtype=object),
184
  'yaxis': 'y'},
185
  {'alignmentgroup': 'True',
186
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
187
- 'legendgroup': 'Chronos Hermes (13B)',
188
  'marker': {'color': '#ab63fa', 'pattern': {'shape': ''}},
189
- 'name': 'Chronos Hermes (13B)',
190
- 'offsetgroup': 'Chronos Hermes (13B)',
191
- 'orientation': 'h',
192
- 'showlegend': True,
193
- 'textposition': 'auto',
194
- 'type': 'bar',
195
- 'x': array([0.141156]),
196
- 'xaxis': 'x',
197
- 'y': array(['Chronos Hermes (13B)'], dtype=object),
198
- 'yaxis': 'y'},
199
- {'alignmentgroup': 'True',
200
- 'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
201
- 'legendgroup': 'WizardLM v1.2 (13B)',
202
- 'marker': {'color': '#FFA15A', 'pattern': {'shape': ''}},
203
- 'name': 'WizardLM v1.2 (13B)',
204
- 'offsetgroup': 'WizardLM v1.2 (13B)',
205
- 'orientation': 'h',
206
- 'showlegend': True,
207
- 'textposition': 'auto',
208
- 'type': 'bar',
209
- 'x': array([0.131244]),
210
- 'xaxis': 'x',
211
- 'y': array(['WizardLM v1.2 (13B)'], dtype=object),
212
- 'yaxis': 'y'},
213
- {'alignmentgroup': 'True',
214
- 'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
215
- 'legendgroup': 'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
216
- 'marker': {'color': '#19d3f3', 'pattern': {'shape': ''}},
217
- 'name': 'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
218
- 'offsetgroup': 'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
219
  'orientation': 'h',
220
  'showlegend': True,
221
  'textposition': 'auto',
222
  'type': 'bar',
223
- 'x': array([0.11099814]),
224
  'xaxis': 'x',
225
- 'y': array(['TinyLlama/TinyLlama-1.1B-Chat-v1.0'], dtype=object),
226
  'yaxis': 'y'},
227
  {'alignmentgroup': 'True',
228
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
229
  'legendgroup': 'LLaMA-2 Chat (7B)',
230
- 'marker': {'color': '#FF6692', 'pattern': {'shape': ''}},
231
  'name': 'LLaMA-2 Chat (7B)',
232
  'offsetgroup': 'LLaMA-2 Chat (7B)',
233
  'orientation': 'h',
234
  'showlegend': True,
235
  'textposition': 'auto',
236
  'type': 'bar',
237
- 'x': array([0.103212]),
238
  'xaxis': 'x',
239
  'y': array(['LLaMA-2 Chat (7B)'], dtype=object),
240
  'yaxis': 'y'},
241
  {'alignmentgroup': 'True',
242
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
243
- 'legendgroup': 'Upstage SOLAR Instruct v1 (11B)',
244
- 'marker': {'color': '#B6E880', 'pattern': {'shape': ''}},
245
- 'name': 'Upstage SOLAR Instruct v1 (11B)',
246
- 'offsetgroup': 'Upstage SOLAR Instruct v1 (11B)',
247
- 'orientation': 'h',
248
- 'showlegend': True,
249
- 'textposition': 'auto',
250
- 'type': 'bar',
251
- 'x': array([0.10254]),
252
- 'xaxis': 'x',
253
- 'y': array(['Upstage SOLAR Instruct v1 (11B)'], dtype=object),
254
- 'yaxis': 'y'},
255
- {'alignmentgroup': 'True',
256
- 'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
257
- 'legendgroup': 'Vicuna v1.5 (7B)',
258
- 'marker': {'color': '#FF97FF', 'pattern': {'shape': ''}},
259
- 'name': 'Vicuna v1.5 (7B)',
260
- 'offsetgroup': 'Vicuna v1.5 (7B)',
261
  'orientation': 'h',
262
  'showlegend': True,
263
  'textposition': 'auto',
264
  'type': 'bar',
265
- 'x': array([0.097484]),
266
  'xaxis': 'x',
267
- 'y': array(['Vicuna v1.5 (7B)'], dtype=object),
268
  'yaxis': 'y'},
269
  {'alignmentgroup': 'True',
270
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
271
  'legendgroup': 'Qwen 1.5 Chat (7B)',
272
- 'marker': {'color': '#FECB52', 'pattern': {'shape': ''}},
273
  'name': 'Qwen 1.5 Chat (7B)',
274
  'offsetgroup': 'Qwen 1.5 Chat (7B)',
275
  'orientation': 'h',
276
  'showlegend': True,
277
  'textposition': 'auto',
278
  'type': 'bar',
279
- 'x': array([0.092256]),
280
  'xaxis': 'x',
281
  'y': array(['Qwen 1.5 Chat (7B)'], dtype=object),
282
  'yaxis': 'y'},
283
  {'alignmentgroup': 'True',
284
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
285
- 'legendgroup': 'OpenHermes-2.5-Mistral (7B)',
286
- 'marker': {'color': '#636efa', 'pattern': {'shape': ''}},
287
- 'name': 'OpenHermes-2.5-Mistral (7B)',
288
- 'offsetgroup': 'OpenHermes-2.5-Mistral (7B)',
289
  'orientation': 'h',
290
  'showlegend': True,
291
  'textposition': 'auto',
292
  'type': 'bar',
293
- 'x': array([0.089096]),
294
  'xaxis': 'x',
295
- 'y': array(['OpenHermes-2.5-Mistral (7B)'], dtype=object),
296
  'yaxis': 'y'},
297
  {'alignmentgroup': 'True',
298
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
299
  'legendgroup': 'Falcon Instruct (7B)',
300
- 'marker': {'color': '#EF553B', 'pattern': {'shape': ''}},
301
  'name': 'Falcon Instruct (7B)',
302
  'offsetgroup': 'Falcon Instruct (7B)',
303
  'orientation': 'h',
304
  'showlegend': True,
305
  'textposition': 'auto',
306
  'type': 'bar',
307
- 'x': array([0.07428]),
308
  'xaxis': 'x',
309
  'y': array(['Falcon Instruct (7B)'], dtype=object),
310
  'yaxis': 'y'},
311
  {'alignmentgroup': 'True',
312
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
313
  'legendgroup': 'RedPajama-INCITE Chat (7B)',
314
- 'marker': {'color': '#00cc96', 'pattern': {'shape': ''}},
315
  'name': 'RedPajama-INCITE Chat (7B)',
316
  'offsetgroup': 'RedPajama-INCITE Chat (7B)',
317
  'orientation': 'h',
318
  'showlegend': True,
319
  'textposition': 'auto',
320
  'type': 'bar',
321
- 'x': array([0.07172]),
322
  'xaxis': 'x',
323
  'y': array(['RedPajama-INCITE Chat (7B)'], dtype=object),
324
  'yaxis': 'y'}],
325
  'layout': {'barmode': 'relative',
326
  'legend': {'title': {'text': 'Model'}, 'tracegroupgap': 0},
327
  'template': '...',
328
- 'title': {'text': 'Costs of execution of 5560 test queries per model'},
329
  'xaxis': {'anchor': 'y', 'domain': [0.0, 1.0], 'title': {'text': 'Execution cost ($)'}},
330
  'yaxis': {'anchor': 'x',
331
  'categoryarray': [RedPajama-INCITE Chat (7B), Falcon
332
- Instruct (7B), OpenHermes-2.5-Mistral
333
- (7B), Qwen 1.5 Chat (7B), Vicuna v1.5
334
- (7B), Upstage SOLAR Instruct v1 (11B),
335
- LLaMA-2 Chat (7B),
336
- TinyLlama/TinyLlama-1.1B-Chat-v1.0,
337
- WizardLM v1.2 (13B), Chronos Hermes
338
- (13B), Mistral (7B) Instruct v0.2
339
- (Together AI), chat-bison-32k (PaLM 2
340
- 32K), chat-bison (PaLM 2), Snorkel
341
- Mistral PairRM DPO (7B), gemini-pro,
342
- Mistral-7B-Instruct-v0.2, 01-ai Yi Chat
343
- (34B), llama-2-70b-chat, zephyr-7b-beta,
344
  Mixtral-8x7B-Instruct-v0.1,
345
- gpt-3.5-turbo, gpt-4-turbo, gpt-4],
 
346
  'categoryorder': 'array',
347
  'domain': [0.0, 1.0],
348
  'title': {'text': 'Model'}}}
349
- })",Costs of execution of 5560 test queries per model,,"{""data"":[{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-4"",""marker"":{""color"":""#636efa"",""pattern"":{""shape"":""""}},""name"":""gpt-4"",""offsetgroup"":""gpt-4"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[17.4942],""xaxis"":""x"",""y"":[""gpt-4""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-4-turbo"",""marker"":{""color"":""#EF553B"",""pattern"":{""shape"":""""}},""name"":""gpt-4-turbo"",""offsetgroup"":""gpt-4-turbo"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[8.7136],""xaxis"":""x"",""y"":[""gpt-4-turbo""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-3.5-turbo"",""marker"":{""color"":""#00cc96"",""pattern"":{""shape"":""""}},""name"":""gpt-3.5-turbo"",""offsetgroup"":""gpt-3.5-turbo"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.74798],""xaxis"":""x"",""y"":[""gpt-3.5-turbo""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Mixtral-8x7B-Instruct-v0.1"",""marker"":{""color"":""#ab63fa"",""pattern"":{""shape"":""""}},""name"":""Mixtral-8x7B-Instruct-v0.1"",""offsetgroup"":""Mixtral-8x7B-Instruct-v0.1"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.7427699999999999],""xaxis"":""x"",""y"":[""Mixtral-8x7B-Instruct-v0.1""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""zephyr-7b-beta"",""marker"":{""color"":""#FFA15A"",""pattern"":{""shape"":""""}},""name"":""zephyr-7b-beta"",""offsetgroup"":""zephyr-7b-beta"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.6725011554823982],""xaxis"":""x"",""y"":[""zephyr-7b-beta""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""llama-2-70b-chat"",""marker"":{""color"":""#19d3f3"",""pattern"":{""shape"":""""}},""name"":""llama-2-70b-chat"",""offsetgroup"":""llama-2-70b-chat"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.665964],""xaxis"":""x"",""y"":[""llama-2-70b-chat""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""01-ai Yi Chat (34B)"",""marker"":{""color"":""#FF6692"",""pattern"":{""shape"":""""}},""name"":""01-ai Yi Chat (34B)"",""offsetgroup"":""01-ai Yi Chat (34B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.40516800000000003],""xaxis"":""x"",""y"":[""01-ai Yi Chat (34B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Mistral-7B-Instruct-v0.2"",""marker"":{""color"":""#B6E880"",""pattern"":{""shape"":""""}},""name"":""Mistral-7B-Instruct-v0.2"",""offsetgroup"":""Mistral-7B-Instruct-v0.2"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.23022897595829436],""xaxis"":""x"",""y"":[""Mistral-7B-Instruct-v0.2""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gemini-pro"",""marker"":{""color"":""#FF97FF"",""pattern"":{""shape"":""""}},""name"":""gemini-pro"",""offsetgroup"":""gemini-pro"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.178845],""xaxis"":""x"",""y"":[""gemini-pro""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Snorkel Mistral PairRM DPO (7B)"",""marker"":{""color"":""#FECB52"",""pattern"":{""shape"":""""}},""name"":""Snorkel Mistral PairRM DPO (7B)"",""offsetgroup"":""Snorkel Mistral PairRM DPO (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.15948],""xaxis"":""x"",""y"":[""Snorkel Mistral PairRM DPO (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""chat-bison (PaLM 2)"",""marker"":{""color"":""#636efa"",""pattern"":{""shape"":""""}},""name"":""chat-bison (PaLM 2)"",""offsetgroup"":""chat-bison (PaLM 2)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.15851500000000002],""xaxis"":""x"",""y"":[""chat-bison (PaLM 2)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""chat-bison-32k (PaLM 2 32K)"",""marker"":{""color"":""#EF553B"",""pattern"":{""shape"":""""}},""name"":""chat-bison-32k (PaLM 2 32K)"",""offsetgroup"":""chat-bison-32k (PaLM 2 32K)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.151035],""xaxis"":""x"",""y"":[""chat-bison-32k (PaLM 2 32K)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Mistral (7B) Instruct v0.2 (Together AI)"",""marker"":{""color"":""#00cc96"",""pattern"":{""shape"":""""}},""name"":""Mistral (7B) Instruct v0.2 (Together AI)"",""offsetgroup"":""Mistral (7B) Instruct v0.2 (Together AI)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.14824800000000002],""xaxis"":""x"",""y"":[""Mistral (7B) Instruct v0.2 (Together AI)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Chronos Hermes (13B)"",""marker"":{""color"":""#ab63fa"",""pattern"":{""shape"":""""}},""name"":""Chronos Hermes (13B)"",""offsetgroup"":""Chronos Hermes (13B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.141156],""xaxis"":""x"",""y"":[""Chronos Hermes (13B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""WizardLM v1.2 (13B)"",""marker"":{""color"":""#FFA15A"",""pattern"":{""shape"":""""}},""name"":""WizardLM v1.2 (13B)"",""offsetgroup"":""WizardLM v1.2 (13B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.131244],""xaxis"":""x"",""y"":[""WizardLM v1.2 (13B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0"",""marker"":{""color"":""#19d3f3"",""pattern"":{""shape"":""""}},""name"":""TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0"",""offsetgroup"":""TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.11099814383188883],""xaxis"":""x"",""y"":[""TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""LLaMA-2 Chat (7B)"",""marker"":{""color"":""#FF6692"",""pattern"":{""shape"":""""}},""name"":""LLaMA-2 Chat (7B)"",""offsetgroup"":""LLaMA-2 Chat (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.103212],""xaxis"":""x"",""y"":[""LLaMA-2 Chat (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Upstage SOLAR Instruct v1 (11B)"",""marker"":{""color"":""#B6E880"",""pattern"":{""shape"":""""}},""name"":""Upstage SOLAR Instruct v1 (11B)"",""offsetgroup"":""Upstage SOLAR Instruct v1 (11B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.10253999999999999],""xaxis"":""x"",""y"":[""Upstage SOLAR Instruct v1 (11B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Vicuna v1.5 (7B)"",""marker"":{""color"":""#FF97FF"",""pattern"":{""shape"":""""}},""name"":""Vicuna v1.5 (7B)"",""offsetgroup"":""Vicuna v1.5 (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.097484],""xaxis"":""x"",""y"":[""Vicuna v1.5 (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Qwen 1.5 Chat (7B)"",""marker"":{""color"":""#FECB52"",""pattern"":{""shape"":""""}},""name"":""Qwen 1.5 Chat (7B)"",""offsetgroup"":""Qwen 1.5 Chat (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.092256],""xaxis"":""x"",""y"":[""Qwen 1.5 Chat (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""OpenHermes-2.5-Mistral (7B)"",""marker"":{""color"":""#636efa"",""pattern"":{""shape"":""""}},""name"":""OpenHermes-2.5-Mistral (7B)"",""offsetgroup"":""OpenHermes-2.5-Mistral (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.08909600000000001],""xaxis"":""x"",""y"":[""OpenHermes-2.5-Mistral (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Falcon Instruct (7B)"",""marker"":{""color"":""#EF553B"",""pattern"":{""shape"":""""}},""name"":""Falcon Instruct (7B)"",""offsetgroup"":""Falcon Instruct (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.07428],""xaxis"":""x"",""y"":[""Falcon Instruct (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""RedPajama-INCITE Chat (7B)"",""marker"":{""color"":""#00cc96"",""pattern"":{""shape"":""""}},""name"":""RedPajama-INCITE Chat (7B)"",""offsetgroup"":""RedPajama-INCITE Chat (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.07172],""xaxis"":""x"",""y"":[""RedPajama-INCITE Chat (7B)""],""yaxis"":""y"",""type"":""bar""}],""layout"":{""template"":{""data"":{""histogram2dcontour"":[{""type"":""histogram2dcontour"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""choropleth"":[{""type"":""choropleth"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""histogram2d"":[{""type"":""histogram2d"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""heatmap"":[{""type"":""heatmap"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""heatmapgl"":[{""type"":""heatmapgl"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""contourcarpet"":[{""type"":""contourcarpet"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""contour"":[{""type"":""contour"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""surface"":[{""type"":""surface"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""mesh3d"":[{""type"":""mesh3d"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""scatter"":[{""fillpattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2},""type"":""scatter""}],""parcoords"":[{""type"":""parcoords"",""line"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterpolargl"":[{""type"":""scatterpolargl"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""bar"":[{""error_x"":{""color"":""#2a3f5f""},""error_y"":{""color"":""#2a3f5f""},""marker"":{""line"":{""color"":""#E5ECF6"",""width"":0.5},""pattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2}},""type"":""bar""}],""scattergeo"":[{""type"":""scattergeo"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterpolar"":[{""type"":""scatterpolar"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""histogram"":[{""marker"":{""pattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2}},""type"":""histogram""}],""scattergl"":[{""type"":""scattergl"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatter3d"":[{""type"":""scatter3d"",""line"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}},""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scattermapbox"":[{""type"":""scattermapbox"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterternary"":[{""type"":""scatterternary"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scattercarpet"":[{""type"":""scattercarpet"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""carpet"":[{""aaxis"":{""endlinecolor"":""#2a3f5f"",""gridcolor"":""white"",""linecolor"":""white"",""minorgridcolor"":""white"",""startlinecolor"":""#2a3f5f""},""baxis"":{""endlinecolor"":""#2a3f5f"",""gridcolor"":""white"",""linecolor"":""white"",""minorgridcolor"":""white"",""startlinecolor"":""#2a3f5f""},""type"":""carpet""}],""table"":[{""cells"":{""fill"":{""color"":""#EBF0F8""},""line"":{""color"":""white""}},""header"":{""fill"":{""color"":""#C8D4E3""},""line"":{""color"":""white""}},""type"":""table""}],""barpolar"":[{""marker"":{""line"":{""color"":""#E5ECF6"",""width"":0.5},""pattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2}},""type"":""barpolar""}],""pie"":[{""automargin"":true,""type"":""pie""}]},""layout"":{""autotypenumbers"":""strict"",""colorway"":[""#636efa"",""#EF553B"",""#00cc96"",""#ab63fa"",""#FFA15A"",""#19d3f3"",""#FF6692"",""#B6E880"",""#FF97FF"",""#FECB52""],""font"":{""color"":""#2a3f5f""},""hovermode"":""closest"",""hoverlabel"":{""align"":""left""},""paper_bgcolor"":""white"",""plot_bgcolor"":""#E5ECF6"",""polar"":{""bgcolor"":""#E5ECF6"",""angularaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""},""radialaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""}},""ternary"":{""bgcolor"":""#E5ECF6"",""aaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""},""baxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""},""caxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""}},""coloraxis"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}},""colorscale"":{""sequential"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]],""sequentialminus"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]],""diverging"":[[0,""#8e0152""],[0.1,""#c51b7d""],[0.2,""#de77ae""],[0.3,""#f1b6da""],[0.4,""#fde0ef""],[0.5,""#f7f7f7""],[0.6,""#e6f5d0""],[0.7,""#b8e186""],[0.8,""#7fbc41""],[0.9,""#4d9221""],[1,""#276419""]]},""xaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":"""",""title"":{""standoff"":15},""zerolinecolor"":""white"",""automargin"":true,""zerolinewidth"":2},""yaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":"""",""title"":{""standoff"":15},""zerolinecolor"":""white"",""automargin"":true,""zerolinewidth"":2},""scene"":{""xaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2},""yaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2},""zaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2}},""shapedefaults"":{""line"":{""color"":""#2a3f5f""}},""annotationdefaults"":{""arrowcolor"":""#2a3f5f"",""arrowhead"":0,""arrowwidth"":1},""geo"":{""bgcolor"":""white"",""landcolor"":""#E5ECF6"",""subunitcolor"":""white"",""showland"":true,""showlakes"":true,""lakecolor"":""white""},""title"":{""x"":0.05},""mapbox"":{""style"":""light""}}},""xaxis"":{""anchor"":""y"",""domain"":[0.0,1.0],""title"":{""text"":""Execution cost ($)""}},""yaxis"":{""anchor"":""x"",""domain"":[0.0,1.0],""title"":{""text"":""Model""},""categoryorder"":""array"",""categoryarray"":[""RedPajama-INCITE Chat (7B)"",""Falcon Instruct (7B)"",""OpenHermes-2.5-Mistral (7B)"",""Qwen 1.5 Chat (7B)"",""Vicuna v1.5 (7B)"",""Upstage SOLAR Instruct v1 (11B)"",""LLaMA-2 Chat (7B)"",""TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0"",""WizardLM v1.2 (13B)"",""Chronos Hermes (13B)"",""Mistral (7B) Instruct v0.2 (Together AI)"",""chat-bison-32k (PaLM 2 32K)"",""chat-bison (PaLM 2)"",""Snorkel Mistral PairRM DPO (7B)"",""gemini-pro"",""Mistral-7B-Instruct-v0.2"",""01-ai Yi Chat (34B)"",""llama-2-70b-chat"",""zephyr-7b-beta"",""Mixtral-8x7B-Instruct-v0.1"",""gpt-3.5-turbo"",""gpt-4-turbo"",""gpt-4""]},""legend"":{""title"":{""text"":""Model""},""tracegroupgap"":0},""title"":{""text"":""Costs of execution of 5560 test queries per model""},""barmode"":""relative""}}","{""y"": ""model"", ""x"": ""model_query_costs"", ""color"": ""model"", ""orientation"": ""h"", ""title"": ""Costs of execution of 5560 test queries per model"", ""labels"": {""model"": ""Model"", ""model_query_costs"": ""Execution cost ($)""}}",",model_query_costs,model
350
- 2,17.4942,gpt-4
351
- 1,8.7136,gpt-4-turbo
352
- 0,0.74798,gpt-3.5-turbo
353
- 4,0.7427699999999999,Mixtral-8x7B-Instruct-v0.1
354
- 5,0.6725011554823982,zephyr-7b-beta
355
- 3,0.665964,llama-2-70b-chat
356
- 11,0.40516800000000003,01-ai Yi Chat (34B)
357
- 6,0.23022897595829436,Mistral-7B-Instruct-v0.2
358
- 8,0.178845,gemini-pro
359
- 43,0.15948,Snorkel Mistral PairRM DPO (7B)
360
- 9,0.15851500000000002,chat-bison (PaLM 2)
361
- 10,0.151035,chat-bison-32k (PaLM 2 32K)
362
- 26,0.14824800000000002,Mistral (7B) Instruct v0.2 (Together AI)
363
- 12,0.141156,Chronos Hermes (13B)
364
- 55,0.131244,WizardLM v1.2 (13B)
365
- 7,0.11099814383188883,TinyLlama/TinyLlama-1.1B-Chat-v1.0
366
- 24,0.103212,LLaMA-2 Chat (7B)
367
- 56,0.10253999999999999,Upstage SOLAR Instruct v1 (11B)
368
- 17,0.097484,Vicuna v1.5 (7B)
369
- 40,0.092256,Qwen 1.5 Chat (7B)
370
- 46,0.08909600000000001,OpenHermes-2.5-Mistral (7B)
371
- 48,0.07428,Falcon Instruct (7B)
372
- 51,0.07172,RedPajama-INCITE Chat (7B)
373
  "
374
  model_sizes,./html/plots/model_sizes.html,"Figure({
375
  'data': [{'alignmentgroup': 'True',
 
10
  'showlegend': True,
11
  'textposition': 'auto',
12
  'type': 'bar',
13
+ 'x': array([45.87]),
14
  'xaxis': 'x',
15
  'y': array(['gpt-4'], dtype=object),
16
  'yaxis': 'y'},
 
24
  'showlegend': True,
25
  'textposition': 'auto',
26
  'type': 'bar',
27
+ 'x': array([19.2168]),
28
  'xaxis': 'x',
29
  'y': array(['gpt-4-turbo'], dtype=object),
30
  'yaxis': 'y'},
 
38
  'showlegend': True,
39
  'textposition': 'auto',
40
  'type': 'bar',
41
+ 'x': array([1.75176]),
42
  'xaxis': 'x',
43
  'y': array(['gpt-3.5-turbo'], dtype=object),
44
  'yaxis': 'y'},
45
  {'alignmentgroup': 'True',
46
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
47
+ 'legendgroup': 'llama-2-70b-chat',
48
  'marker': {'color': '#ab63fa', 'pattern': {'shape': ''}},
49
+ 'name': 'llama-2-70b-chat',
50
+ 'offsetgroup': 'llama-2-70b-chat',
51
  'orientation': 'h',
52
  'showlegend': True,
53
  'textposition': 'auto',
54
  'type': 'bar',
55
+ 'x': array([0.65934]),
56
  'xaxis': 'x',
57
+ 'y': array(['llama-2-70b-chat'], dtype=object),
58
  'yaxis': 'y'},
59
  {'alignmentgroup': 'True',
60
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
61
+ 'legendgroup': 'Mixtral-8x7B-Instruct-v0.1',
62
  'marker': {'color': '#FFA15A', 'pattern': {'shape': ''}},
63
+ 'name': 'Mixtral-8x7B-Instruct-v0.1',
64
+ 'offsetgroup': 'Mixtral-8x7B-Instruct-v0.1',
65
  'orientation': 'h',
66
  'showlegend': True,
67
  'textposition': 'auto',
68
  'type': 'bar',
69
+ 'x': array([0.65934]),
70
  'xaxis': 'x',
71
+ 'y': array(['Mixtral-8x7B-Instruct-v0.1'], dtype=object),
72
  'yaxis': 'y'},
73
  {'alignmentgroup': 'True',
74
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
75
+ 'legendgroup': '01-ai Yi Chat (34B)',
76
  'marker': {'color': '#19d3f3', 'pattern': {'shape': ''}},
77
+ 'name': '01-ai Yi Chat (34B)',
78
+ 'offsetgroup': '01-ai Yi Chat (34B)',
79
  'orientation': 'h',
80
  'showlegend': True,
81
  'textposition': 'auto',
82
  'type': 'bar',
83
+ 'x': array([0.58184]),
84
  'xaxis': 'x',
85
+ 'y': array(['01-ai Yi Chat (34B)'], dtype=object),
86
  'yaxis': 'y'},
87
  {'alignmentgroup': 'True',
88
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
89
+ 'legendgroup': 'Snorkel Mistral PairRM DPO (7B)',
90
  'marker': {'color': '#FF6692', 'pattern': {'shape': ''}},
91
+ 'name': 'Snorkel Mistral PairRM DPO (7B)',
92
+ 'offsetgroup': 'Snorkel Mistral PairRM DPO (7B)',
93
  'orientation': 'h',
94
  'showlegend': True,
95
  'textposition': 'auto',
96
  'type': 'bar',
97
+ 'x': array([0.334256]),
98
  'xaxis': 'x',
99
+ 'y': array(['Snorkel Mistral PairRM DPO (7B)'], dtype=object),
100
  'yaxis': 'y'},
101
  {'alignmentgroup': 'True',
102
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
103
+ 'legendgroup': 'Chronos Hermes (13B)',
104
  'marker': {'color': '#B6E880', 'pattern': {'shape': ''}},
105
+ 'name': 'Chronos Hermes (13B)',
106
+ 'offsetgroup': 'Chronos Hermes (13B)',
107
  'orientation': 'h',
108
  'showlegend': True,
109
  'textposition': 'auto',
110
  'type': 'bar',
111
+ 'x': array([0.27396]),
112
  'xaxis': 'x',
113
+ 'y': array(['Chronos Hermes (13B)'], dtype=object),
114
  'yaxis': 'y'},
115
  {'alignmentgroup': 'True',
116
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
117
+ 'legendgroup': 'WizardLM v1.2 (13B)',
118
  'marker': {'color': '#FF97FF', 'pattern': {'shape': ''}},
119
+ 'name': 'WizardLM v1.2 (13B)',
120
+ 'offsetgroup': 'WizardLM v1.2 (13B)',
121
  'orientation': 'h',
122
  'showlegend': True,
123
  'textposition': 'auto',
124
  'type': 'bar',
125
+ 'x': array([0.21207]),
126
  'xaxis': 'x',
127
+ 'y': array(['WizardLM v1.2 (13B)'], dtype=object),
128
  'yaxis': 'y'},
129
  {'alignmentgroup': 'True',
130
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
131
+ 'legendgroup': 'gemini-pro',
132
  'marker': {'color': '#FECB52', 'pattern': {'shape': ''}},
133
+ 'name': 'gemini-pro',
134
+ 'offsetgroup': 'gemini-pro',
135
  'orientation': 'h',
136
  'showlegend': True,
137
  'textposition': 'auto',
138
  'type': 'bar',
139
+ 'x': array([0.18315]),
140
  'xaxis': 'x',
141
+ 'y': array(['gemini-pro'], dtype=object),
142
  'yaxis': 'y'},
143
  {'alignmentgroup': 'True',
144
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
 
150
  'showlegend': True,
151
  'textposition': 'auto',
152
  'type': 'bar',
153
+ 'x': array([0.18315]),
154
  'xaxis': 'x',
155
  'y': array(['chat-bison (PaLM 2)'], dtype=object),
156
  'yaxis': 'y'},
 
164
  'showlegend': True,
165
  'textposition': 'auto',
166
  'type': 'bar',
167
+ 'x': array([0.18315]),
168
  'xaxis': 'x',
169
  'y': array(['chat-bison-32k (PaLM 2 32K)'], dtype=object),
170
  'yaxis': 'y'},
171
  {'alignmentgroup': 'True',
172
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
173
+ 'legendgroup': 'Upstage SOLAR Instruct v1 (11B)',
174
  'marker': {'color': '#00cc96', 'pattern': {'shape': ''}},
175
+ 'name': 'Upstage SOLAR Instruct v1 (11B)',
176
+ 'offsetgroup': 'Upstage SOLAR Instruct v1 (11B)',
177
  'orientation': 'h',
178
  'showlegend': True,
179
  'textposition': 'auto',
180
  'type': 'bar',
181
+ 'x': array([0.180288]),
182
  'xaxis': 'x',
183
+ 'y': array(['Upstage SOLAR Instruct v1 (11B)'], dtype=object),
184
  'yaxis': 'y'},
185
  {'alignmentgroup': 'True',
186
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
187
+ 'legendgroup': 'Mistral (7B) Instruct v0.2 (Together AI)',
188
  'marker': {'color': '#ab63fa', 'pattern': {'shape': ''}},
189
+ 'name': 'Mistral (7B) Instruct v0.2 (Together AI)',
190
+ 'offsetgroup': 'Mistral (7B) Instruct v0.2 (Together AI)',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  'orientation': 'h',
192
  'showlegend': True,
193
  'textposition': 'auto',
194
  'type': 'bar',
195
+ 'x': array([0.165154]),
196
  'xaxis': 'x',
197
+ 'y': array(['Mistral (7B) Instruct v0.2 (Together AI)'], dtype=object),
198
  'yaxis': 'y'},
199
  {'alignmentgroup': 'True',
200
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
201
  'legendgroup': 'LLaMA-2 Chat (7B)',
202
+ 'marker': {'color': '#FFA15A', 'pattern': {'shape': ''}},
203
  'name': 'LLaMA-2 Chat (7B)',
204
  'offsetgroup': 'LLaMA-2 Chat (7B)',
205
  'orientation': 'h',
206
  'showlegend': True,
207
  'textposition': 'auto',
208
  'type': 'bar',
209
+ 'x': array([0.163296]),
210
  'xaxis': 'x',
211
  'y': array(['LLaMA-2 Chat (7B)'], dtype=object),
212
  'yaxis': 'y'},
213
  {'alignmentgroup': 'True',
214
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
215
+ 'legendgroup': 'OpenHermes-2.5-Mistral (7B)',
216
+ 'marker': {'color': '#19d3f3', 'pattern': {'shape': ''}},
217
+ 'name': 'OpenHermes-2.5-Mistral (7B)',
218
+ 'offsetgroup': 'OpenHermes-2.5-Mistral (7B)',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  'orientation': 'h',
220
  'showlegend': True,
221
  'textposition': 'auto',
222
  'type': 'bar',
223
+ 'x': array([0.14182]),
224
  'xaxis': 'x',
225
+ 'y': array(['OpenHermes-2.5-Mistral (7B)'], dtype=object),
226
  'yaxis': 'y'},
227
  {'alignmentgroup': 'True',
228
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
229
  'legendgroup': 'Qwen 1.5 Chat (7B)',
230
+ 'marker': {'color': '#FF6692', 'pattern': {'shape': ''}},
231
  'name': 'Qwen 1.5 Chat (7B)',
232
  'offsetgroup': 'Qwen 1.5 Chat (7B)',
233
  'orientation': 'h',
234
  'showlegend': True,
235
  'textposition': 'auto',
236
  'type': 'bar',
237
+ 'x': array([0.137592]),
238
  'xaxis': 'x',
239
  'y': array(['Qwen 1.5 Chat (7B)'], dtype=object),
240
  'yaxis': 'y'},
241
  {'alignmentgroup': 'True',
242
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
243
+ 'legendgroup': 'Vicuna v1.5 (7B)',
244
+ 'marker': {'color': '#B6E880', 'pattern': {'shape': ''}},
245
+ 'name': 'Vicuna v1.5 (7B)',
246
+ 'offsetgroup': 'Vicuna v1.5 (7B)',
247
  'orientation': 'h',
248
  'showlegend': True,
249
  'textposition': 'auto',
250
  'type': 'bar',
251
+ 'x': array([0.12588]),
252
  'xaxis': 'x',
253
+ 'y': array(['Vicuna v1.5 (7B)'], dtype=object),
254
  'yaxis': 'y'},
255
  {'alignmentgroup': 'True',
256
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
257
  'legendgroup': 'Falcon Instruct (7B)',
258
+ 'marker': {'color': '#FF97FF', 'pattern': {'shape': ''}},
259
  'name': 'Falcon Instruct (7B)',
260
  'offsetgroup': 'Falcon Instruct (7B)',
261
  'orientation': 'h',
262
  'showlegend': True,
263
  'textposition': 'auto',
264
  'type': 'bar',
265
+ 'x': array([0.124768]),
266
  'xaxis': 'x',
267
  'y': array(['Falcon Instruct (7B)'], dtype=object),
268
  'yaxis': 'y'},
269
  {'alignmentgroup': 'True',
270
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
271
  'legendgroup': 'RedPajama-INCITE Chat (7B)',
272
+ 'marker': {'color': '#FECB52', 'pattern': {'shape': ''}},
273
  'name': 'RedPajama-INCITE Chat (7B)',
274
  'offsetgroup': 'RedPajama-INCITE Chat (7B)',
275
  'orientation': 'h',
276
  'showlegend': True,
277
  'textposition': 'auto',
278
  'type': 'bar',
279
+ 'x': array([0.123424]),
280
  'xaxis': 'x',
281
  'y': array(['RedPajama-INCITE Chat (7B)'], dtype=object),
282
  'yaxis': 'y'}],
283
  'layout': {'barmode': 'relative',
284
  'legend': {'title': {'text': 'Model'}, 'tracegroupgap': 0},
285
  'template': '...',
286
+ 'title': {'text': 'Costs of execution of 6660 test queries per model'},
287
  'xaxis': {'anchor': 'y', 'domain': [0.0, 1.0], 'title': {'text': 'Execution cost ($)'}},
288
  'yaxis': {'anchor': 'x',
289
  'categoryarray': [RedPajama-INCITE Chat (7B), Falcon
290
+ Instruct (7B), Vicuna v1.5 (7B), Qwen
291
+ 1.5 Chat (7B), OpenHermes-2.5-Mistral
292
+ (7B), LLaMA-2 Chat (7B), Mistral (7B)
293
+ Instruct v0.2 (Together AI), Upstage
294
+ SOLAR Instruct v1 (11B), chat-bison-32k
295
+ (PaLM 2 32K), chat-bison (PaLM 2),
296
+ gemini-pro, WizardLM v1.2 (13B), Chronos
297
+ Hermes (13B), Snorkel Mistral PairRM DPO
298
+ (7B), 01-ai Yi Chat (34B),
 
 
 
299
  Mixtral-8x7B-Instruct-v0.1,
300
+ llama-2-70b-chat, gpt-3.5-turbo,
301
+ gpt-4-turbo, gpt-4],
302
  'categoryorder': 'array',
303
  'domain': [0.0, 1.0],
304
  'title': {'text': 'Model'}}}
305
+ })",Costs of execution of 6660 test queries per model,,"{""data"":[{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-4"",""marker"":{""color"":""#636efa"",""pattern"":{""shape"":""""}},""name"":""gpt-4"",""offsetgroup"":""gpt-4"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[45.870000000000005],""xaxis"":""x"",""y"":[""gpt-4""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-4-turbo"",""marker"":{""color"":""#EF553B"",""pattern"":{""shape"":""""}},""name"":""gpt-4-turbo"",""offsetgroup"":""gpt-4-turbo"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[19.2168],""xaxis"":""x"",""y"":[""gpt-4-turbo""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-3.5-turbo"",""marker"":{""color"":""#00cc96"",""pattern"":{""shape"":""""}},""name"":""gpt-3.5-turbo"",""offsetgroup"":""gpt-3.5-turbo"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[1.75176],""xaxis"":""x"",""y"":[""gpt-3.5-turbo""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""llama-2-70b-chat"",""marker"":{""color"":""#ab63fa"",""pattern"":{""shape"":""""}},""name"":""llama-2-70b-chat"",""offsetgroup"":""llama-2-70b-chat"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.65934],""xaxis"":""x"",""y"":[""llama-2-70b-chat""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Mixtral-8x7B-Instruct-v0.1"",""marker"":{""color"":""#FFA15A"",""pattern"":{""shape"":""""}},""name"":""Mixtral-8x7B-Instruct-v0.1"",""offsetgroup"":""Mixtral-8x7B-Instruct-v0.1"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.65934],""xaxis"":""x"",""y"":[""Mixtral-8x7B-Instruct-v0.1""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""01-ai Yi Chat (34B)"",""marker"":{""color"":""#19d3f3"",""pattern"":{""shape"":""""}},""name"":""01-ai Yi Chat (34B)"",""offsetgroup"":""01-ai Yi Chat (34B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.5818400000000001],""xaxis"":""x"",""y"":[""01-ai Yi Chat (34B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Snorkel Mistral PairRM DPO (7B)"",""marker"":{""color"":""#FF6692"",""pattern"":{""shape"":""""}},""name"":""Snorkel Mistral PairRM DPO (7B)"",""offsetgroup"":""Snorkel Mistral PairRM DPO (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.334256],""xaxis"":""x"",""y"":[""Snorkel Mistral PairRM DPO (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Chronos Hermes (13B)"",""marker"":{""color"":""#B6E880"",""pattern"":{""shape"":""""}},""name"":""Chronos Hermes (13B)"",""offsetgroup"":""Chronos Hermes (13B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.27396],""xaxis"":""x"",""y"":[""Chronos Hermes (13B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""WizardLM v1.2 (13B)"",""marker"":{""color"":""#FF97FF"",""pattern"":{""shape"":""""}},""name"":""WizardLM v1.2 (13B)"",""offsetgroup"":""WizardLM v1.2 (13B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.21207],""xaxis"":""x"",""y"":[""WizardLM v1.2 (13B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gemini-pro"",""marker"":{""color"":""#FECB52"",""pattern"":{""shape"":""""}},""name"":""gemini-pro"",""offsetgroup"":""gemini-pro"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.18315],""xaxis"":""x"",""y"":[""gemini-pro""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""chat-bison (PaLM 2)"",""marker"":{""color"":""#636efa"",""pattern"":{""shape"":""""}},""name"":""chat-bison (PaLM 2)"",""offsetgroup"":""chat-bison (PaLM 2)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.18315],""xaxis"":""x"",""y"":[""chat-bison (PaLM 2)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""chat-bison-32k (PaLM 2 32K)"",""marker"":{""color"":""#EF553B"",""pattern"":{""shape"":""""}},""name"":""chat-bison-32k (PaLM 2 32K)"",""offsetgroup"":""chat-bison-32k (PaLM 2 32K)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.18315],""xaxis"":""x"",""y"":[""chat-bison-32k (PaLM 2 32K)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Upstage SOLAR Instruct v1 (11B)"",""marker"":{""color"":""#00cc96"",""pattern"":{""shape"":""""}},""name"":""Upstage SOLAR Instruct v1 (11B)"",""offsetgroup"":""Upstage SOLAR Instruct v1 (11B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.180288],""xaxis"":""x"",""y"":[""Upstage SOLAR Instruct v1 (11B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Mistral (7B) Instruct v0.2 (Together AI)"",""marker"":{""color"":""#ab63fa"",""pattern"":{""shape"":""""}},""name"":""Mistral (7B) Instruct v0.2 (Together AI)"",""offsetgroup"":""Mistral (7B) Instruct v0.2 (Together AI)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.16515400000000002],""xaxis"":""x"",""y"":[""Mistral (7B) Instruct v0.2 (Together AI)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""LLaMA-2 Chat (7B)"",""marker"":{""color"":""#FFA15A"",""pattern"":{""shape"":""""}},""name"":""LLaMA-2 Chat (7B)"",""offsetgroup"":""LLaMA-2 Chat (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.16329600000000002],""xaxis"":""x"",""y"":[""LLaMA-2 Chat (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""OpenHermes-2.5-Mistral (7B)"",""marker"":{""color"":""#19d3f3"",""pattern"":{""shape"":""""}},""name"":""OpenHermes-2.5-Mistral (7B)"",""offsetgroup"":""OpenHermes-2.5-Mistral (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.14182000000000003],""xaxis"":""x"",""y"":[""OpenHermes-2.5-Mistral (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Qwen 1.5 Chat (7B)"",""marker"":{""color"":""#FF6692"",""pattern"":{""shape"":""""}},""name"":""Qwen 1.5 Chat (7B)"",""offsetgroup"":""Qwen 1.5 Chat (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.13759200000000002],""xaxis"":""x"",""y"":[""Qwen 1.5 Chat (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Vicuna v1.5 (7B)"",""marker"":{""color"":""#B6E880"",""pattern"":{""shape"":""""}},""name"":""Vicuna v1.5 (7B)"",""offsetgroup"":""Vicuna v1.5 (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.12588],""xaxis"":""x"",""y"":[""Vicuna v1.5 (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Falcon Instruct (7B)"",""marker"":{""color"":""#FF97FF"",""pattern"":{""shape"":""""}},""name"":""Falcon Instruct (7B)"",""offsetgroup"":""Falcon Instruct (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.12476800000000002],""xaxis"":""x"",""y"":[""Falcon Instruct (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""RedPajama-INCITE Chat (7B)"",""marker"":{""color"":""#FECB52"",""pattern"":{""shape"":""""}},""name"":""RedPajama-INCITE Chat (7B)"",""offsetgroup"":""RedPajama-INCITE Chat (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.12342400000000002],""xaxis"":""x"",""y"":[""RedPajama-INCITE Chat (7B)""],""yaxis"":""y"",""type"":""bar""}],""layout"":{""template"":{""data"":{""histogram2dcontour"":[{""type"":""histogram2dcontour"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""choropleth"":[{""type"":""choropleth"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""histogram2d"":[{""type"":""histogram2d"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""heatmap"":[{""type"":""heatmap"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""heatmapgl"":[{""type"":""heatmapgl"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""contourcarpet"":[{""type"":""contourcarpet"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""contour"":[{""type"":""contour"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""surface"":[{""type"":""surface"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""mesh3d"":[{""type"":""mesh3d"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""scatter"":[{""fillpattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2},""type"":""scatter""}],""parcoords"":[{""type"":""parcoords"",""line"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterpolargl"":[{""type"":""scatterpolargl"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""bar"":[{""error_x"":{""color"":""#2a3f5f""},""error_y"":{""color"":""#2a3f5f""},""marker"":{""line"":{""color"":""#E5ECF6"",""width"":0.5},""pattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2}},""type"":""bar""}],""scattergeo"":[{""type"":""scattergeo"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterpolar"":[{""type"":""scatterpolar"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""histogram"":[{""marker"":{""pattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2}},""type"":""histogram""}],""scattergl"":[{""type"":""scattergl"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatter3d"":[{""type"":""scatter3d"",""line"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}},""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scattermapbox"":[{""type"":""scattermapbox"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterternary"":[{""type"":""scatterternary"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scattercarpet"":[{""type"":""scattercarpet"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""carpet"":[{""aaxis"":{""endlinecolor"":""#2a3f5f"",""gridcolor"":""white"",""linecolor"":""white"",""minorgridcolor"":""white"",""startlinecolor"":""#2a3f5f""},""baxis"":{""endlinecolor"":""#2a3f5f"",""gridcolor"":""white"",""linecolor"":""white"",""minorgridcolor"":""white"",""startlinecolor"":""#2a3f5f""},""type"":""carpet""}],""table"":[{""cells"":{""fill"":{""color"":""#EBF0F8""},""line"":{""color"":""white""}},""header"":{""fill"":{""color"":""#C8D4E3""},""line"":{""color"":""white""}},""type"":""table""}],""barpolar"":[{""marker"":{""line"":{""color"":""#E5ECF6"",""width"":0.5},""pattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2}},""type"":""barpolar""}],""pie"":[{""automargin"":true,""type"":""pie""}]},""layout"":{""autotypenumbers"":""strict"",""colorway"":[""#636efa"",""#EF553B"",""#00cc96"",""#ab63fa"",""#FFA15A"",""#19d3f3"",""#FF6692"",""#B6E880"",""#FF97FF"",""#FECB52""],""font"":{""color"":""#2a3f5f""},""hovermode"":""closest"",""hoverlabel"":{""align"":""left""},""paper_bgcolor"":""white"",""plot_bgcolor"":""#E5ECF6"",""polar"":{""bgcolor"":""#E5ECF6"",""angularaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""},""radialaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""}},""ternary"":{""bgcolor"":""#E5ECF6"",""aaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""},""baxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""},""caxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""}},""coloraxis"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}},""colorscale"":{""sequential"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]],""sequentialminus"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]],""diverging"":[[0,""#8e0152""],[0.1,""#c51b7d""],[0.2,""#de77ae""],[0.3,""#f1b6da""],[0.4,""#fde0ef""],[0.5,""#f7f7f7""],[0.6,""#e6f5d0""],[0.7,""#b8e186""],[0.8,""#7fbc41""],[0.9,""#4d9221""],[1,""#276419""]]},""xaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":"""",""title"":{""standoff"":15},""zerolinecolor"":""white"",""automargin"":true,""zerolinewidth"":2},""yaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":"""",""title"":{""standoff"":15},""zerolinecolor"":""white"",""automargin"":true,""zerolinewidth"":2},""scene"":{""xaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2},""yaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2},""zaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2}},""shapedefaults"":{""line"":{""color"":""#2a3f5f""}},""annotationdefaults"":{""arrowcolor"":""#2a3f5f"",""arrowhead"":0,""arrowwidth"":1},""geo"":{""bgcolor"":""white"",""landcolor"":""#E5ECF6"",""subunitcolor"":""white"",""showland"":true,""showlakes"":true,""lakecolor"":""white""},""title"":{""x"":0.05},""mapbox"":{""style"":""light""}}},""xaxis"":{""anchor"":""y"",""domain"":[0.0,1.0],""title"":{""text"":""Execution cost ($)""}},""yaxis"":{""anchor"":""x"",""domain"":[0.0,1.0],""title"":{""text"":""Model""},""categoryorder"":""array"",""categoryarray"":[""RedPajama-INCITE Chat (7B)"",""Falcon Instruct (7B)"",""Vicuna v1.5 (7B)"",""Qwen 1.5 Chat (7B)"",""OpenHermes-2.5-Mistral (7B)"",""LLaMA-2 Chat (7B)"",""Mistral (7B) Instruct v0.2 (Together AI)"",""Upstage SOLAR Instruct v1 (11B)"",""chat-bison-32k (PaLM 2 32K)"",""chat-bison (PaLM 2)"",""gemini-pro"",""WizardLM v1.2 (13B)"",""Chronos Hermes (13B)"",""Snorkel Mistral PairRM DPO (7B)"",""01-ai Yi Chat (34B)"",""Mixtral-8x7B-Instruct-v0.1"",""llama-2-70b-chat"",""gpt-3.5-turbo"",""gpt-4-turbo"",""gpt-4""]},""legend"":{""title"":{""text"":""Model""},""tracegroupgap"":0},""title"":{""text"":""Costs of execution of 6660 test queries per model""},""barmode"":""relative""}}","{""y"": ""model"", ""x"": ""model_query_costs"", ""color"": ""model"", ""orientation"": ""h"", ""title"": ""Costs of execution of 6660 test queries per model"", ""labels"": {""model"": ""Model"", ""model_query_costs"": ""Execution cost ($)""}}",",model_query_costs,model
306
+ 2,45.870000000000005,gpt-4
307
+ 1,19.2168,gpt-4-turbo
308
+ 0,1.75176,gpt-3.5-turbo
309
+ 3,0.65934,llama-2-70b-chat
310
+ 4,0.65934,Mixtral-8x7B-Instruct-v0.1
311
+ 11,0.5818400000000001,01-ai Yi Chat (34B)
312
+ 43,0.334256,Snorkel Mistral PairRM DPO (7B)
313
+ 12,0.27396,Chronos Hermes (13B)
314
+ 55,0.21207,WizardLM v1.2 (13B)
315
+ 8,0.18315,gemini-pro
316
+ 9,0.18315,chat-bison (PaLM 2)
317
+ 10,0.18315,chat-bison-32k (PaLM 2 32K)
318
+ 56,0.180288,Upstage SOLAR Instruct v1 (11B)
319
+ 26,0.16515400000000002,Mistral (7B) Instruct v0.2 (Together AI)
320
+ 24,0.16329600000000002,LLaMA-2 Chat (7B)
321
+ 46,0.14182000000000003,OpenHermes-2.5-Mistral (7B)
322
+ 40,0.13759200000000002,Qwen 1.5 Chat (7B)
323
+ 17,0.12588,Vicuna v1.5 (7B)
324
+ 48,0.12476800000000002,Falcon Instruct (7B)
325
+ 51,0.12342400000000002,RedPajama-INCITE Chat (7B)
 
 
 
326
  "
327
  model_sizes,./html/plots/model_sizes.html,"Figure({
328
  'data': [{'alignmentgroup': 'True',
data/model_costs.csv CHANGED
@@ -4,8 +4,8 @@ gpt-4-turbo,OpenAI,"$10 / 1M input tokens, $30 / 1M output tokens","$10 / 1M inp
4
  gpt-4,OpenAI,"$30 / 1M input tokens, $60 / 1M output tokens","$30 / 1M input tokens, $60 / 1M output tokens"
5
  llama-2-70b-chat,Together AI,$0.9 / 1M tokens,$0.9 / 1M tokens
6
  Mixtral-8x7B-Instruct-v0.1,Together AI,$0.9 / 1M tokens,$0.9 / 1M tokens
7
- zephyr-7b-beta,Hugging Face Inference Endpoint,$0.15 / 1M tokens,$1.3 / hour
8
- Mistral-7B-Instruct-v0.2,Hugging Face Inference Endpoint,$0.14 / 1M tokens,$1.3 / hour
9
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,Hugging Face Inference Endpoint,$0.068 / 1M tokens,$0.6 / hour
10
  gemini-pro,Google VertexAI,"$0.25 / 1M input tokens, $0.5 / 1M output tokens","$0.25 / 1M input characters, $0.5 / 1M output characters (60 queries per minute are free)"
11
  chat-bison (PaLM 2),Google VertexAI,"$0.25 / 1M input tokens, $0.5 / 1M output tokens","$0.25 / 1M input tokens, $0.5 / 1M output tokens"
 
4
  gpt-4,OpenAI,"$30 / 1M input tokens, $60 / 1M output tokens","$30 / 1M input tokens, $60 / 1M output tokens"
5
  llama-2-70b-chat,Together AI,$0.9 / 1M tokens,$0.9 / 1M tokens
6
  Mixtral-8x7B-Instruct-v0.1,Together AI,$0.9 / 1M tokens,$0.9 / 1M tokens
7
+ zephyr-7b-beta,Hugging Face Inference Endpoint,$0.25 / 1M tokens,$1.3 / hour
8
+ Mistral-7B-Instruct-v0.2,Hugging Face Inference Endpoint,$0.13 / 1M tokens,$1.3 / hour
9
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,Hugging Face Inference Endpoint,$0.068 / 1M tokens,$0.6 / hour
10
  gemini-pro,Google VertexAI,"$0.25 / 1M input tokens, $0.5 / 1M output tokens","$0.25 / 1M input characters, $0.5 / 1M output characters (60 queries per minute are free)"
11
  chat-bison (PaLM 2),Google VertexAI,"$0.25 / 1M input tokens, $0.5 / 1M output tokens","$0.25 / 1M input tokens, $0.5 / 1M output tokens"
data/output_plots.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/summary.csv CHANGED
@@ -1,60 +1,60 @@
1
  model,language,template_name,execution_time,characters_count,words_count,chunk_sizes_mean,chunk_sizes_min,chunk_sizes_max,chunk_generation_times_mean,chunk_generation_times_min,chunk_generation_times_max
2
- 01-ai Yi Chat (34B),english,json,3.740751821820329,339.4756097560976,57.926829268292686,3.8364112458654906,1.0,14.0,0.028872832803768254,0.0004601478576660156,0.6119933128356934
3
- 01-ai Yi Chat (34B),english,markdown,4.911168761369659,545.0121951219512,91.32926829268293,4.251426940639269,1.0,15.0,0.029400016551148402,0.000396728515625,0.6902577877044678
4
- 01-ai Yi Chat (34B),japanese,json,13.06497314438891,388.7910447761194,11.104477611940299,1.1905393053016453,1.0,10.0,0.03675604747026014,0.00048804283142089844,0.6865222454071045
5
- 01-ai Yi Chat (34B),japanese,markdown,20.104321274038863,587.4657534246576,17.876712328767123,1.151120654945645,1.0,13.0,0.036697812292211376,0.0004608631134033203,0.6928038597106934
6
- Chronos Hermes (13B),english,json,2.974407357828958,368.92857142857144,63.107142857142854,3.5136054421768708,1.0,15.0,0.02094727554559167,0.00027179718017578125,0.6065487861633301
7
- Chronos Hermes (13B),english,markdown,2.6719689165673604,361.8536585365854,61.97560975609756,3.6452088452088454,1.0,14.0,0.02059152469002351,0.0004761219024658203,0.28253698348999023
8
- Chronos Hermes (13B),japanese,json,15.704193201558343,687.3620689655172,23.344827586206897,1.1621337997376475,1.0,14.0,0.023474197327925606,0.0003821849822998047,0.3109428882598877
9
- Chronos Hermes (13B),japanese,markdown,11.713542679945627,1075.8,142.26666666666668,2.5384615384615383,1.0,16.0,0.023924544976345302,0.0004260540008544922,0.34424543380737305
10
- Falcon Instruct (7B),english,json,1.9191903869311016,82.66666666666667,15.766666666666667,4.235695986336465,1.0,13.0,0.012606756387868567,0.0004930496215820312,0.08649563789367676
11
- Falcon Instruct (7B),english,markdown,1.8954222866746246,89.24590163934427,15.245901639344263,4.60965283657917,1.0,13.0,0.012657315119356548,0.0003917217254638672,0.09382963180541992
12
- Falcon Instruct (7B),japanese,json,0.9907723726370395,35.37179487179487,7.051282051282051,3.6983914209115283,1.0,10.0,0.013643914509075258,0.00037479400634765625,0.0781710147857666
13
- Falcon Instruct (7B),japanese,markdown,0.6273794174194336,8.873417721518987,2.088607594936709,3.3864734299516908,1.0,13.0,0.010549438172492428,0.000865936279296875,0.034580230712890625
14
- LLaMA-2 Chat (7B),english,json,2.602991385893388,348.05194805194805,60.311688311688314,3.5179837227618798,1.0,14.0,0.007298828235414905,0.0002570152282714844,0.12662744522094727
15
- LLaMA-2 Chat (7B),english,markdown,2.678575509124332,402.0416666666667,69.375,3.747669601242879,1.0,16.0,0.0077138773707266855,0.00035452842712402344,0.2196364402770996
16
- LLaMA-2 Chat (7B),japanese,json,4.055958332334246,471.92857142857144,53.114285714285714,1.9620478707608244,1.0,14.0,0.009438785980169328,0.00035572052001953125,0.1930844783782959
17
- LLaMA-2 Chat (7B),japanese,markdown,4.446276148704634,717.1232876712329,67.6986301369863,1.8702440070022508,1.0,16.0,0.008583835081750738,0.0003447532653808594,0.2040574550628662
18
- Mistral (7B) Instruct v0.2 (Together AI),english,json,2.0136800810822053,344.508547008547,56.93589743589744,3.8069040423120515,1.0,15.0,0.00794907963001192,0.0002071857452392578,0.30161213874816895
19
- Mistral (7B) Instruct v0.2 (Together AI),english,markdown,2.5499956497567235,452.242774566474,73.121387283237,3.7100720789074355,1.0,15.0,0.007905251983898002,0.0002067089080810547,0.4635443687438965
20
- Mistral (7B) Instruct v0.2 (Together AI),japanese,json,3.768558966402976,271.26490066225165,5.172185430463577,1.0484271417236173,1.0,13.0,0.009928898496573373,0.00023865699768066406,2.616291046142578
21
- Mistral (7B) Instruct v0.2 (Together AI),japanese,markdown,7.687208134916764,1178.012658227848,138.30379746835442,1.9892057113543091,1.0,109.0,0.010960863498020433,0.00021409988403320312,8.214757204055786
22
- Mistral-7B-Instruct-v0.2,english,json,4.278648148264204,342.8,55.15,4.1311870534561415,1.0,15.0,0.040544769239987996,0.00024437904357910156,60.7178213596344
23
- Mistral-7B-Instruct-v0.2,english,markdown,3.7188014924526214,366.85,57.1375,4.239202657807309,1.0,16.0,0.03371991898375372,0.020945072174072266,0.25538039207458496
24
- Mistral-7B-Instruct-v0.2,japanese,json,2.413569121701377,52.07857142857143,3.414285714285714,1.2991803278688525,1.0,11.0,0.039267738759305254,0.00047779083251953125,3.370748996734619
25
- Mistral-7B-Instruct-v0.2,japanese,markdown,2.323612022399902,42.25,1.5833333333333333,1.1069868995633187,1.0,6.0,0.03958142719935121,0.0004932880401611328,0.2205650806427002
26
  Mixtral-8x7B-Instruct-v0.1,english,json,3.665129848137623,403.2542016806723,62.4390756302521,4.138125060633172,1.0,15.0,0.01437194579115682,0.0002009868621826172,3.4127349853515625
27
  Mixtral-8x7B-Instruct-v0.1,english,markdown,5.775103669417532,522.9172932330827,83.45864661654136,4.350284606242572,1.0,15.0,0.015317759848210679,0.00020313262939453125,1.7103569507598877
28
  Mixtral-8x7B-Instruct-v0.1,japanese,json,7.287098480350196,556.9218106995885,63.477366255144034,2.2494971825601304,1.0,109.0,0.02290278010061531,0.00020551681518554688,76.45081758499146
29
  Mixtral-8x7B-Instruct-v0.1,japanese,markdown,9.511569020294008,432.14285714285717,7.2976190476190474,1.0973066110456153,1.0,12.0,0.017938276822029118,0.00019669532775878906,3.0317282676696777
30
- OpenHermes-2.5-Mistral (7B),english,json,2.2801950351301445,356.66265060240966,58.87951807228916,3.619833700171191,1.0,15.0,0.009280798020646228,0.0003464221954345703,0.34207844734191895
31
- OpenHermes-2.5-Mistral (7B),english,markdown,2.5686250015308985,381.69736842105266,63.828947368421055,3.9398343066684776,1.0,15.0,0.009823806159897504,0.00034999847412109375,0.32857298851013184
32
- OpenHermes-2.5-Mistral (7B),japanese,json,2.385033627835716,143.78048780487805,4.060975609756097,1.0738682940158484,1.0,8.0,0.013302163463633877,0.0003523826599121094,0.34746313095092773
33
- OpenHermes-2.5-Mistral (7B),japanese,markdown,4.09419917478794,346.7682926829268,4.865853658536586,1.0120297540662704,1.0,9.0,0.010479846772686987,0.0003566741943359375,0.6548421382904053
34
- Qwen 1.5 Chat (7B),english,json,2.176741310312778,316.40506329113924,52.949367088607595,3.865161589608783,1.0,15.0,0.0067587282957351615,0.00036597251892089844,0.13403892517089844
35
- Qwen 1.5 Chat (7B),english,markdown,2.384753034084658,433.1898734177215,71.48101265822785,4.243799603174603,1.0,15.0,0.00671186927883398,0.00036978721618652344,0.14255690574645996
36
- Qwen 1.5 Chat (7B),japanese,json,2.2887367900413804,207.50632911392404,4.139240506329114,1.4821880650994574,1.0,17.0,0.007465819402801624,0.00038743019104003906,0.21312594413757324
37
- Qwen 1.5 Chat (7B),japanese,markdown,2.041359633575251,220.80246913580248,1.876543209876543,1.465142950765954,1.0,17.0,0.007345383120587039,0.00035572052001953125,0.13096904754638672
38
- RedPajama-INCITE Chat (7B),english,json,1.9923779017785017,56.0,5.514705882352941,2.9337442218798153,1.0,15.0,0.02580270341070848,0.00044846534729003906,0.131911039352417
39
- RedPajama-INCITE Chat (7B),english,markdown,2.0556714431099268,55.04347826086956,5.217391304347826,2.928296067848882,1.0,70.0,0.02626570611158149,0.0004153251647949219,0.939018964767456
40
- RedPajama-INCITE Chat (7B),japanese,json,1.801509298929354,53.58536585365854,5.853658536585366,2.816666666666667,1.0,13.0,0.02727127411426642,0.0003719329833984375,0.1054222583770752
41
- RedPajama-INCITE Chat (7B),japanese,markdown,1.896126465099614,77.59756097560975,4.439024390243903,4.019583070120025,1.0,70.0,0.02732501960749575,0.0007987022399902344,0.4024965763092041
42
- Snorkel Mistral PairRM DPO (7B),english,json,3.095366932551066,949.9333333333333,150.72,3.8147890340544013,1.0,15.0,0.007307618875579115,0.00022792816162109375,0.1967298984527588
43
- Snorkel Mistral PairRM DPO (7B),english,markdown,2.583486044406891,507.9625,78.75,4.004829013501528,1.0,15.0,0.00809382158769837,0.0003504753112792969,0.27209019660949707
44
- Snorkel Mistral PairRM DPO (7B),japanese,json,7.790234556680994,1704.5189873417721,208.60759493670886,1.981328075570531,1.0,15.0,0.008026355929728519,0.0003476142883300781,0.49362945556640625
45
- Snorkel Mistral PairRM DPO (7B),japanese,markdown,89.00719798347096,17120.925925925927,504.25925925925924,1.0784030121985644,1.0,14.0,0.005559155525883009,0.00032520294189453125,1.1380209922790527
46
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,english,json,1.9458302193674548,366.25,61.58965517241379,4.169774654522613,1.0,13.0,0.012296600931852907,0.00024318695068359375,0.17363858222961426
47
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,english,markdown,1.6830505162477494,282.55,47.1125,3.9572829131652663,1.0,14.0,0.012217252319600401,0.0003170967102050781,0.15227961540222168
48
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,japanese,json,1.4167885013989039,72.81428571428572,8.8,2.0274463007159906,1.0,13.0,0.014790566363975749,0.00031280517578125,0.3310587406158447
49
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,japanese,markdown,1.336316986878713,89.78333333333333,12.716666666666667,2.432054176072235,1.0,13.0,0.013910464400900675,0.0005300045013427734,0.176621675491333
50
- Vicuna v1.5 (7B),english,json,2.851835834980011,372.4375,63.9875,3.508596325953839,1.0,15.0,0.011230722554045582,0.0003688335418701172,0.11755537986755371
51
- Vicuna v1.5 (7B),english,markdown,2.0539027697000747,382.4230769230769,64.55128205128206,3.7235051803769816,1.0,15.0,0.011191664883712751,0.00037550926208496094,0.1981511116027832
52
- Vicuna v1.5 (7B),japanese,json,4.896489579230547,304.5,16.9375,1.2847254268574066,1.0,15.0,0.015209688069330176,0.0003762245178222656,0.319460391998291
53
- Vicuna v1.5 (7B),japanese,markdown,5.635414824556949,301.55223880597015,3.2686567164179103,1.034511008704557,1.0,11.0,0.015171443860590671,0.0003802776336669922,0.19443845748901367
54
- WizardLM v1.2 (13B),english,json,3.101118081236539,280.52054794520546,48.0958904109589,3.2891101831031158,1.0,15.0,0.021355988586442935,0.00036406517028808594,0.4714198112487793
55
- WizardLM v1.2 (13B),english,markdown,3.5972929232650332,357.5138888888889,61.013888888888886,3.8293662600416543,1.0,14.0,0.02164479360378984,0.0005571842193603516,0.5012450218200684
56
- WizardLM v1.2 (13B),japanese,json,6.448913824267503,213.4878048780488,4.7682926829268295,1.0483262470806636,1.0,7.0,0.027856862946922703,0.00034737586975097656,0.5480227470397949
57
- WizardLM v1.2 (13B),japanese,markdown,8.740011346049425,292.6341463414634,7.634146341463414,1.0085319211532804,1.0,9.0,0.02801978238888618,0.0004551410675048828,0.8807919025421143
58
  chat-bison (PaLM 2),english,json,2.5406186148524283,381.7267857142857,60.88125,78.31727422604872,1.0,142.0,0.3495202942369709,0.0003688335418701172,7.334931373596191
59
  chat-bison (PaLM 2),english,markdown,2.9635989278554917,490.4875,78.5375,84.20386266094421,1.0,138.0,0.3641034907025841,0.0004639625549316406,4.600815534591675
60
  chat-bison (PaLM 2),japanese,json,3.0616454005241396,232.68,10.48,34.21764705882353,3.0,124.0,0.326771635167739,0.0005857944488525391,2.7724690437316895
 
1
  model,language,template_name,execution_time,characters_count,words_count,chunk_sizes_mean,chunk_sizes_min,chunk_sizes_max,chunk_generation_times_mean,chunk_generation_times_min,chunk_generation_times_max
2
+ 01-ai Yi Chat (34B),english,json,3.834192312493616,340.3775510204082,58.183673469387756,3.8218377635197065,1.0,14.0,0.02905281419386894,0.0004601478576660156,0.6119933128356934
3
+ 01-ai Yi Chat (34B),english,markdown,4.875209599125142,537.8265306122449,90.21428571428571,4.23145472061657,1.0,15.0,0.029477664979513287,0.000396728515625,0.6902577877044678
4
+ 01-ai Yi Chat (34B),japanese,json,12.8705559865928,388.04938271604937,11.691358024691358,1.2000610873549176,1.0,13.0,0.036714292821464704,0.00048804283142089844,0.7141768932342529
5
+ 01-ai Yi Chat (34B),japanese,markdown,19.812452404686574,573.056179775281,16.9438202247191,1.1449288376060702,1.0,13.0,0.03677184357071728,0.0004608631134033203,0.6928038597106934
6
+ Chronos Hermes (13B),english,json,4.721294949054718,365.9,62.69,3.490080122090805,1.0,15.0,0.021018946593792918,0.00027179718017578125,0.6065487861633301
7
+ Chronos Hermes (13B),english,markdown,4.458954448602637,358.2857142857143,61.48979591836735,3.6168108776267,1.0,14.0,0.020784828659925238,0.0004761219024658203,0.3964853286743164
8
+ Chronos Hermes (13B),japanese,json,18.112667706277634,683.2083333333334,21.72222222222222,1.150182379349046,1.0,14.0,0.02361172727552015,0.0003821849822998047,0.5440409183502197
9
+ Chronos Hermes (13B),japanese,markdown,13.88519547736808,1072.5479452054794,140.34246575342465,2.514403160024407,1.0,16.0,0.024182286664176957,0.0004260540008544922,0.39719629287719727
10
+ Falcon Instruct (7B),english,json,1.8377627313953557,82.93150684931507,15.849315068493151,4.230607966457023,1.0,13.0,0.012668974994363525,0.0004930496215820312,0.09698057174682617
11
+ Falcon Instruct (7B),english,markdown,1.8538796330841494,90.4225352112676,15.408450704225352,4.648805213613324,1.0,13.0,0.012733512128461192,0.0003917217254638672,0.09382963180541992
12
+ Falcon Instruct (7B),japanese,json,1.0451691810120927,35.819148936170215,7.127659574468085,3.7163355408388523,1.0,10.0,0.01360485859001446,0.00037479400634765625,0.08153510093688965
13
+ Falcon Instruct (7B),japanese,markdown,0.6194416974720202,7.88421052631579,1.9052631578947368,3.3587443946188342,1.0,13.0,0.009902677193885427,0.000865936279296875,0.034580230712890625
14
+ LLaMA-2 Chat (7B),english,json,2.5491372994754626,344.3695652173913,59.83695652173913,3.491129476584022,1.0,14.0,0.007231530197395766,0.0002570152282714844,0.12662744522094727
15
+ LLaMA-2 Chat (7B),english,markdown,2.7005234983834354,394.3068181818182,68.39772727272727,3.7214714714714714,1.0,16.0,0.007627917532755141,0.00035452842712402344,0.2196364402770996
16
+ LLaMA-2 Chat (7B),japanese,json,3.9193407914724694,474.2048192771084,53.433734939759034,1.960304811236179,1.0,14.0,0.009285861508448958,0.00035572052001953125,0.1930844783782959
17
+ LLaMA-2 Chat (7B),japanese,markdown,4.234327340667898,712.875,69.01136363636364,1.9195557051497811,1.0,16.0,0.008488639047908132,0.0003447532653808594,0.2040574550628662
18
+ Mistral (7B) Instruct v0.2 (Together AI),english,json,2.0483637296531096,342.8995983935743,56.7710843373494,3.79526159043428,1.0,15.0,0.007936270601427141,0.0002071857452392578,0.30161213874816895
19
+ Mistral (7B) Instruct v0.2 (Together AI),english,markdown,2.544825687053356,450.6170212765957,72.89893617021276,3.7039174536551243,1.0,15.0,0.00792093470239189,0.0002067089080810547,0.4635443687438965
20
+ Mistral (7B) Instruct v0.2 (Together AI),japanese,json,3.7119850810751855,269.34939759036143,5.162650602409639,1.0478310796559724,1.0,13.0,0.009880354483175478,0.00023865699768066406,2.616291046142578
21
+ Mistral (7B) Instruct v0.2 (Together AI),japanese,markdown,7.5212546869509485,1171.8265895953757,137.8612716763006,1.9919624258145658,1.0,109.0,0.010850601641771877,0.00021409988403320312,8.214757204055786
22
+ Mistral-7B-Instruct-v0.2,english,json,4.296516989732718,339.22727272727275,54.77272727272727,4.079097368626532,1.0,15.0,0.04079543511947759,0.00024437904357910156,60.7178213596344
23
+ Mistral-7B-Instruct-v0.2,english,markdown,3.8693686013526105,365.531914893617,57.56382978723404,4.128319115703472,1.0,16.0,0.03467739830076673,0.020945072174072266,0.25538039207458496
24
+ Mistral-7B-Instruct-v0.2,japanese,json,2.613244720867702,56.298701298701296,3.4675324675324677,1.2642169728783903,1.0,11.0,0.04004790950495914,0.00047779083251953125,3.370748996734619
25
+ Mistral-7B-Instruct-v0.2,japanese,markdown,2.7685540850098067,52.41891891891892,2.0405405405405403,1.1191575302942873,1.0,6.0,0.04190377465439063,0.0004932880401611328,0.25658416748046875
26
  Mixtral-8x7B-Instruct-v0.1,english,json,3.665129848137623,403.2542016806723,62.4390756302521,4.138125060633172,1.0,15.0,0.01437194579115682,0.0002009868621826172,3.4127349853515625
27
  Mixtral-8x7B-Instruct-v0.1,english,markdown,5.775103669417532,522.9172932330827,83.45864661654136,4.350284606242572,1.0,15.0,0.015317759848210679,0.00020313262939453125,1.7103569507598877
28
  Mixtral-8x7B-Instruct-v0.1,japanese,json,7.287098480350196,556.9218106995885,63.477366255144034,2.2494971825601304,1.0,109.0,0.02290278010061531,0.00020551681518554688,76.45081758499146
29
  Mixtral-8x7B-Instruct-v0.1,japanese,markdown,9.511569020294008,432.14285714285717,7.2976190476190474,1.0973066110456153,1.0,12.0,0.017938276822029118,0.00019669532775878906,3.0317282676696777
30
+ OpenHermes-2.5-Mistral (7B),english,json,2.299964639605308,354.92857142857144,58.66326530612245,3.597745138601572,1.0,15.0,0.009893205301989491,0.0003464221954345703,0.34207844734191895
31
+ OpenHermes-2.5-Mistral (7B),english,markdown,2.595633898092353,380.0978260869565,63.68478260869565,3.918973439426202,1.0,15.0,0.01048721514193851,0.00033664703369140625,0.3534977436065674
32
+ OpenHermes-2.5-Mistral (7B),japanese,json,2.513281902488397,143.57142857142858,4.051020408163265,1.0734721904325932,1.0,8.0,0.014181530556562719,0.0003523826599121094,0.48614931106567383
33
+ OpenHermes-2.5-Mistral (7B),japanese,markdown,4.1442127884650715,333.18367346938777,4.724489795918367,1.0109604309864388,1.0,9.0,0.010979779298567272,0.0003566741943359375,0.6548421382904053
34
+ Qwen 1.5 Chat (7B),english,json,2.1808673976570048,314.01075268817203,52.655913978494624,3.838459516298633,1.0,15.0,0.006817500015914628,0.00034117698669433594,0.13403892517089844
35
+ Qwen 1.5 Chat (7B),english,markdown,2.3712507900438813,424.5684210526316,70.2421052631579,4.211108790979328,1.0,15.0,0.006809696296879881,0.0003638267517089844,0.14255690574645996
36
+ Qwen 1.5 Chat (7B),japanese,json,2.2431268441049674,207.58947368421053,4.11578947368421,1.4861341371514696,1.0,17.0,0.007540746784425806,0.00038743019104003906,0.21312594413757324
37
+ Qwen 1.5 Chat (7B),japanese,markdown,2.0088862404227257,214.79166666666666,1.7395833333333333,1.4659462533769374,1.0,17.0,0.007425861000658759,0.00035572052001953125,0.13323616981506348
38
+ RedPajama-INCITE Chat (7B),english,json,2.0967678966976346,55.61904761904762,5.416666666666667,2.916354556803995,1.0,15.0,0.026109205798412232,0.0004024505615234375,0.131911039352417
39
+ RedPajama-INCITE Chat (7B),english,markdown,2.0654409464667824,54.84705882352941,5.176470588235294,2.9119300437226734,1.0,70.0,0.02642942978991783,0.0004153251647949219,0.939018964767456
40
+ RedPajama-INCITE Chat (7B),japanese,json,1.8026243034674196,53.40816326530612,5.877551020408164,2.8079399141630903,1.0,13.0,0.027359759986656417,0.0003719329833984375,0.1054222583770752
41
+ RedPajama-INCITE Chat (7B),japanese,markdown,1.9637482093305003,77.66326530612245,4.36734693877551,3.999474513925381,1.0,70.0,0.027386420911948303,0.0007987022399902344,0.4024965763092041
42
+ Snorkel Mistral PairRM DPO (7B),english,json,3.0900819142659506,951.3444444444444,151.1888888888889,3.789042793291145,1.0,15.0,0.0072367200916011905,0.00022792816162109375,0.1967298984527588
43
+ Snorkel Mistral PairRM DPO (7B),english,markdown,2.628829130373503,499.4736842105263,77.45263157894736,3.989071038251366,1.0,15.0,0.008036611600301004,0.0003504753112792969,0.27209019660949707
44
+ Snorkel Mistral PairRM DPO (7B),japanese,json,7.3780752953062665,1579.6914893617022,191.4468085106383,1.9991249091251784,1.0,15.0,0.008245711491711964,0.0003476142883300781,0.49362945556640625
45
+ Snorkel Mistral PairRM DPO (7B),japanese,markdown,86.86322889429458,16876.14893617021,447.1808510638298,1.0719315956553996,1.0,14.0,0.0054663013888370925,0.00032520294189453125,1.1380209922790527
46
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,english,json,1.9458302193674548,366.25,61.58965517241379,4.169774654522613,1.0,13.0,0.012296600931852907,0.00024318695068359375,0.17363858222961426
47
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,english,markdown,1.6830505162477494,282.55,47.1125,3.9572829131652663,1.0,14.0,0.012217252319600401,0.0003170967102050781,0.15227961540222168
48
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,japanese,json,1.4167885013989039,72.81428571428572,8.8,2.0274463007159906,1.0,13.0,0.014790566363975749,0.00031280517578125,0.3310587406158447
49
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,japanese,markdown,1.336316986878713,89.78333333333333,12.716666666666667,2.432054176072235,1.0,13.0,0.013910464400900675,0.0005300045013427734,0.176621675491333
50
+ Vicuna v1.5 (7B),english,json,2.811121812619661,371.14736842105265,63.88421052631579,3.485468564650059,1.0,15.0,0.011213418027071577,0.0003688335418701172,0.11755537986755371
51
+ Vicuna v1.5 (7B),english,markdown,2.038096696772474,380.3085106382979,64.2872340425532,3.6980448950036204,1.0,15.0,0.011199472534307541,0.00037550926208496094,0.1981511116027832
52
+ Vicuna v1.5 (7B),japanese,json,4.888940976460774,293.85333333333335,15.04,1.2507235684694398,1.0,15.0,0.015249164155471149,0.0003762245178222656,0.319460391998291
53
+ Vicuna v1.5 (7B),japanese,markdown,5.688798353075981,300.2125,2.9,1.0294029402940295,1.0,11.0,0.015179881321561766,0.0003802776336669922,0.19443845748901367
54
+ WizardLM v1.2 (13B),english,json,3.0704635977745056,284.04545454545456,48.71590909090909,3.286785009861933,1.0,15.0,0.021288358060095672,0.00036406517028808594,0.4714198112487793
55
+ WizardLM v1.2 (13B),english,markdown,3.646563207401949,367.45882352941175,62.84705882352941,3.7540865384615385,1.0,14.0,0.021508242619725376,0.0005571842193603516,0.5012450218200684
56
+ WizardLM v1.2 (13B),japanese,json,6.563389152896647,218.3877551020408,4.642857142857143,1.0465013935748864,1.0,7.0,0.027866148073150618,0.00034737586975097656,0.5480227470397949
57
+ WizardLM v1.2 (13B),japanese,markdown,8.748523741352315,352.1530612244898,17.73469387755102,1.2008002783576897,1.0,11.0,0.02774835801738455,0.000453948974609375,0.8807919025421143
58
  chat-bison (PaLM 2),english,json,2.5406186148524283,381.7267857142857,60.88125,78.31727422604872,1.0,142.0,0.3495202942369709,0.0003688335418701172,7.334931373596191
59
  chat-bison (PaLM 2),english,markdown,2.9635989278554917,490.4875,78.5375,84.20386266094421,1.0,138.0,0.3641034907025841,0.0004639625549316406,4.600815534591675
60
  chat-bison (PaLM 2),japanese,json,3.0616454005241396,232.68,10.48,34.21764705882353,3.0,124.0,0.326771635167739,0.0005857944488525391,2.7724690437316895
data/time_of_day_comparison.csv CHANGED
@@ -1,51 +1,52 @@
1
  model,time_of_day,execution_time,characters_count,words_count
2
- 01-ai Yi Chat (34B),early morning,8.896911223729452,394.1666666666667,40.125
3
  01-ai Yi Chat (34B),morning,9.243995500647504,410.5,40.625
4
  01-ai Yi Chat (34B),afternoon,12.255323665086614,512.3645833333334,51.59375
5
  01-ai Yi Chat (34B),late afternoon,9.417656523840767,384.9583333333333,40.125
6
  01-ai Yi Chat (34B),evening,8.16779062861488,360.8333333333333,39.375
7
  01-ai Yi Chat (34B),late evening,9.680033167203268,424.5,40.875
8
  01-ai Yi Chat (34B),midnight,9.311747227774726,397.0625,40.416666666666664
9
- 01-ai Yi Chat (34B),night,9.534157488562844,416.125,40.875
10
- Chronos Hermes (13B),early morning,7.70508497012289,461.4761904761905,49.595238095238095
11
  Chronos Hermes (13B),morning,8.221057146787643,595.375,64.5
12
  Chronos Hermes (13B),afternoon,6.651120054020601,539.5,76.10416666666667
13
  Chronos Hermes (13B),late afternoon,7.630303209478205,544.625,62.833333333333336
14
  Chronos Hermes (13B),evening,6.468730966250102,543.05,72.15
15
  Chronos Hermes (13B),late evening,7.710561646355523,423.1363636363636,46.31818181818182
16
  Chronos Hermes (13B),midnight,7.710010987520218,486.5217391304348,54.93478260869565
17
- Chronos Hermes (13B),night,8.01242661917651,553.051724137931,62.827586206896555
18
- Falcon Instruct (7B),early morning,1.1807806547297988,45.6875,8.4375
19
  Falcon Instruct (7B),morning,1.4012710821060907,47.291666666666664,8.75
20
  Falcon Instruct (7B),afternoon,1.5774729509611387,36.96875,7.020833333333333
21
  Falcon Instruct (7B),late afternoon,1.0884015900748116,43.833333333333336,8.166666666666666
22
  Falcon Instruct (7B),evening,0.7874892950057983,39.2,7.4
23
  Falcon Instruct (7B),late evening,1.2339241071180864,46.791666666666664,8.708333333333334
24
  Falcon Instruct (7B),midnight,0.9382020387894068,38.541666666666664,7.270833333333333
25
- Falcon Instruct (7B),night,1.3551653915981077,48.206896551724135,8.913793103448276
26
- LLaMA-2 Chat (7B),early morning,2.8788051708884863,451.5833333333333,62.9375
27
  LLaMA-2 Chat (7B),morning,2.678236266841059,455.0416666666667,60.791666666666664
28
  LLaMA-2 Chat (7B),afternoon,3.5475496424569024,477.9479166666667,53.135416666666664
29
  LLaMA-2 Chat (7B),late afternoon,2.7755608558654785,430.45,62.15
30
  LLaMA-2 Chat (7B),evening,2.9608939344232734,432.2916666666667,62.333333333333336
31
  LLaMA-2 Chat (7B),late evening,2.869330001913983,452.7083333333333,63.083333333333336
32
  LLaMA-2 Chat (7B),midnight,2.8870244533457656,460.6666666666667,63.166666666666664
33
- LLaMA-2 Chat (7B),night,5.449612444639206,433.07142857142856,58.214285714285715
34
- Mistral (7B) Instruct v0.2 (Together AI),early morning,3.611201712254728,509.75,64.39583333333333
35
  Mistral (7B) Instruct v0.2 (Together AI),morning,3.6725051470205816,500.82894736842104,63.75
36
  Mistral (7B) Instruct v0.2 (Together AI),afternoon,3.255509059895914,481.76237623762376,60.7970297029703
37
  Mistral (7B) Instruct v0.2 (Together AI),late afternoon,3.542298033617545,543.5138888888889,65.65277777777777
38
  Mistral (7B) Instruct v0.2 (Together AI),evening,5.496347131400273,507.48387096774195,63.774193548387096
39
  Mistral (7B) Instruct v0.2 (Together AI),late evening,3.3933188574654713,515.2794117647059,66.45588235294117
40
  Mistral (7B) Instruct v0.2 (Together AI),midnight,4.510189438914205,517.34375,65.16666666666667
41
- Mistral (7B) Instruct v0.2 (Together AI),night,3.6112242429563315,514.7452830188679,64.72641509433963
 
42
  Mistral-7B-Instruct-v0.2,morning,3.734026002883911,362.9,58.35
43
  Mistral-7B-Instruct-v0.2,afternoon,3.228973722457886,222.0625,32.25
44
  Mistral-7B-Instruct-v0.2,late afternoon,3.2048643112182615,219.8625,31.8
45
  Mistral-7B-Instruct-v0.2,evening,3.397640073299408,261.18333333333334,40.1
46
  Mistral-7B-Instruct-v0.2,late evening,3.389284573495388,175.79375,25.68125
47
  Mistral-7B-Instruct-v0.2,midnight,2.6972494465964183,88.14285714285714,10.607142857142858
48
- Mistral-7B-Instruct-v0.2,night,4.646747261285782,214.0,29.75
49
  Mixtral-8x7B-Instruct-v0.1,early morning,4.526968242530536,285.045,41.86
50
  Mixtral-8x7B-Instruct-v0.1,morning,3.9661054956285575,304.82,47.28
51
  Mixtral-8x7B-Instruct-v0.1,afternoon,5.362903979589355,369.3192307692308,54.353846153846156
@@ -54,38 +55,38 @@ Mixtral-8x7B-Instruct-v0.1,evening,3.6435119574237023,326.69,48.545
54
  Mixtral-8x7B-Instruct-v0.1,late evening,5.62397656769588,395.15714285714284,49.02857142857143
55
  Mixtral-8x7B-Instruct-v0.1,midnight,4.639010797279158,323.0394736842105,42.69210526315789
56
  Mixtral-8x7B-Instruct-v0.1,night,4.009439338194697,301.24545454545455,42.21818181818182
57
- OpenHermes-2.5-Mistral (7B),early morning,2.613155171275139,281.125,32.125
58
  OpenHermes-2.5-Mistral (7B),morning,2.46380607287089,277.875,32.125
59
  OpenHermes-2.5-Mistral (7B),afternoon,3.382803752858152,357.8125,32.791666666666664
60
  OpenHermes-2.5-Mistral (7B),late afternoon,2.52796063820521,281.125,32.125
61
  OpenHermes-2.5-Mistral (7B),evening,2.7269538966092197,254.08333333333334,26.916666666666668
62
  OpenHermes-2.5-Mistral (7B),late evening,2.720560759305954,281.125,32.125
63
  OpenHermes-2.5-Mistral (7B),midnight,2.8086547496470997,273.4583333333333,30.833333333333332
64
- OpenHermes-2.5-Mistral (7B),night,2.6971513723072253,277.63793103448273,31.982758620689655
65
- Qwen 1.5 Chat (7B),early morning,2.234119971593221,269.0625,30.125
66
  Qwen 1.5 Chat (7B),morning,1.8359179911406145,253.66666666666666,27.541666666666668
67
  Qwen 1.5 Chat (7B),afternoon,2.539412996504042,344.93617021276594,37.37234042553192
68
  Qwen 1.5 Chat (7B),late afternoon,2.0715979735056558,269.0,30.125
69
  Qwen 1.5 Chat (7B),evening,2.325377941131592,244.375,27.25
70
  Qwen 1.5 Chat (7B),late evening,1.9339114997697913,253.66666666666666,27.541666666666668
71
  Qwen 1.5 Chat (7B),midnight,2.2824907505765872,262.1458333333333,29.020833333333332
72
- Qwen 1.5 Chat (7B),night,1.902279602629798,268.2142857142857,30.017857142857142
73
- RedPajama-INCITE Chat (7B),early morning,2.1123720943927764,52.82608695652174,4.3478260869565215
74
  RedPajama-INCITE Chat (7B),morning,1.7558168431986934,57.375,4.791666666666667
75
  RedPajama-INCITE Chat (7B),afternoon,1.8110081959854474,57.71875,5.302083333333333
76
  RedPajama-INCITE Chat (7B),late afternoon,1.7436921298503876,59.625,5.0
77
  RedPajama-INCITE Chat (7B),evening,1.964497913013805,55.125,4.583333333333333
78
  RedPajama-INCITE Chat (7B),late evening,2.258105857031686,52.875,4.375
79
  RedPajama-INCITE Chat (7B),midnight,2.072928147845798,56.25,4.6875
80
- RedPajama-INCITE Chat (7B),night,1.9067783311561302,55.96551724137931,4.741379310344827
81
- Snorkel Mistral PairRM DPO (7B),early morning,22.729273674335886,4691.333333333333,100.6875
82
  Snorkel Mistral PairRM DPO (7B),morning,22.59587260087331,4714.0,104.375
83
  Snorkel Mistral PairRM DPO (7B),afternoon,34.586263124148054,5710.697916666667,541.0833333333334
84
  Snorkel Mistral PairRM DPO (7B),late afternoon,22.353231539328892,4714.0,104.375
85
  Snorkel Mistral PairRM DPO (7B),evening,24.748520737602597,4596.416666666667,92.33333333333333
86
  Snorkel Mistral PairRM DPO (7B),late evening,24.120955445549704,4623.333333333333,89.625
87
  Snorkel Mistral PairRM DPO (7B),midnight,22.670283652366475,4691.333333333333,100.6875
88
- Snorkel Mistral PairRM DPO (7B),night,22.237432054110936,4527.879310344828,96.75862068965517
89
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,early morning,2.0273348593711855,372.13,62.53
90
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,morning,1.9041210174560548,372.05,62.6
91
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,afternoon,1.8381905496120452,308.795,51.08
@@ -94,22 +95,22 @@ TinyLlama/TinyLlama-1.1B-Chat-v1.0,evening,1.7984187936782836,313.99,51.96
94
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,late evening,1.5875422928068372,192.78333333333333,31.261111111111113
95
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,midnight,1.6341248273849487,210.2,34.2
96
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,night,2.0128010153770446,372.05,62.6
97
- Vicuna v1.5 (7B),early morning,3.347896182859266,263.45652173913044,29.847826086956523
98
  Vicuna v1.5 (7B),morning,5.204141834507817,308.375,32.833333333333336
99
  Vicuna v1.5 (7B),afternoon,3.670576659115878,353.1458333333333,43.791666666666664
100
  Vicuna v1.5 (7B),late afternoon,3.768600355495106,324.27272727272725,35.77272727272727
101
  Vicuna v1.5 (7B),evening,3.0816855430603027,296.1,38.55
102
  Vicuna v1.5 (7B),late evening,3.779375910758972,292.5416666666667,30.125
103
  Vicuna v1.5 (7B),midnight,3.8164960656847273,288.6666666666667,32.5625
104
- Vicuna v1.5 (7B),night,3.6917150114096846,293.05357142857144,30.482142857142858
105
- WizardLM v1.2 (13B),early morning,5.147532618564108,250.89583333333334,27.416666666666668
106
  WizardLM v1.2 (13B),morning,4.6954833320949385,250.16666666666666,27.458333333333332
107
  WizardLM v1.2 (13B),afternoon,7.0199061699127885,320.51063829787233,28.43617021276596
108
  WizardLM v1.2 (13B),late afternoon,4.935352149217025,254.04166666666666,27.833333333333332
109
  WizardLM v1.2 (13B),evening,4.939989816058766,240.33333333333334,25.791666666666668
110
  WizardLM v1.2 (13B),late evening,5.11581133660816,226.25,23.0
111
  WizardLM v1.2 (13B),midnight,5.255108930847862,259.6041666666667,28.125
112
- WizardLM v1.2 (13B),night,5.112857627017157,274.37931034482756,31.224137931034484
113
  chat-bison (PaLM 2),early morning,2.4665334616388592,381.9214285714286,60.892857142857146
114
  chat-bison (PaLM 2),morning,2.488477897644043,381.73,60.88
115
  chat-bison (PaLM 2),afternoon,2.6460144804074215,376.28076923076924,55.965384615384615
 
1
  model,time_of_day,execution_time,characters_count,words_count
2
+ 01-ai Yi Chat (34B),early morning,9.13007491738049,397.0,40.625
3
  01-ai Yi Chat (34B),morning,9.243995500647504,410.5,40.625
4
  01-ai Yi Chat (34B),afternoon,12.255323665086614,512.3645833333334,51.59375
5
  01-ai Yi Chat (34B),late afternoon,9.417656523840767,384.9583333333333,40.125
6
  01-ai Yi Chat (34B),evening,8.16779062861488,360.8333333333333,39.375
7
  01-ai Yi Chat (34B),late evening,9.680033167203268,424.5,40.875
8
  01-ai Yi Chat (34B),midnight,9.311747227774726,397.0625,40.416666666666664
9
+ 01-ai Yi Chat (34B),night,9.625520552260966,421.6125,41.45
10
+ Chronos Hermes (13B),early morning,8.063542557544396,507.8181818181818,54.63636363636363
11
  Chronos Hermes (13B),morning,8.221057146787643,595.375,64.5
12
  Chronos Hermes (13B),afternoon,6.651120054020601,539.5,76.10416666666667
13
  Chronos Hermes (13B),late afternoon,7.630303209478205,544.625,62.833333333333336
14
  Chronos Hermes (13B),evening,6.468730966250102,543.05,72.15
15
  Chronos Hermes (13B),late evening,7.710561646355523,423.1363636363636,46.31818181818182
16
  Chronos Hermes (13B),midnight,7.710010987520218,486.5217391304348,54.93478260869565
17
+ Chronos Hermes (13B),night,16.544231017430622,536.2317073170732,62.76829268292683
18
+ Falcon Instruct (7B),early morning,1.2471453740046574,46.291666666666664,8.569444444444445
19
  Falcon Instruct (7B),morning,1.4012710821060907,47.291666666666664,8.75
20
  Falcon Instruct (7B),afternoon,1.5774729509611387,36.96875,7.020833333333333
21
  Falcon Instruct (7B),late afternoon,1.0884015900748116,43.833333333333336,8.166666666666666
22
  Falcon Instruct (7B),evening,0.7874892950057983,39.2,7.4
23
  Falcon Instruct (7B),late evening,1.2339241071180864,46.791666666666664,8.708333333333334
24
  Falcon Instruct (7B),midnight,0.9382020387894068,38.541666666666664,7.270833333333333
25
+ Falcon Instruct (7B),night,1.2997726936862892,45.90243902439025,8.524390243902438
26
+ LLaMA-2 Chat (7B),early morning,2.8209988124230327,444.7361111111111,60.84722222222222
27
  LLaMA-2 Chat (7B),morning,2.678236266841059,455.0416666666667,60.791666666666664
28
  LLaMA-2 Chat (7B),afternoon,3.5475496424569024,477.9479166666667,53.135416666666664
29
  LLaMA-2 Chat (7B),late afternoon,2.7755608558654785,430.45,62.15
30
  LLaMA-2 Chat (7B),evening,2.9608939344232734,432.2916666666667,62.333333333333336
31
  LLaMA-2 Chat (7B),late evening,2.869330001913983,452.7083333333333,63.083333333333336
32
  LLaMA-2 Chat (7B),midnight,2.8870244533457656,460.6666666666667,63.166666666666664
33
+ LLaMA-2 Chat (7B),night,4.6455250724417265,419.5,57.666666666666664
34
+ Mistral (7B) Instruct v0.2 (Together AI),early morning,3.627201789248306,515.1416666666667,64.99166666666666
35
  Mistral (7B) Instruct v0.2 (Together AI),morning,3.6725051470205816,500.82894736842104,63.75
36
  Mistral (7B) Instruct v0.2 (Together AI),afternoon,3.255509059895914,481.76237623762376,60.7970297029703
37
  Mistral (7B) Instruct v0.2 (Together AI),late afternoon,3.542298033617545,543.5138888888889,65.65277777777777
38
  Mistral (7B) Instruct v0.2 (Together AI),evening,5.496347131400273,507.48387096774195,63.774193548387096
39
  Mistral (7B) Instruct v0.2 (Together AI),late evening,3.3933188574654713,515.2794117647059,66.45588235294117
40
  Mistral (7B) Instruct v0.2 (Together AI),midnight,4.510189438914205,517.34375,65.16666666666667
41
+ Mistral (7B) Instruct v0.2 (Together AI),night,3.5451678467578573,500.3,62.823076923076925
42
+ Mistral-7B-Instruct-v0.2,early morning,4.625752210617065,214.0,29.75
43
  Mistral-7B-Instruct-v0.2,morning,3.734026002883911,362.9,58.35
44
  Mistral-7B-Instruct-v0.2,afternoon,3.228973722457886,222.0625,32.25
45
  Mistral-7B-Instruct-v0.2,late afternoon,3.2048643112182615,219.8625,31.8
46
  Mistral-7B-Instruct-v0.2,evening,3.397640073299408,261.18333333333334,40.1
47
  Mistral-7B-Instruct-v0.2,late evening,3.389284573495388,175.79375,25.68125
48
  Mistral-7B-Instruct-v0.2,midnight,2.6972494465964183,88.14285714285714,10.607142857142858
49
+ Mistral-7B-Instruct-v0.2,night,4.609350740909576,214.0,29.75
50
  Mixtral-8x7B-Instruct-v0.1,early morning,4.526968242530536,285.045,41.86
51
  Mixtral-8x7B-Instruct-v0.1,morning,3.9661054956285575,304.82,47.28
52
  Mixtral-8x7B-Instruct-v0.1,afternoon,5.362903979589355,369.3192307692308,54.353846153846156
 
55
  Mixtral-8x7B-Instruct-v0.1,late evening,5.62397656769588,395.15714285714284,49.02857142857143
56
  Mixtral-8x7B-Instruct-v0.1,midnight,4.639010797279158,323.0394736842105,42.69210526315789
57
  Mixtral-8x7B-Instruct-v0.1,night,4.009439338194697,301.24545454545455,42.21818181818182
58
+ OpenHermes-2.5-Mistral (7B),early morning,2.8495449788040585,281.0416666666667,32.125
59
  OpenHermes-2.5-Mistral (7B),morning,2.46380607287089,277.875,32.125
60
  OpenHermes-2.5-Mistral (7B),afternoon,3.382803752858152,357.8125,32.791666666666664
61
  OpenHermes-2.5-Mistral (7B),late afternoon,2.52796063820521,281.125,32.125
62
  OpenHermes-2.5-Mistral (7B),evening,2.7269538966092197,254.08333333333334,26.916666666666668
63
  OpenHermes-2.5-Mistral (7B),late evening,2.720560759305954,281.125,32.125
64
  OpenHermes-2.5-Mistral (7B),midnight,2.8086547496470997,273.4583333333333,30.833333333333332
65
+ OpenHermes-2.5-Mistral (7B),night,2.7470715910196306,274.4146341463415,31.317073170731707
66
+ Qwen 1.5 Chat (7B),early morning,2.167507814036475,269.0416666666667,30.125
67
  Qwen 1.5 Chat (7B),morning,1.8359179911406145,253.66666666666666,27.541666666666668
68
  Qwen 1.5 Chat (7B),afternoon,2.539412996504042,344.93617021276594,37.37234042553192
69
  Qwen 1.5 Chat (7B),late afternoon,2.0715979735056558,269.0,30.125
70
  Qwen 1.5 Chat (7B),evening,2.325377941131592,244.375,27.25
71
  Qwen 1.5 Chat (7B),late evening,1.9339114997697913,253.66666666666666,27.541666666666668
72
  Qwen 1.5 Chat (7B),midnight,2.2824907505765872,262.1458333333333,29.020833333333332
73
+ Qwen 1.5 Chat (7B),night,1.98169333812518,262.5375,29.425
74
+ RedPajama-INCITE Chat (7B),early morning,2.149312309920788,55.15714285714286,4.571428571428571
75
  RedPajama-INCITE Chat (7B),morning,1.7558168431986934,57.375,4.791666666666667
76
  RedPajama-INCITE Chat (7B),afternoon,1.8110081959854474,57.71875,5.302083333333333
77
  RedPajama-INCITE Chat (7B),late afternoon,1.7436921298503876,59.625,5.0
78
  RedPajama-INCITE Chat (7B),evening,1.964497913013805,55.125,4.583333333333333
79
  RedPajama-INCITE Chat (7B),late evening,2.258105857031686,52.875,4.375
80
  RedPajama-INCITE Chat (7B),midnight,2.072928147845798,56.25,4.6875
81
+ RedPajama-INCITE Chat (7B),night,2.0220184448437815,57.03658536585366,4.817073170731708
82
+ Snorkel Mistral PairRM DPO (7B),early morning,20.892576156343733,4257.208333333333,100.45833333333333
83
  Snorkel Mistral PairRM DPO (7B),morning,22.59587260087331,4714.0,104.375
84
  Snorkel Mistral PairRM DPO (7B),afternoon,34.586263124148054,5710.697916666667,541.0833333333334
85
  Snorkel Mistral PairRM DPO (7B),late afternoon,22.353231539328892,4714.0,104.375
86
  Snorkel Mistral PairRM DPO (7B),evening,24.748520737602597,4596.416666666667,92.33333333333333
87
  Snorkel Mistral PairRM DPO (7B),late evening,24.120955445549704,4623.333333333333,89.625
88
  Snorkel Mistral PairRM DPO (7B),midnight,22.670283652366475,4691.333333333333,100.6875
89
+ Snorkel Mistral PairRM DPO (7B),night,21.554972467422484,4255.55,95.1125
90
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,early morning,2.0273348593711855,372.13,62.53
91
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,morning,1.9041210174560548,372.05,62.6
92
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,afternoon,1.8381905496120452,308.795,51.08
 
95
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,late evening,1.5875422928068372,192.78333333333333,31.261111111111113
96
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,midnight,1.6341248273849487,210.2,34.2
97
  TinyLlama/TinyLlama-1.1B-Chat-v1.0,night,2.0128010153770446,372.05,62.6
98
+ Vicuna v1.5 (7B),early morning,3.3891975168596233,274.36764705882354,31.602941176470587
99
  Vicuna v1.5 (7B),morning,5.204141834507817,308.375,32.833333333333336
100
  Vicuna v1.5 (7B),afternoon,3.670576659115878,353.1458333333333,43.791666666666664
101
  Vicuna v1.5 (7B),late afternoon,3.768600355495106,324.27272727272725,35.77272727272727
102
  Vicuna v1.5 (7B),evening,3.0816855430603027,296.1,38.55
103
  Vicuna v1.5 (7B),late evening,3.779375910758972,292.5416666666667,30.125
104
  Vicuna v1.5 (7B),midnight,3.8164960656847273,288.6666666666667,32.5625
105
+ Vicuna v1.5 (7B),night,3.648655335108439,289.6375,31.0875
106
+ WizardLM v1.2 (13B),early morning,5.290586443914884,296.2916666666667,33.708333333333336
107
  WizardLM v1.2 (13B),morning,4.6954833320949385,250.16666666666666,27.458333333333332
108
  WizardLM v1.2 (13B),afternoon,7.0199061699127885,320.51063829787233,28.43617021276596
109
  WizardLM v1.2 (13B),late afternoon,4.935352149217025,254.04166666666666,27.833333333333332
110
  WizardLM v1.2 (13B),evening,4.939989816058766,240.33333333333334,25.791666666666668
111
  WizardLM v1.2 (13B),late evening,5.11581133660816,226.25,23.0
112
  WizardLM v1.2 (13B),midnight,5.255108930847862,259.6041666666667,28.125
113
+ WizardLM v1.2 (13B),night,5.411002513689873,303.9268292682927,35.048780487804876
114
  chat-bison (PaLM 2),early morning,2.4665334616388592,381.9214285714286,60.892857142857146
115
  chat-bison (PaLM 2),morning,2.488477897644043,381.73,60.88
116
  chat-bison (PaLM 2),afternoon,2.6460144804074215,376.28076923076924,55.965384615384615
data/time_of_day_plots.csv CHANGED
The diff for this file is too large to render. See raw diff