Commit
·
ee6a180
1
Parent(s):
f12cdfe
fixed version
Browse files- app.py +10 -13
- assets/__pycache__/text_content.cpython-313.pyc +0 -0
- assets/merged_data.csv +71 -0
- assets/text_content.py +50 -0
- src/__pycache__/collect_data.cpython-313.pyc +0 -0
- src/__pycache__/filter_utils.cpython-313.pyc +0 -0
- src/collect_data.py +139 -5
- {utils → src}/filter_utils.py +47 -26
- src/main_df.csv +0 -24
- src/pricing.py +0 -36
- src/process_data.py +0 -209
- src/process_data_v2.py +157 -0
- src/results_1.6.5_ascii.csv +0 -19
- src/results_1.6.5_multimodal.csv +0 -20
- src/results_1.6.csv +0 -69
- src/v1.6.5_ascii_latency.csv +0 -19
- src/v1.6.5_multimodal_latency.csv +0 -20
- src/v1.6_latency.csv +0 -69
- utils/__pycache__/filter_utils.cpython-310.pyc +0 -0
- utils/__pycache__/filter_utils.cpython-313.pyc +0 -0
- utils/__pycache__/text_content.cpython-313.pyc +0 -0
- utils/__pycache__/text_utils.cpython-310.pyc +0 -0
- utils/text_utils.py +0 -13
app.py
CHANGED
@@ -2,24 +2,20 @@ import pandas as pd
|
|
2 |
import gradio as gr
|
3 |
import os
|
4 |
from gradio_rangeslider import RangeSlider
|
5 |
-
import math
|
6 |
|
7 |
-
from
|
8 |
|
9 |
# Main Leaderboard containing everything
|
10 |
-
text_leaderboard = pd.read_csv(os.path.join('
|
11 |
-
text_leaderboard = text_leaderboard.sort_values(by='
|
12 |
-
|
13 |
-
text_leaderboard = text_leaderboard.sort_values(by='Average Clemscore', ascending=False)
|
14 |
|
15 |
open_weight_df = text_leaderboard[text_leaderboard['Open Weight'] == True]
|
16 |
if not open_weight_df.empty: # Check if filtered df is non-empty
|
17 |
-
max_parameter_size = open_weight_df['
|
18 |
|
19 |
# Short leaderboard containing fixed columns
|
20 |
short_leaderboard = filter_cols(text_leaderboard)
|
21 |
|
22 |
-
|
23 |
## Extract data
|
24 |
langs = []
|
25 |
licenses = []
|
@@ -37,10 +33,10 @@ for i in range(len(text_leaderboard)):
|
|
37 |
license_name = text_leaderboard.iloc[i]['License Name']
|
38 |
|
39 |
licenses.append(license_name)
|
40 |
-
ip_prices.append(text_leaderboard.iloc[i]['Input $/1M'])
|
41 |
-
op_prices.append(text_leaderboard.iloc[i]['Output $/1M'])
|
42 |
-
latencies.append(text_leaderboard.iloc[i]['
|
43 |
-
parameters.append(text_leaderboard.iloc[i]['
|
44 |
contexts.append(text_leaderboard.iloc[i]['Context Size (k)'])
|
45 |
dates.append(text_leaderboard.iloc[i]['Release Date'])
|
46 |
|
@@ -55,9 +51,10 @@ max_input_price = max(ip_prices)
|
|
55 |
max_output_price = max(op_prices)
|
56 |
max_latency = max(latencies)
|
57 |
|
58 |
-
min_parameters = min(parameters)
|
59 |
max_parameter = max_parameter_size
|
60 |
parameter_step = 1
|
|
|
61 |
|
62 |
min_context = min(contexts)
|
63 |
max_context = max(contexts)
|
|
|
2 |
import gradio as gr
|
3 |
import os
|
4 |
from gradio_rangeslider import RangeSlider
|
|
|
5 |
|
6 |
+
from src.filter_utils import filter, filter_cols
|
7 |
|
8 |
# Main Leaderboard containing everything
|
9 |
+
text_leaderboard = pd.read_csv(os.path.join('assets', 'merged_data.csv'))
|
10 |
+
text_leaderboard = text_leaderboard.sort_values(by='Clemscore', ascending=False)
|
|
|
|
|
11 |
|
12 |
open_weight_df = text_leaderboard[text_leaderboard['Open Weight'] == True]
|
13 |
if not open_weight_df.empty: # Check if filtered df is non-empty
|
14 |
+
max_parameter_size = open_weight_df['Parameters (B)'].max()
|
15 |
|
16 |
# Short leaderboard containing fixed columns
|
17 |
short_leaderboard = filter_cols(text_leaderboard)
|
18 |
|
|
|
19 |
## Extract data
|
20 |
langs = []
|
21 |
licenses = []
|
|
|
33 |
license_name = text_leaderboard.iloc[i]['License Name']
|
34 |
|
35 |
licenses.append(license_name)
|
36 |
+
ip_prices.append(text_leaderboard.iloc[i]['Input $/1M tokens'])
|
37 |
+
op_prices.append(text_leaderboard.iloc[i]['Output $/1M tokens'])
|
38 |
+
latencies.append(text_leaderboard.iloc[i]['Latency (s)'])
|
39 |
+
parameters.append(text_leaderboard.iloc[i]['Parameters (B)'])
|
40 |
contexts.append(text_leaderboard.iloc[i]['Context Size (k)'])
|
41 |
dates.append(text_leaderboard.iloc[i]['Release Date'])
|
42 |
|
|
|
51 |
max_output_price = max(op_prices)
|
52 |
max_latency = max(latencies)
|
53 |
|
54 |
+
min_parameters = 0 if pd.isna(min(parameters)) else min(parameters)
|
55 |
max_parameter = max_parameter_size
|
56 |
parameter_step = 1
|
57 |
+
print(f"MIN {min_parameters}, MAX {max_parameter}")
|
58 |
|
59 |
min_context = min(contexts)
|
60 |
max_context = max(contexts)
|
assets/__pycache__/text_content.cpython-313.pyc
CHANGED
Binary files a/assets/__pycache__/text_content.cpython-313.pyc and b/assets/__pycache__/text_content.cpython-313.pyc differ
|
|
assets/merged_data.csv
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Model Name,Latency (s),Clemscore,Parameters (B),Release Date,Open Weight,Languages,Context Size (k),License Name,License URL,Single Image,Multiple Images,Audio,Video,Input $/1M tokens,Output $/1M tokens,License,Temp Date
|
2 |
+
CodeLlama-34b-Instruct-hf,3.851887315425933,14.35,34.0,2023-08-24,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-08-24
|
3 |
+
Idefics3-8B-Llama3,2.7247848158020003,17.52,8.0,2024-08-05,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-08-05
|
4 |
+
InternVL2-26B,4.239272214812438,37.45,26.0,2024-07-15,True,"Chinese, English",,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-07-15
|
5 |
+
InternVL2-40B,6.267102418391484,32.23,40.0,2024-07-15,True,"Chinese, English",,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-07-15
|
6 |
+
InternVL2-8B,1.948600327851168,23.17,8.0,2024-07-15,True,"Chinese, English",,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-07-15
|
7 |
+
InternVL2-Llama3-76B,10.660117299385416,33.84,76.0,2024-07-15,True,"Chinese, English",,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-07-15
|
8 |
+
Llama-3.1-Nemotron-70B-Instruct-HF,1.105406813859938,10.16,70.0,2024-10-12,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-10-12
|
9 |
+
Meta-Llama-3.1-405B-Instruct-Turbo,0.7886103946545819,52.11,405.0,2024-07-23,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-07-23
|
10 |
+
Meta-Llama-3.1-70B-Instruct,0.8105055275945292,38.83,70.0,2024-07-23,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-07-23
|
11 |
+
Meta-Llama-3.1-8B-Instruct,0.206305748406081,18.36,8.0,2024-07-23,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-07-23
|
12 |
+
Mistral-7B-Instruct-v0.1,0.2828647550771728,8.01,7.0,2023-09-27,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-09-27
|
13 |
+
Mistral-Large-Instruct-2407,1.2444667688634192,45.39,123.0,2024-07-24,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-07-24
|
14 |
+
Mixtral-8x22B-Instruct-v0.1,1.0759354563573875,12.69,141.0,2024-04-17,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-04-17
|
15 |
+
Mixtral-8x7B-Instruct-v0.1,0.9392967660636314,8.17,46.7,2023-12-11,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-12-11
|
16 |
+
Mixtral-8x7B-Instruct-v0.1,0.9392967660636314,8.17,46.7,2023-12-11,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-12-11
|
17 |
+
Phi-3-mini-128k-instruct,0.6615315832127354,6.33,3.8,2024-04-22,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-04-22
|
18 |
+
Phi-3.5-vision-instruct,1.540488050470713,15.64,4.0,2024-08-17,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-08-17
|
19 |
+
Pixtral-12B-2409,1.4976731684122335,28.64,12.0,2024-09-11,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-09-11
|
20 |
+
Qwen1.5-72B-Chat,12.689668927658191,30.37,72.0,2024-01-30,True,"Chinese, English",,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-01-30
|
21 |
+
Qwen1.5-7B-Chat,0.3898907690883847,2.58,7.0,2024-01-30,True,"Chinese, English",,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-01-30
|
22 |
+
Qwen2-72B-Instruct,0.9480584860151366,30.03,72.0,2024-05-28,True,"Chinese, English",,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-05-28
|
23 |
+
Qwen2-7B-Instruct,0.3589407217948714,6.18,7.0,2024-06-04,True,"Chinese, English",,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-06-04
|
24 |
+
Qwen2.5-Coder-32B-Instruct,0.8337066960552915,27.57,32.0,2024-11-06,True,"Chinese, English",,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-11-06
|
25 |
+
SUS-Chat-34B,2.27951476106911,14.11,34.0,2023-11-29,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-11-29
|
26 |
+
Starling-LM-7B-beta,1.365002297029703,6.56,7.0,2024-03-19,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-03-19
|
27 |
+
WizardLM-13b-v1.2,3.5654367625763,11.48,13.0,2023-07-25,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-07-25
|
28 |
+
WizardLM-70b-v1.0,3.924977203883497,17.4,70.0,2023-08-09,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-08-09
|
29 |
+
Yi-34B-Chat,1.2871676207135438,8.27,34.0,2023-11-22,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-11-22
|
30 |
+
aya-23-35B,0.5755088395104287,13.35,35.0,2024-05-19,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-05-19
|
31 |
+
aya-23-8B,0.4818848185613353,11.72,8.0,2024-05-19,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-05-19
|
32 |
+
claude-2.1,1.6836316221022516,32.5,137.0,2023-11-21,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,8.0,24.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-11-21
|
33 |
+
claude-3-5-sonnet-20240620,2.0645066812060726,68.925,,2024-06-20,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,3.0,15.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-06-20
|
34 |
+
claude-3-haiku-20240307,0.8695497396191068,22.49,20.0,2024-03-07,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,0.25,1.25,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-03-07
|
35 |
+
claude-3-opus-20240229,3.916101346449241,55.29,2000.0,2024-02-29,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,15.0,75.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-02-29
|
36 |
+
claude-3-sonnet-20240229,1.4194860128225952,30.53,70.0,2024-02-29,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,3.0,15.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-02-29
|
37 |
+
codegemma-7b-it,0.3048974050865229,15.3,7.0,2024-04-09,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-04-09
|
38 |
+
command-r,0.1883241491458606,14.15,35.0,2024-03-01,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-03-01
|
39 |
+
command-r-plus,0.3104016019283746,24.94,104.0,2024-04-01,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-04-01
|
40 |
+
dolphin-vision-72b,10.190958003739729,4.65,72.0,2024-06-28,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-06-28
|
41 |
+
gemma-1.1-2b-it,0.1192569946127946,2.91,2.0,2024-03-26,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-03-26
|
42 |
+
gemma-1.1-7b-it,0.1782953878345496,14.14,7.0,2024-03-26,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-03-26
|
43 |
+
gemma-2-27b-it,0.9922771009345794,3.51,27.0,2024-06-24,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-06-24
|
44 |
+
gemma-2-2b-it,0.3139821517919889,2.67,2.0,2024-07-16,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-07-16
|
45 |
+
gemma-2-9b-it,0.3692553324432573,27.34,9.0,2024-06-24,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-06-24
|
46 |
+
gemma-7b-it,0.6112263564356434,1.82,7.0,2024-02-21,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-02-21
|
47 |
+
gpt-3.5-turbo-0125,,27.22,175.0,2024-01-25,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.5,1.5,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-01-25
|
48 |
+
gpt-4-0125-preview,1.0418927523113648,52.5,1760.0,2024-01-25,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,10.0,30.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-01-25
|
49 |
+
gpt-4-0613,0.648441146582876,51.09,1760.0,2023-06-13,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-06-13
|
50 |
+
gpt-4-1106-preview,0.7767265743542736,51.99,1760.0,2023-11-06,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-11-06
|
51 |
+
gpt-4-1106-vision-preview,4.712557435752081,73.55,,2023-11-06,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,10.0,30.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-11-06
|
52 |
+
gpt-4-turbo-2024-04-09,,58.3,1760.0,2024-04-09,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,10.0,30.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-04-09
|
53 |
+
gpt-4o-2024-05-13,5.022646224034688,58.95,,2024-05-13,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,5.0,15.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-05-13
|
54 |
+
gpt-4o-2024-08-06,1.951333607454077,63.875,,2024-08-06,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,3.75,15.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-08-06
|
55 |
+
gpt-4o-mini-2024-07-18,2.08647007916325,46.55,,2024-07-18,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,0.3,1.2,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-07-18
|
56 |
+
idefics-80b-instruct,6.8089303915502315,29.55,80.0,2023-07-24,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-07-24
|
57 |
+
idefics-9b-instruct,4.156911970172687,12.29,9.0,2023-07-24,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-07-24
|
58 |
+
internlm-xcomposer2d5-7b,8.438096179522176,16.95,7.0,2024-07-02,True,"Chinese, English",,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-07-02
|
59 |
+
llama-2-70b-chat-hf,4.724659620079607,0.81,70.0,2023-07-18,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-07-18
|
60 |
+
mistral-large-2402,0.3967416598893965,28.17,123.0,2024-02-01,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-02-01
|
61 |
+
mistral-medium-2312,3.3167870515212083,16.43,,2023-12-01,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-12-01
|
62 |
+
o1-preview-2024-09-12,7.368572853601854,73.63,,2024-09-12,False,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,15.0,60.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-09-12
|
63 |
+
openchat-3.5-0106,0.2920951450556648,17.1,7.0,2024-01-06,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-01-06
|
64 |
+
openchat-3.5-1210,0.280498276910299,18.22,7.0,2023-12-10,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-12-10
|
65 |
+
openchat_3.5,0.3172876868462049,23.64,7.0,2023-10-30,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-10-30
|
66 |
+
salamandra-7b-instruct,0.3894831193548387,6.04,7.0,2024-09-30,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2024-09-30
|
67 |
+
sheep-duck-llama-2-13b,2.9462099794520573,5.39,13.0,2023-10-04,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-10-04
|
68 |
+
sheep-duck-llama-2-70b-v1.1,5.524607914346901,21.5,70.0,2023-09-27,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-09-27
|
69 |
+
tulu-2-dpo-70b,7.848597339328536,12.62,70.0,2023-11-12,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-11-12
|
70 |
+
vicuna-13b-v1.5,1.4753938719676598,7.01,13.0,2023-07-29,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-07-29
|
71 |
+
vicuna-33b-v1.3,0.8235025152162306,11.27,33.0,2023-06-21,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",2023-06-21
|
assets/text_content.py
CHANGED
@@ -1,3 +1,53 @@
|
|
|
|
|
|
1 |
CLEMBENCH_RUNS_REPO = "https://raw.githubusercontent.com/clembench/clembench-runs/main/"
|
2 |
REGISTRY_URL = "https://raw.githubusercontent.com/clp-research/clembench/refs/heads/refactor_model_registry/backends/model_registry.json"
|
3 |
BENCHMARK_FILE = "benchmark_runs.json"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
CLEMBENCH_RUNS_REPO = "https://raw.githubusercontent.com/clembench/clembench-runs/main/"
|
4 |
REGISTRY_URL = "https://raw.githubusercontent.com/clp-research/clembench/refs/heads/refactor_model_registry/backends/model_registry.json"
|
5 |
BENCHMARK_FILE = "benchmark_runs.json"
|
6 |
+
|
7 |
+
LATENCY_FOLDER = os.path.join("Addenda", "Latency")
|
8 |
+
RESULT_FILE = "results.csv"
|
9 |
+
LATENCY_SUFFIX = "_latency.csv"
|
10 |
+
|
11 |
+
LANG_MAPPING = {
|
12 |
+
'el': 'Greek',
|
13 |
+
'id': 'Indonesian',
|
14 |
+
'ko': 'Korean',
|
15 |
+
'sv': 'Swedish',
|
16 |
+
'de': 'German',
|
17 |
+
'lv': 'Latvian',
|
18 |
+
'am': 'Amharic',
|
19 |
+
'fi': 'Finnish',
|
20 |
+
'da': 'Danish',
|
21 |
+
'pt': 'Portuguese',
|
22 |
+
'sw': 'Swahili',
|
23 |
+
'es': 'Spanish',
|
24 |
+
'it': 'Italian',
|
25 |
+
'bn': 'Bengali',
|
26 |
+
'nl': 'Dutch',
|
27 |
+
'lt': 'Lithuanian',
|
28 |
+
'ro': 'Romanian',
|
29 |
+
'sl': 'Slovenian',
|
30 |
+
'hu': 'Hungarian',
|
31 |
+
'hr': 'Croatian',
|
32 |
+
'vi': 'Vietnamese',
|
33 |
+
'hi': 'Hindi',
|
34 |
+
'zh': 'Chinese',
|
35 |
+
'pl': 'Polish',
|
36 |
+
'ar': 'Arabic',
|
37 |
+
'cs': 'Czech',
|
38 |
+
'sk': 'Slovak',
|
39 |
+
'ja': 'Japanese',
|
40 |
+
'no': 'Norwegian',
|
41 |
+
'uk': 'Ukrainian',
|
42 |
+
'fr': 'French',
|
43 |
+
'et': 'Estonian',
|
44 |
+
'ru': 'Russian',
|
45 |
+
'th': 'Thai',
|
46 |
+
'bg': 'Bulgarian',
|
47 |
+
'tr': 'Turkish',
|
48 |
+
'ms': 'Malay',
|
49 |
+
'he': 'Hebrew',
|
50 |
+
'tl': 'Tagalog',
|
51 |
+
'sr': 'Serbian',
|
52 |
+
'en': 'English'
|
53 |
+
}
|
src/__pycache__/collect_data.cpython-313.pyc
ADDED
Binary file (7.13 kB). View file
|
|
src/__pycache__/filter_utils.cpython-313.pyc
ADDED
Binary file (3.93 kB). View file
|
|
src/collect_data.py
CHANGED
@@ -8,11 +8,145 @@ Model info - https://github.com/kushal-10/clembench/blob/feat/registry/backends/
|
|
8 |
import pandas as pd
|
9 |
import json
|
10 |
import requests
|
11 |
-
from assets.text_content import
|
|
|
12 |
|
13 |
-
response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
import pandas as pd
|
9 |
import json
|
10 |
import requests
|
11 |
+
from assets.text_content import CLEMBENCH_RUNS_REPO, REGISTRY_URL, BENCHMARK_FILE, LATENCY_FOLDER, RESULT_FILE, LATENCY_SUFFIX
|
12 |
+
import os
|
13 |
|
14 |
+
def validate_request(url: str, response) -> bool:
    """
    Report whether an HTTP response signals success.

    Args:
        url (str): Address that was requested; only used in the failure message
        response (requests.Response): Response whose ``status_code`` is inspected

    Returns:
        bool: True when the status code is exactly 200, False for anything else
    """
    succeeded = response.status_code == 200
    if not succeeded:
        # Report the failing URL so the caller's log shows which file was skipped
        print(f"Failed to read file - {url}. Status Code: {response.status_code}")
    return succeeded
|
30 |
|
31 |
+
def fetch_benchmark_data(benchmark: str = "text", version_names: list = []) -> tuple:
|
32 |
+
"""
|
33 |
+
Fetch and parse benchmark results and latency data from CSV files.
|
34 |
+
|
35 |
+
Args:
|
36 |
+
benchmark (str): Type of benchmark to fetch ('text' or 'multimodal')
|
37 |
+
version_names (list): List of version names to search through, sorted by latest first
|
38 |
+
|
39 |
+
Returns:
|
40 |
+
tuple[pd.DataFrame, pd.DataFrame]: A tuple containing:
|
41 |
+
- results_df: DataFrame with benchmark results
|
42 |
+
- latency_df: DataFrame with latency measurements
|
43 |
+
Returns (None, None) if no matching version is found or requests fail
|
44 |
+
|
45 |
+
Raises:
|
46 |
+
requests.RequestException: If there's an error fetching the data
|
47 |
+
pd.errors.EmptyDataError: If CSV file is empty
|
48 |
+
pd.errors.ParserError: If CSV parsing fails
|
49 |
+
"""
|
50 |
+
for v in version_names:
|
51 |
+
# Check if version matches benchmark type
|
52 |
+
is_multimodal = 'multimodal' in v
|
53 |
+
if (benchmark == "multimodal") != is_multimodal:
|
54 |
+
continue
|
55 |
+
|
56 |
+
# Construct URLs
|
57 |
+
results_url = os.path.join(CLEMBENCH_RUNS_REPO, v, RESULT_FILE)
|
58 |
+
latency_url = os.path.join(CLEMBENCH_RUNS_REPO, LATENCY_FOLDER, v + LATENCY_SUFFIX)
|
59 |
+
|
60 |
+
try:
|
61 |
+
results = requests.get(results_url)
|
62 |
+
latency = requests.get(latency_url)
|
63 |
+
|
64 |
+
if validate_request(results_url, results) and validate_request(latency_url, latency):
|
65 |
+
# Convert the CSV content to pandas DataFrames
|
66 |
+
results_df = pd.read_csv(pd.io.common.StringIO(results.text))
|
67 |
+
latency_df = pd.read_csv(pd.io.common.StringIO(latency.text))
|
68 |
+
return results_df, latency_df
|
69 |
+
|
70 |
+
except requests.RequestException as e:
|
71 |
+
print(f"Error fetching data for version {v}: {e}")
|
72 |
+
except pd.errors.EmptyDataError:
|
73 |
+
print(f"Error: Empty CSV file found for version {v}")
|
74 |
+
except pd.errors.ParserError:
|
75 |
+
print(f"Error: Unable to parse CSV data for version {v}")
|
76 |
+
|
77 |
+
return None, None
|
78 |
+
|
79 |
+
def fetch_version_metadata() -> tuple:
    """
    Fetch and process benchmark metadata from the Clembench GitHub repository.

    The data is sourced from: https://github.com/clembench/clembench-runs
    Configure the repository path via CLEMBENCH_RUNS_REPO in assets/text_content.py

    Returns:
        tuple: Four values, in exactly this order (latency first, then results):
            - mm_latency: Multimodal latency data
            - mm_result: Multimodal benchmark results
            - text_latency: Text latency data
            - text_result: Text benchmark results
        Each pair is (None, None) when fetch_benchmark_data finds nothing.
        Returns (None, None, None, None) if the version index cannot be
        downloaded or is not valid JSON.
    """
    json_url = CLEMBENCH_RUNS_REPO + BENCHMARK_FILE

    try:
        response = requests.get(json_url)
    except requests.RequestException as e:
        # Honour the documented contract instead of crashing on network errors
        print(f"Error fetching benchmark metadata: {e}")
        return None, None, None, None

    # Check if the JSON file request was successful
    if not validate_request(json_url, response):
        return None, None, None, None

    try:
        json_data = response.json()
    except ValueError as e:
        # Every JSON decode error raised by response.json() is a ValueError
        print(f"Error parsing benchmark metadata JSON: {e}")
        return None, None, None, None

    versions = json_data['versions']

    # Sort the versions in benchmark by latest first.
    # The key drops the first character and any '_suffix', then compares the
    # dot-separated integers, e.g. 'v1.6.5_multimodal' -> [1, 6, 5].
    version_names = sorted(
        [ver['version'] for ver in versions],
        key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))),
        reverse=True
    )

    # Latency is in seconds
    mm_result, mm_latency = fetch_benchmark_data("multimodal", version_names)
    text_result, text_latency = fetch_benchmark_data("text", version_names)

    return mm_latency, mm_result, text_latency, text_result
|
116 |
+
|
117 |
+
def fetch_registry_data() -> dict:
    """
    Fetch and parse model registry data from the Clembench registry URL.

    The data is sourced from the model registry defined in REGISTRY_URL.

    Returns:
        The decoded JSON document served at REGISTRY_URL, or None if the
        request fails, fails validation, or the body is not valid JSON.
        Errors are reported with print() and never raised to the caller.

        NOTE(review): the annotation says dict, but the script entry point in
        this file indexes the result with [0], which suggests a list of model
        entries -- confirm and adjust the annotation.
    """
    try:
        response = requests.get(REGISTRY_URL)
        if not validate_request(REGISTRY_URL, response):
            return None

        return response.json()

    # The JSON clause must come first: the decode error raised by
    # response.json() in recent requests releases is also a RequestException,
    # so the broader clause would otherwise report it as a fetch failure.
    except json.JSONDecodeError as e:
        print(f"Error parsing registry JSON: {e}")
    except requests.RequestException as e:
        print(f"Error fetching registry data: {e}")

    return None
|
146 |
+
|
147 |
+
if __name__ == "__main__":
    # Manual smoke test: exercise both fetchers and show the first registry entry.
    fetch_version_metadata()
    registry_data = fetch_registry_data()
    # fetch_registry_data() returns None on failure; indexing None would raise
    # TypeError and hide the error message it already printed.
    if registry_data:
        print(registry_data[0])
    else:
        print("No registry data available")
|
151 |
+
|
152 |
+
|
{utils → src}/filter_utils.py
RENAMED
@@ -4,19 +4,17 @@ import pandas as pd
|
|
4 |
|
5 |
def filter_cols(df):
|
6 |
|
7 |
-
|
8 |
df = df[[
|
9 |
'Model Name',
|
10 |
-
'
|
11 |
-
'
|
12 |
-
'
|
|
|
13 |
'Context Size (k)',
|
14 |
-
'
|
15 |
-
'Parameter Size (B)',
|
16 |
'Release Date',
|
17 |
'License'
|
18 |
]]
|
19 |
-
|
20 |
|
21 |
return df
|
22 |
|
@@ -24,38 +22,59 @@ def filter_cols(df):
|
|
24 |
def filter(df, language_list, parameters, input_price, output_price, multimodal,
|
25 |
context, open_weight, start, end, license ):
|
26 |
|
|
|
27 |
if not df.empty: # Check if df is non-empty
|
28 |
df = df[df['Languages'].apply(lambda x: all(lang in x for lang in language_list))]
|
29 |
-
|
30 |
-
if not df.empty:
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
print(f"MMMMMMMMMMMMMMMMMMMMMMm: {max_parameter_size}")
|
35 |
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
if not df.empty: # Check if df is non-empty
|
42 |
-
df = df[(df['Input $/1M'] >= input_price[0]) & (df['Input $/1M'] <= input_price[1])]
|
43 |
|
44 |
if not df.empty: # Check if df is non-empty
|
45 |
-
df = df[(df['Output $/1M'] >= output_price[0]) & (df['Output $/1M'] <= output_price[1])]
|
|
|
|
|
|
|
|
|
46 |
|
47 |
if not df.empty: # Check if df is non-empty
|
48 |
if "Image" in multimodal:
|
49 |
-
df = df[df['
|
50 |
if "Multi-Image" in multimodal:
|
51 |
-
df = df[df['
|
52 |
if "Audio" in multimodal:
|
53 |
-
df = df[df['
|
54 |
if "Video" in multimodal:
|
55 |
-
df = df[df['
|
56 |
|
57 |
-
if not df.empty: # Check if df is non-empty
|
58 |
-
|
|
|
|
|
|
|
|
|
59 |
|
60 |
if not df.empty: # Check if df is non-empty
|
61 |
if "Open" in open_weight and "Commercial" not in open_weight:
|
@@ -80,5 +99,7 @@ def filter(df, language_list, parameters, input_price, output_price, multimodal,
|
|
80 |
|
81 |
df = filter_cols(df)
|
82 |
|
|
|
|
|
83 |
return df # Return the filtered dataframe
|
84 |
|
|
|
4 |
|
5 |
def filter_cols(df):
|
6 |
|
|
|
7 |
df = df[[
|
8 |
'Model Name',
|
9 |
+
'Clemscore',
|
10 |
+
'Input $/1M tokens',
|
11 |
+
'Output $/1M tokens',
|
12 |
+
'Latency (s)',
|
13 |
'Context Size (k)',
|
14 |
+
'Parameters (B)',
|
|
|
15 |
'Release Date',
|
16 |
'License'
|
17 |
]]
|
|
|
18 |
|
19 |
return df
|
20 |
|
|
|
22 |
def filter(df, language_list, parameters, input_price, output_price, multimodal,
|
23 |
context, open_weight, start, end, license ):
|
24 |
|
25 |
+
|
26 |
if not df.empty: # Check if df is non-empty
|
27 |
df = df[df['Languages'].apply(lambda x: all(lang in x for lang in language_list))]
|
28 |
+
|
29 |
+
if not df.empty:
|
30 |
+
# Split dataframe by Open Weight
|
31 |
+
open_weight_true = df[df['Open Weight'] == True]
|
32 |
+
open_weight_false = df[df['Open Weight'] == False]
|
|
|
33 |
|
34 |
+
# Get max parameter size for open weight models
|
35 |
+
max_parameter_size = open_weight_true['Parameters (B)'].max() if not open_weight_true.empty else 0
|
36 |
+
|
37 |
+
# Filter only the open weight models based on parameters
|
38 |
+
if not open_weight_true.empty:
|
39 |
+
if parameters[1] >= max_parameter_size:
|
40 |
+
filtered_open = open_weight_true[
|
41 |
+
(open_weight_true['Parameters (B)'] >= parameters[0])
|
42 |
+
]
|
43 |
+
else:
|
44 |
+
filtered_open = open_weight_true[
|
45 |
+
(open_weight_true['Parameters (B)'] >= parameters[0]) &
|
46 |
+
(open_weight_true['Parameters (B)'] <= parameters[1])
|
47 |
+
]
|
48 |
+
|
49 |
+
# Combine filtered open weight models with unfiltered commercial models
|
50 |
+
df = pd.concat([filtered_open, open_weight_false])
|
51 |
+
|
52 |
if not df.empty: # Check if df is non-empty
|
53 |
+
df = df[(df['Input $/1M tokens'] >= input_price[0]) & (df['Input $/1M tokens'] <= input_price[1])]
|
54 |
|
55 |
if not df.empty: # Check if df is non-empty
|
56 |
+
df = df[(df['Output $/1M tokens'] >= output_price[0]) & (df['Output $/1M tokens'] <= output_price[1])]
|
57 |
+
|
58 |
+
|
59 |
+
print("Price")
|
60 |
+
print(df)
|
61 |
|
62 |
if not df.empty: # Check if df is non-empty
|
63 |
if "Image" in multimodal:
|
64 |
+
df = df[df['Image'] == True]
|
65 |
if "Multi-Image" in multimodal:
|
66 |
+
df = df[df['Multiple Image'] == True]
|
67 |
if "Audio" in multimodal:
|
68 |
+
df = df[df['Audio'] == True]
|
69 |
if "Video" in multimodal:
|
70 |
+
df = df[df['Video'] == True]
|
71 |
|
72 |
+
# if not df.empty: # Check if df is non-empty
|
73 |
+
# df = df[(df['Context Size (k)'] >= (context[0])) & (df['Context Size (k)'] <= (context[1]))]
|
74 |
+
|
75 |
+
|
76 |
+
print("Modality")
|
77 |
+
print(df)
|
78 |
|
79 |
if not df.empty: # Check if df is non-empty
|
80 |
if "Open" in open_weight and "Commercial" not in open_weight:
|
|
|
99 |
|
100 |
df = filter_cols(df)
|
101 |
|
102 |
+
print(df)
|
103 |
+
|
104 |
return df # Return the filtered dataframe
|
105 |
|
src/main_df.csv
DELETED
@@ -1,24 +0,0 @@
|
|
1 |
-
Model Name,Input $/1M,Output $/1M,Multimodality Image,Multimodality Multiple Image,Multimodality Audio,Multimodality Video,Source,License Name,License,Languages,Release Date,Open Weight,Context Size (k),Average Clemscore,Average Latency (s),Parameter Size (B),Estimated,Temp Date
|
2 |
-
"<a href=""https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct"" style=""color: blue;"">Meta-Llama-3-70B-Instruct-hf</a>",0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct,Meta Llama 3 License,"<a href=""https://www.llama.com/llama3/license/"" style=""color: blue;"">Meta Llama 3 License</a>",English,2024-04-18,True,8,11.703,1.116,70.0,False,2024-04-18
|
3 |
-
"<a href=""https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct"" style=""color: blue;"">Meta-Llama-3-8B-Instruct-hf</a>",0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct,Meta Llama 3 License,"<a href=""https://www.llama.com/llama3/license/"" style=""color: blue;"">Meta Llama 3 License</a>",English,2024-04-18,True,8,6.663,0.705,8.0,False,2024-04-18
|
4 |
-
"<a href=""https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct"" style=""color: blue;"">Meta-Llama-3.1-405B-Instruct-Turbo</a>",0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct,Llama 3.1 Community License,"<a href=""https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE"" style=""color: blue;"">Llama 3.1 Community License</a>","English, German, French, Italian, Hindi, Portuguese, Spanish, Thai",2024-07-23,True,128,17.37,0.263,405.0,False,2024-07-23
|
5 |
-
"<a href=""https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct"" style=""color: blue;"">Meta-Llama-3.1-70B-Instruct</a>",0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct,Llama 3.1 Community License,"<a href=""https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE"" style=""color: blue;"">Llama 3.1 Community License</a>","English, German, French, Italian, Hindi, Portuguese, Spanish, Thai",2024-07-23,True,128,12.943,0.27,70.0,False,2024-07-23
|
6 |
-
"<a href=""https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct"" style=""color: blue;"">Meta-Llama-3.1-8B-Instruct</a>",0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct,Llama 3.1 Community License,"<a href=""https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE"" style=""color: blue;"">Llama 3.1 Community License</a>","English, German, French, Italian, Hindi, Portuguese, Spanish, Thai",2024-07-23,True,128,6.12,0.069,8.0,False,2024-07-23
|
7 |
-
"<a href=""https://huggingface.co/OpenGVLab/InternVL2-40B"" style=""color: blue;"">InternVL2-40B</a>",0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-40B,MIT,"<a href=""https://choosealicense.com/licenses/mit/"" style=""color: blue;"">MIT</a>","Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8,21.81,2.609,40.0,False,2024-07-15
|
8 |
-
"<a href=""https://huggingface.co/OpenGVLab/InternVL2-8B"" style=""color: blue;"">InternVL2-8B</a>",0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-8B,MIT,"<a href=""https://choosealicense.com/licenses/mit/"" style=""color: blue;"">MIT</a>","Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8,19.74,0.837,8.0,False,2024-07-15
|
9 |
-
"<a href=""https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B"" style=""color: blue;"">InternVL2-Llama3-76B</a>",0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B,MIT,"<a href=""https://choosealicense.com/licenses/mit/"" style=""color: blue;"">MIT</a>","Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8,25.71,4.591,76.0,False,2024-07-15
|
10 |
-
"<a href=""https://huggingface.co/OpenGVLab/InternVL2-26B"" style=""color: blue;"">InternVL2-26B</a>",0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,"<a href=""https://choosealicense.com/licenses/mit/"" style=""color: blue;"">MIT</a>","Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8,23.24,1.759,26.0,False,2024-07-15
|
11 |
-
"<a href=""https://huggingface.co/OpenGVLab/InternVL2-26B"" style=""color: blue;"">InternVL2-26B</a>",0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,"<a href=""https://choosealicense.com/licenses/mit/"" style=""color: blue;"">MIT</a>","Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8,23.24,1.759,26.0,False,2024-07-15
|
12 |
-
"<a href=""https://huggingface.co/mistralai/Mistral-Large-Instruct-2407"" style=""color: blue;"">Mistral-Large-Instruct-2407</a>",0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-Large-Instruct-2407,Apache 2.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>","English, French, Spanish, German, Italian, Russian, Chinese, Japanese, Korean",2024-06-12,True,8,15.13,0.415,70.0,False,2024-06-12
|
13 |
-
"<a href=""https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1"" style=""color: blue;"">Mixtral-8x22B-Instruct-v0.1</a>",0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1,Apache 2.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>","English, French, Spanish, German, Italian, Russian",2024-04-17,True,8,4.23,0.359,141.0,False,2024-04-17
|
14 |
-
"<a href=""https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2"" style=""color: blue;"">Mistral-7B-Instruct-v0.2</a>",0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2,Apache 2.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>","English, French, Spanish, German, Italian, Russian, Chinese",2024-01-15,True,8,3.25,0.255,7.0,False,2024-01-15
|
15 |
-
"<a href=""https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1"" style=""color: blue;"">Mistral-7B-Instruct-v0.1</a>",0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1,Apache 2.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>","English, French, Spanish, German, Italian, Russian, Chinese",2023-12-11,True,8,2.67,0.094,7.0,False,2023-12-11
|
16 |
-
"<a href=""https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1"" style=""color: blue;"">Mixtral-8x7B-Instruct-v0.1</a>",0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1,Apache 2.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>","English, French, Spanish, German, Italian, Russian",2023-12-11,True,8,2.723,0.313,46.7,False,2023-12-11
|
17 |
-
"<a href=""https://huggingface.co/openchat/openchat-3.5-0106"" style=""color: blue;"">openchat-3.5-0106</a>",0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-0106,Apache 2.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",English,2024-01-06,True,8,5.7,0.097,7.0,False,2024-01-06
|
18 |
-
"<a href=""https://huggingface.co/openchat/openchat-3.5-1210"" style=""color: blue;"">openchat-3.5-1210</a>",0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-1210,Apache 2.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",English,2023-12-10,True,8,6.073,0.093,7.0,False,2023-12-10
|
19 |
-
"<a href=""https://huggingface.co/openchat/openchat_3.5"" style=""color: blue;"">openchat_3.5</a>",0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat_3.5,Apache 2.0,"<a href=""https://www.apache.org/licenses/LICENSE-2.0"" style=""color: blue;"">Apache 2.0</a>",English,2023-10-30,True,8,7.88,0.106,7.0,False,2023-10-30
|
20 |
-
"<a href=""https://openai.com/api/pricing/"" style=""color: blue;"">gpt-4o-mini-2024-07-18</a>",0.15,0.6,True,True,False,False,https://openai.com/api/pricing/,Commercial License,"<a href=""https://openai.com/policies/terms-of-use"" style=""color: blue;"">Commercial License</a>","English, Spanish, French, German, Chinese, Japanese, Korean, Italian, Portuguese, Dutch, Russian, Arabic, Hindi, Turkish, Vietnamese, Polish, Thai, Swedish, Danish, Norwegian, Finnish, Hungarian, Czech, Slovak, Romanian, Bulgarian, Ukrainian, Lithuanian, Latvian, Estonian, Slovenian, Malay, Indonesian, Tagalog, Swahili, Amharic",2024-07-18,False,128,52.323,1.619,8.0,True,2024-07-18
|
21 |
-
"<a href=""https://openai.com/api/pricing/"" style=""color: blue;"">gpt-4o-2024-08-06</a>",2.5,10.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,"<a href=""https://openai.com/policies/terms-of-use"" style=""color: blue;"">Commercial License</a>","English, Spanish, French, German, Chinese, Japanese, Korean, Italian, Portuguese, Dutch, Russian, Arabic, Hindi, Turkish, Vietnamese, Polish, Thai, Swedish, Danish, Norwegian, Finnish, Hungarian, Czech, Slovak, Romanian, Bulgarian, Ukrainian, Lithuanian, Latvian, Estonian, Slovenian, Malay, Indonesian, Tagalog, Swahili, Amharic",2024-08-06,False,128,69.57,1.577,200.0,True,2024-08-06
|
22 |
-
"<a href=""https://openai.com/api/pricing/"" style=""color: blue;"">gpt-4o-2024-05-13</a>",2.5,10.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,"<a href=""https://openai.com/policies/terms-of-use"" style=""color: blue;"">Commercial License</a>","English, Spanish, French, German, Chinese, Japanese, Korean, Italian, Portuguese, Dutch, Russian, Arabic, Hindi, Turkish, Vietnamese, Polish, Thai, Swedish, Danish, Norwegian, Finnish, Hungarian, Czech, Slovak, Romanian, Bulgarian, Ukrainian, Lithuanian, Latvian, Estonian, Slovenian, Malay, Indonesian, Tagalog, Swahili, Amharic",2024-05-13,False,128,66.873,3.705,200.0,True,2024-05-13
|
23 |
-
"<a href=""https://openai.com/api/pricing/"" style=""color: blue;"">gpt-4-1106-vision-preview</a>",10.0,30.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,"<a href=""https://openai.com/policies/terms-of-use"" style=""color: blue;"">Commercial License</a>","English, Spanish, French, German, Chinese, Japanese, Korean, Italian, Portuguese, Dutch, Russian, Arabic, Hindi, Turkish, Vietnamese, Polish, Thai, Swedish, Danish, Norwegian, Finnish, Hungarian, Czech, Slovak, Romanian, Bulgarian, Ukrainian, Lithuanian, Latvian, Estonian, Slovenian, Malay, Indonesian, Tagalog, Swahili, Amharic",2023-11-06,False,128,47.23,2.217,1760.0,True,2023-11-06
|
24 |
-
"<a href=""https://cloud.google.com/vertex-ai/generative-ai/pricing"" style=""color: blue;"">gemini-1.5-flash-latest</a>",0.075,0.3,True,True,True,True,https://cloud.google.com/vertex-ai/generative-ai/pricing,Commercial License,"<a href="""" style=""color: blue;"">Commercial License</a>","Lithuanian, Norwegian, Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovenian, Spanish, Swahili, Swedish, Thai, Turkish, Ukrainian, Vietnamese, Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek, Hebrew, Hindi, Hungarian, Indonesian, Italian, Japanese, Korean, Latvian, Arabic, Bengali, Bulgarian",2024-05-24,False,128,42.537,26.268,1760.0,True,2024-05-24
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/pricing.py
DELETED
@@ -1,36 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
import requests
|
3 |
-
import os
|
4 |
-
|
5 |
-
def fetch_prices():
|
6 |
-
# Fetch the JSON data from the URL
|
7 |
-
url = "https://llm-price.huhuhang.workers.dev/"
|
8 |
-
response = requests.get(url)
|
9 |
-
|
10 |
-
# Check if the request was successful
|
11 |
-
if response.status_code == 200:
|
12 |
-
data = response.json()
|
13 |
-
# Extract relevant information
|
14 |
-
extracted_data = []
|
15 |
-
for entry in data:
|
16 |
-
extracted_info = {
|
17 |
-
"model_name": entry["fields"]["model_name"],
|
18 |
-
"provider": entry["fields"]["provider"],
|
19 |
-
"input_tokens": entry["fields"]["input_tokens"],
|
20 |
-
"output_tokens": entry["fields"]["output_tokens"],
|
21 |
-
"url": entry["fields"]["url"],
|
22 |
-
"update_time": entry["fields"]["update_time"]
|
23 |
-
}
|
24 |
-
extracted_data.append(extracted_info)
|
25 |
-
|
26 |
-
# Create a DataFrame from the extracted data
|
27 |
-
df = pd.DataFrame(extracted_data)
|
28 |
-
save_path = os.path.join('src', 'prices.csv')
|
29 |
-
df.to_csv(save_path, index=False) # Save the DataFrame as a CSV file
|
30 |
-
print(f"Saved the Prices as a CSV under {save_path}")
|
31 |
-
else:
|
32 |
-
print(f"Failed to retrieve data: {response.status_code}")
|
33 |
-
return None
|
34 |
-
|
35 |
-
if __name__ == '__main__':
|
36 |
-
fetch_prices()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/process_data.py
DELETED
@@ -1,209 +0,0 @@
|
|
1 |
-
# ... existing code ...
|
2 |
-
import pandas as pd
|
3 |
-
import json
|
4 |
-
|
5 |
-
# Load the JSON data
|
6 |
-
with open('src/combined_data.json') as f:
|
7 |
-
data = json.load(f)
|
8 |
-
|
9 |
-
# Flatten the data
|
10 |
-
flattened_data = []
|
11 |
-
for entry in data:
|
12 |
-
flattened_entry = {
|
13 |
-
"model_name": entry["model_name"],
|
14 |
-
"input_price": entry["pricing"]["input_price"],
|
15 |
-
"output_price": entry["pricing"]["output_price"],
|
16 |
-
"multimodality_image": entry["multimodality"]["image"],
|
17 |
-
"multimodality_multiple_image": entry["multimodality"]["multiple_image"],
|
18 |
-
"multimodality_audio": entry["multimodality"]["audio"],
|
19 |
-
"multimodality_video": entry["multimodality"]["video"],
|
20 |
-
"source": entry["pricing"]["source"],
|
21 |
-
"license_name": entry["license"]["name"],
|
22 |
-
"license_url": entry["license"]["url"],
|
23 |
-
"languages": ", ".join(entry["languages"]),
|
24 |
-
"release_date": entry["release_date"],
|
25 |
-
"parameter_size": entry["parameters"]["size"],
|
26 |
-
"estimated": entry["parameters"]["estimated"],
|
27 |
-
"open_weight": entry["open_weight"],
|
28 |
-
"context_size": entry["context_size"],
|
29 |
-
|
30 |
-
# ... additional prices ...
|
31 |
-
"additional_prices_context_caching": entry["pricing"].get("additional_prices", {}).get("context_caching", None),
|
32 |
-
"additional_prices_context_storage": entry["pricing"].get("additional_prices", {}).get("context_storage", None),
|
33 |
-
"additional_prices_image_input": entry["pricing"].get("additional_prices", {}).get("image_input", None),
|
34 |
-
"additional_prices_image_output": entry["pricing"].get("additional_prices", {}).get("image_output", None),
|
35 |
-
"additional_prices_video_input": entry["pricing"].get("additional_prices", {}).get("video_input", None),
|
36 |
-
"additional_prices_video_output": entry["pricing"].get("additional_prices", {}).get("video_output", None),
|
37 |
-
"additional_prices_audio_input": entry["pricing"].get("additional_prices", {}).get("audio_input", None),
|
38 |
-
"additional_prices_audio_output": entry["pricing"].get("additional_prices", {}).get("audio_output", None),
|
39 |
-
}
|
40 |
-
flattened_data.append(flattened_entry)
|
41 |
-
|
42 |
-
# Create a DataFrame
|
43 |
-
df = pd.DataFrame(flattened_data)
|
44 |
-
|
45 |
-
# Load the results CSV files
|
46 |
-
results_1_6_5_multimodal = pd.read_csv('src/results_1.6.5_multimodal.csv', header=None)
|
47 |
-
results_1_6_5_ascii = pd.read_csv('src/results_1.6.5_ascii.csv', header=None)
|
48 |
-
results_1_6 = pd.read_csv('src/results_1.6.csv', header=None)
|
49 |
-
|
50 |
-
# Split model names by '-t0.0' and use the first part
|
51 |
-
results_1_6_5_multimodal[0] = results_1_6_5_multimodal[0].str.split('-t0.0').str[0]
|
52 |
-
results_1_6_5_ascii[0] = results_1_6_5_ascii[0].str.split('-t0.0').str[0]
|
53 |
-
results_1_6[0] = results_1_6[0].str.split('-t0.0').str[0]
|
54 |
-
|
55 |
-
|
56 |
-
# Create a mapping for clemscore values
|
57 |
-
clemscore_map_1_6_5_multimodal = dict(zip(results_1_6_5_multimodal[0], results_1_6_5_multimodal[1]))
|
58 |
-
clemscore_map_1_6_5_ascii = dict(zip(results_1_6_5_ascii[0], results_1_6_5_ascii[1]))
|
59 |
-
clemscore_map_1_6 = dict(zip(results_1_6[0], results_1_6[1]))
|
60 |
-
|
61 |
-
|
62 |
-
# Add clemscore columns to the main DataFrame
|
63 |
-
df['clemscore_v1.6.5_multimodal'] = df['model_name'].map(clemscore_map_1_6_5_multimodal).fillna(0).astype(float)
|
64 |
-
df['clemscore_v1.6.5_ascii'] = df['model_name'].map(clemscore_map_1_6_5_ascii).fillna(0).astype(float)
|
65 |
-
df['clemscore_v1.6'] = df['model_name'].map(clemscore_map_1_6).fillna(0).astype(float)
|
66 |
-
|
67 |
-
# Load the latency CSV files
|
68 |
-
latency_1_6 = pd.read_csv('src/v1.6_latency.csv', header=None)
|
69 |
-
latency_1_6_5_ascii = pd.read_csv('src/v1.6.5_ascii_latency.csv', header=None)
|
70 |
-
latency_1_6_5_multimodal = pd.read_csv('src/v1.6.5_multimodal_latency.csv', header=None)
|
71 |
-
|
72 |
-
# Create a mapping for latency values
|
73 |
-
latency_map_1_6 = dict(zip(latency_1_6[0], latency_1_6[1]))
|
74 |
-
latency_map_1_6_5_ascii = dict(zip(latency_1_6_5_ascii[0], latency_1_6_5_ascii[1]))
|
75 |
-
latency_map_1_6_5_multimodal = dict(zip(latency_1_6_5_multimodal[0], latency_1_6_5_multimodal[1]))
|
76 |
-
|
77 |
-
# Add latency columns to the main DataFrame
|
78 |
-
df['latency_v1.6'] = df['model_name'].map(latency_map_1_6).fillna(0).astype(float)
|
79 |
-
df['latency_v1.6.5_multimodal'] = df['model_name'].map(latency_map_1_6_5_multimodal).fillna(0).astype(float)
|
80 |
-
df['latency_v1.6.5_ascii'] = df['model_name'].map(latency_map_1_6_5_ascii).fillna(0).astype(float)
|
81 |
-
|
82 |
-
|
83 |
-
# Calculate average latency and clemscore
|
84 |
-
df['average_clemscore'] = df[['clemscore_v1.6.5_multimodal', 'clemscore_v1.6.5_ascii', 'clemscore_v1.6']].mean(axis=1).round(3)
|
85 |
-
df['average_latency'] = df[['latency_v1.6', 'latency_v1.6.5_multimodal', 'latency_v1.6.5_ascii']].mean(axis=1).round(3)
|
86 |
-
|
87 |
-
|
88 |
-
# More clean up
|
89 |
-
# Clean and convert prices to float
|
90 |
-
df['input_price'] = df['input_price'].replace({'\$': '', '': None}, regex=True).astype(float).round(3)
|
91 |
-
df['output_price'] = df['output_price'].replace({'\$': '', '': None}, regex=True).astype(float).round(3)
|
92 |
-
|
93 |
-
# Clean and convert additional prices to float
|
94 |
-
additional_price_columns = [
|
95 |
-
'additional_prices_context_caching',
|
96 |
-
'additional_prices_context_storage',
|
97 |
-
'additional_prices_image_input',
|
98 |
-
'additional_prices_image_output',
|
99 |
-
'additional_prices_video_input',
|
100 |
-
'additional_prices_video_output',
|
101 |
-
'additional_prices_audio_input',
|
102 |
-
'additional_prices_audio_output'
|
103 |
-
]
|
104 |
-
|
105 |
-
for col in additional_price_columns:
|
106 |
-
df[col] = df[col].replace({'\$': '', '': None}, regex=True).astype(float).round(3)
|
107 |
-
|
108 |
-
# Clean and convert context to integer
|
109 |
-
df['context_size'] = df['context_size'].replace({'k': ''}, regex=True).astype(int)
|
110 |
-
|
111 |
-
df['context_size'] = df['context_size']
|
112 |
-
|
113 |
-
df['parameter_size'] = df['parameter_size'].replace({'B': '', '': None}, regex=True).astype(float)
|
114 |
-
|
115 |
-
LANG_MAPPING = {
|
116 |
-
'el': 'Greek',
|
117 |
-
'id': 'Indonesian',
|
118 |
-
'ko': 'Korean',
|
119 |
-
'sv': 'Swedish',
|
120 |
-
'de': 'German',
|
121 |
-
'lv': 'Latvian',
|
122 |
-
'am': 'Amharic',
|
123 |
-
'fi': 'Finnish',
|
124 |
-
'da': 'Danish',
|
125 |
-
'pt': 'Portuguese',
|
126 |
-
'sw': 'Swahili',
|
127 |
-
'es': 'Spanish',
|
128 |
-
'it': 'Italian',
|
129 |
-
'bn': 'Bengali',
|
130 |
-
'nl': 'Dutch',
|
131 |
-
'lt': 'Lithuanian',
|
132 |
-
'ro': 'Romanian',
|
133 |
-
'sl': 'Slovenian',
|
134 |
-
'hu': 'Hungarian',
|
135 |
-
'hr': 'Croatian',
|
136 |
-
'vi': 'Vietnamese',
|
137 |
-
'hi': 'Hindi',
|
138 |
-
'zh': 'Chinese',
|
139 |
-
'pl': 'Polish',
|
140 |
-
'ar': 'Arabic',
|
141 |
-
'cs': 'Czech',
|
142 |
-
'sk': 'Slovak',
|
143 |
-
'ja': 'Japanese',
|
144 |
-
'no': 'Norwegian',
|
145 |
-
'uk': 'Ukrainian',
|
146 |
-
'fr': 'French',
|
147 |
-
'et': 'Estonian',
|
148 |
-
'ru': 'Russian',
|
149 |
-
'th': 'Thai',
|
150 |
-
'bg': 'Bulgarian',
|
151 |
-
'tr': 'Turkish',
|
152 |
-
'ms': 'Malay',
|
153 |
-
'he': 'Hebrew',
|
154 |
-
'tl': 'Tagalog',
|
155 |
-
'sr': 'Serbian',
|
156 |
-
'en': 'English'
|
157 |
-
}
|
158 |
-
|
159 |
-
df['languages'] = df['languages'].apply(lambda x: ', '.join([LANG_MAPPING.get(lang, lang) for lang in x.split(', ')]))
|
160 |
-
|
161 |
-
# Keep only the specified columns
|
162 |
-
df = df[[
|
163 |
-
'model_name',
|
164 |
-
'input_price',
|
165 |
-
'output_price',
|
166 |
-
'multimodality_image',
|
167 |
-
'multimodality_multiple_image',
|
168 |
-
'multimodality_audio',
|
169 |
-
'multimodality_video',
|
170 |
-
'source',
|
171 |
-
'license_name',
|
172 |
-
'license_url',
|
173 |
-
'languages',
|
174 |
-
'release_date',
|
175 |
-
'open_weight',
|
176 |
-
'context_size',
|
177 |
-
'average_clemscore',
|
178 |
-
'average_latency',
|
179 |
-
'parameter_size',
|
180 |
-
'estimated'
|
181 |
-
]]
|
182 |
-
|
183 |
-
df = df.rename(columns={
|
184 |
-
'model_name': 'Model Name',
|
185 |
-
'input_price': 'Input $/1M',
|
186 |
-
'output_price': 'Output $/1M',
|
187 |
-
'multimodality_image': 'Multimodality Image',
|
188 |
-
'multimodality_multiple_image': 'Multimodality Multiple Image',
|
189 |
-
'multimodality_audio': 'Multimodality Audio',
|
190 |
-
'multimodality_video': 'Multimodality Video',
|
191 |
-
'source': 'Source',
|
192 |
-
'license_name': 'License Name',
|
193 |
-
'license_url': 'License',
|
194 |
-
'languages': 'Languages',
|
195 |
-
'release_date': 'Release Date',
|
196 |
-
'open_weight': 'Open Weight',
|
197 |
-
'context_size': 'Context Size (k)',
|
198 |
-
'average_clemscore': 'Average Clemscore',
|
199 |
-
'average_latency': 'Average Latency (s)',
|
200 |
-
'parameter_size': 'Parameter Size (B)',
|
201 |
-
'estimated': 'Estimated'
|
202 |
-
})
|
203 |
-
|
204 |
-
df['License'] = df.apply(lambda row: f'<a href="{row["License"]}" style="color: blue;">{row["License Name"]}</a>', axis=1)
|
205 |
-
df['Model Name'] = df.apply(lambda row: f'<a href="{row["Source"]}" style="color: blue;">{row["Model Name"]}</a>', axis=1)
|
206 |
-
df['Temp Date'] = df['Release Date']
|
207 |
-
print(df)
|
208 |
-
# Save to CSV
|
209 |
-
df.to_csv('src/main_df.csv', index=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/process_data_v2.py
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
|
5 |
+
from src.collect_data import fetch_version_metadata, fetch_registry_data
|
6 |
+
from assets.text_content import LANG_MAPPING
|
7 |
+
PRICING_PATH = os.path.join('assets', 'pricing.json')
|
8 |
+
|
9 |
+
# Convert parameters to float, handling both B and T suffixes
def convert_parameters(param):
    """
    Convert a parameter-count label to a float.

    A value containing 'T' is multiplied by 1000 after the suffix is removed;
    a 'B' suffix is simply removed. Suffix matching is case-insensitive and
    surrounding whitespace is ignored.

    Args:
        param: A label such as "70B" or "1.5T", a plain number, or a missing
            value (None, NaN, empty or whitespace-only string).

    Returns:
        float | None: The numeric value, or None for a missing value.

    Raises:
        ValueError: If the remaining text is not a valid number.
    """
    if pd.isna(param) or param == '':
        return None

    # Normalise once so "7b", " 7B " and "7B" are all treated alike.
    text = str(param).strip().upper()
    if not text:
        return None

    if 'T' in text:
        return float(text.replace('T', '')) * 1000
    return float(text.replace('B', ''))
|
17 |
+
|
18 |
+
# Clean price strings by removing '$' and handling empty strings
def clean_price(price):
    """
    Convert a price value to a float, dropping any '$' characters.

    Args:
        price: A price such as "$2.50" or "0.15", an already-numeric value,
            or a missing value (None, NaN, empty string).

    Returns:
        float | None: The numeric price, or None for a missing value.

    Raises:
        ValueError: If the text left after removing '$' is not a valid number.
    """
    if pd.isna(price) or price == '':
        return None
    # str() first: a numeric price has no .replace() method, and
    # convert_parameters in this file normalises its input the same way.
    return float(str(price).replace('$', ''))
|
23 |
+
|
24 |
+
# Handle language mapping for both string and list inputs
def map_languages(languages):
    """
    Translate language codes to display names via LANG_MAPPING.

    Codes that are not keys of LANG_MAPPING are kept unchanged.

    Args:
        languages: One of
            - a missing value (None or float NaN),
            - a comma-separated string of codes (each code is stripped),
            - a list or any other iterable of codes,
            - any non-iterable value.

    Returns:
        str | None: The mapped names joined with ", "; None for a missing
        value; str(languages) for a non-iterable value.
    """
    # None is treated like NaN; otherwise it would fall through to the
    # non-iterable branch and come back as the literal text "None".
    if languages is None or (isinstance(languages, float) and pd.isna(languages)):
        return None

    if isinstance(languages, str):
        codes = [code.strip() for code in languages.split(',')]
    else:
        try:
            # Lists, tuples, arrays, ... are all mapped the same way.
            codes = [str(code) for code in languages]
        except TypeError:
            # Not iterable at all: fall back to its string form.
            return str(languages)

    return ', '.join([LANG_MAPPING.get(code, code) for code in codes])
|
39 |
+
|
40 |
+
# Extract multimodality fields
|
41 |
+
def get_multimodality_field(model_data, field):
    """Read one flag from ``model_config -> multimodality`` of a registry entry.

    Args:
        model_data: A mapping-like object exposing ``.get`` (a dict or a
            DataFrame row) describing one model.
        field: Name of the multimodality flag, e.g. ``'single_image'``.

    Returns:
        The stored value for ``field``, or ``False`` when any level of the
        nested structure is absent or is not a mapping.
    """
    try:
        config = model_data.get('model_config', {})
        multimodality = config.get('multimodality', {})
        return multimodality.get(field, False)
    except (AttributeError, TypeError):
        # A level was None/NaN/non-mapping (no usable .get): treat as "not supported".
        return False
|
46 |
+
|
47 |
+
|
48 |
+
def merge_data():
    """Assemble the leaderboard table from results, latency, registry and pricing.

    Steps:
      1. Fetch the multimodal/text latency and result frames and the model
         registry, and load the pricing JSON from ``PRICING_PATH``.
      2. Average latency and clemscore per model across both result sets.
      3. Inner-join with the registry metadata (license, multimodality flags,
         parameters, languages, ...) and left-join the pricing data.
      4. Rename columns to their display names and normalise parameters,
         prices and languages.

    Returns:
        A DataFrame with one row per joined model and the display columns
        ``Model Name``, ``Latency (s)``, ``Clemscore``, ``Parameters (B)``,
        ``Release Date``, ``Open Weight``, ``Languages``, ``Context Size (k)``,
        ``License Name``, ``License URL``, the four multimodality flags, the two
        pricing columns, ``License`` (HTML link) and ``Temp Date``.
    """
    mm_latency_df, mm_result_df, text_latency_df, text_result_df = fetch_version_metadata()
    registry_data = fetch_registry_data()
    with open(PRICING_PATH, 'r', encoding='utf-8') as f:
        pricing_data = json.load(f)

    # Ensure the unnamed column is renamed to 'model'
    result_renames = {'Unnamed: 0': 'model', '-, clemscore': 'clemscore'}
    mm_result_df.rename(columns=result_renames, inplace=True)
    text_result_df.rename(columns=result_renames, inplace=True)

    # Result rows are named "<model>-t0.0--<model>-t0.0"; keep the first model name.
    # The separator is escaped because multi-character patterns are treated as
    # regular expressions, where an unescaped '.' would match any character.
    # NOTE(review): per the original author's note, get_latency.py splits on '-t'
    # instead, which breaks names like gpt-3.5-turbo / gpt-4-turbo — confirm there.
    separator = r'-t0\.0--'
    mm_result_df['model'] = mm_result_df['model'].str.split(separator).str[0]
    text_result_df['model'] = text_result_df['model'].str.split(separator).str[0]

    # Average latency and clemscore per model over both benchmark versions
    avg_latency_df = (
        pd.concat([mm_latency_df, text_latency_df], axis=0)
        .groupby('model')['latency'].mean().reset_index()
    )
    avg_clemscore_df = (
        pd.concat([mm_result_df, text_result_df], axis=0)
        .groupby('model')['clemscore'].mean().reset_index()
    )

    # Outer join: keep models that have only a latency or only a clemscore
    lat_clem_df = pd.merge(avg_latency_df, avg_clemscore_df, on='model', how='outer')

    # Convert registry_data to DataFrame for easier merging
    registry_df = pd.DataFrame(registry_data)

    # Extract license info
    registry_df['license_name'] = registry_df['license'].apply(lambda x: x['name'])
    registry_df['license_url'] = registry_df['license'].apply(lambda x: x['url'])

    # Add individual multimodality columns
    for flag in ('single_image', 'multiple_images', 'audio', 'video'):
        registry_df[flag] = registry_df.apply(
            lambda x, flag=flag: get_multimodality_field(x, flag), axis=1
        )

    # Keep only the registry columns needed downstream
    registry_df = registry_df[[
        'model_name', 'parameters', 'release_date', 'open_weight',
        'languages', 'context_size', 'license_name', 'license_url',
        'single_image', 'multiple_images', 'audio', 'video'
    ]]

    # Inner join: only models present in both the scores and the registry survive
    merged_df = pd.merge(
        lat_clem_df,
        registry_df,
        left_on='model',
        right_on='model_name',
        how='inner'
    )

    # Switch to display column names
    merged_df = merged_df.rename(columns={
        'model': 'Model Name',
        'latency': 'Latency (s)',
        'clemscore': 'Clemscore',
        'parameters': 'Parameters (B)',
        'release_date': 'Release Date',
        'open_weight': 'Open Weight',
        'languages': 'Languages',
        'context_size': 'Context Size (k)',
        'license_name': 'License Name',
        'license_url': 'License URL',
        'single_image': 'Single Image',
        'multiple_images': 'Multiple Images',
        'audio': 'Audio',
        'video': 'Video'
    })

    # Convert pricing_data list to DataFrame
    pricing_df = pd.DataFrame(pricing_data)
    pricing_df['input'] = pricing_df['input'].apply(clean_price)
    pricing_df['output'] = pricing_df['output'].apply(clean_price)

    # Left join: models without a pricing entry are kept
    merged_df = pd.merge(
        merged_df,
        pricing_df,
        left_on='Model Name',
        right_on='model_id',
        how='left'
    )

    # Drop duplicate model column and rename price columns
    merged_df = merged_df.drop('model_id', axis=1)
    merged_df = merged_df.rename(columns={
        'input': 'Input $/1M tokens',
        'output': 'Output $/1M tokens'
    })

    # Fill NaN values with 0.0 for pricing columns
    merged_df['Input $/1M tokens'] = merged_df['Input $/1M tokens'].fillna(0.0)
    merged_df['Output $/1M tokens'] = merged_df['Output $/1M tokens'].fillna(0.0)

    merged_df['Parameters (B)'] = merged_df['Parameters (B)'].apply(convert_parameters)

    # HTML link combining license name and URL
    merged_df['License'] = merged_df.apply(lambda row: f'<a href="{row["License URL"]}" style="color: blue;">{row["License Name"]}</a>', axis=1)
    merged_df['Temp Date'] = merged_df['Release Date']

    merged_df['Languages'] = merged_df['Languages'].apply(map_languages)

    # merged_df['Context Size (k)'] = merged_df['Context Size (k)'].replace({'k': ''}, regex=True).astype(int)

    # 'model_name' duplicates 'Model Name' after the registry join
    merged_df.drop(columns=['model_name'], inplace=True)

    return merged_df
|
152 |
+
|
153 |
+
if __name__ == '__main__':
    # Rebuild the merged leaderboard table and save it as CSV under assets/.
    output_path = os.path.join('assets', 'merged_data.csv')
    merge_data().to_csv(output_path, index=False)
|
src/results_1.6.5_ascii.csv
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
,"-, clemscore","all, Average % Played","all, Average Quality Score","matchit_ascii, % Played","matchit_ascii, Quality Score","matchit_ascii, Quality Score (std)","referencegame, % Played","referencegame, Quality Score","referencegame, Quality Score (std)","textmapworld, % Played","textmapworld, Quality Score","textmapworld, Quality Score (std)","textmapworld_graphreasoning, % Played","textmapworld_graphreasoning, Quality Score","textmapworld_graphreasoning, Quality Score (std)","textmapworld_specificroom, % Played","textmapworld_specificroom, Quality Score","textmapworld_specificroom, Quality Score (std)"
|
2 |
-
Idefics3-8B-Llama3-t0.0--Idefics3-8B-Llama3-t0.0,22.56,40.0,56.39,100.0,70.0,46.41,100.0,42.78,49.61,0.0,,,0.0,,,0.0,,
|
3 |
-
InternVL2-26B-t0.0--InternVL2-26B-t0.0,32.27,51.2,63.03,100.0,65.0,48.3,100.0,43.89,49.76,16.0,51.55,16.62,0.0,,,40.0,91.67,28.87
|
4 |
-
InternVL2-40B-t0.0--InternVL2-40B-t0.0,33.2,50.67,65.52,100.0,70.0,46.41,93.33,48.21,50.12,10.0,70.55,19.72,0.0,,,50.0,73.33,45.77
|
5 |
-
InternVL2-8B-t0.0--InternVL2-8B-t0.0,36.05,48.67,74.07,100.0,70.0,46.41,100.0,52.22,50.09,0.0,,,0.0,,,43.33,100.0,0.0
|
6 |
-
InternVL2-Llama3-76B-t0.0--InternVL2-Llama3-76B-t0.0,43.29,60.27,71.82,100.0,55.0,50.38,100.0,61.11,48.89,28.0,71.17,7.4,0.0,,,73.33,100.0,0.0
|
7 |
-
Phi-3-vision-128k-instruct-t0.0--Phi-3-vision-128k-instruct-t0.0,22.07,29.61,74.53,67.5,81.48,39.58,73.89,42.11,49.56,0.0,,,0.0,,,6.67,100.0,0.0
|
8 |
-
Phi-3.5-vision-instruct-t0.0--Phi-3.5-vision-instruct-t0.0,26.95,41.73,64.58,100.0,55.0,50.38,83.33,42.67,49.62,12.0,60.63,21.21,0.0,,,13.33,100.0,0.0
|
9 |
-
Pixtral-12B-2409-t0.0--Pixtral-12B-2409-t0.0,37.57,59.63,63.01,97.5,69.23,46.76,100.0,41.11,49.34,34.0,53.61,17.0,6.67,56.67,4.71,60.0,94.44,23.57
|
10 |
-
claude-3-5-sonnet-20240620-t0.0--claude-3-5-sonnet-20240620-t0.0,90.56,100.0,90.56,100.0,92.5,26.67,100.0,91.11,28.54,100.0,86.26,12.12,100.0,82.91,10.88,100.0,100.0,0.0
|
11 |
-
claude-3-opus-20240229-t0.0--claude-3-opus-20240229-t0.0,74.99,100.0,74.99,100.0,85.0,36.16,100.0,29.44,45.71,100.0,83.83,14.64,100.0,76.69,12.8,100.0,100.0,0.0
|
12 |
-
gemini-1.5-flash-latest-t0.0--gemini-1.5-flash-latest-t0.0,47.88,62.97,76.03,97.5,76.92,42.68,100.0,61.11,48.89,64.0,66.08,16.46,0.0,,,53.33,100.0,0.0
|
13 |
-
gpt-4-1106-vision-preview-t0.0--gpt-4-1106-vision-preview-t0.0,68.14,99.33,68.6,100.0,72.5,45.22,100.0,29.44,45.71,100.0,73.62,14.33,100.0,67.46,15.11,96.67,100.0,0.0
|
14 |
-
gpt-4o-2024-05-13-t0.0--gpt-4o-2024-05-13-t0.0,82.72,96.67,85.57,100.0,97.5,15.81,100.0,90.0,30.08,90.0,74.25,12.12,96.67,66.12,12.83,96.67,100.0,0.0
|
15 |
-
gpt-4o-2024-08-06-t0.0--gpt-4o-2024-08-06-t0.0,80.96,98.67,82.05,100.0,82.5,38.48,100.0,87.78,32.85,100.0,72.84,10.76,100.0,67.15,12.41,93.33,100.0,0.0
|
16 |
-
gpt-4o-mini-2024-07-18-t0.0--gpt-4o-mini-2024-07-18-t0.0,63.87,85.76,74.48,100.0,87.5,33.49,99.44,73.74,44.13,66.0,63.7,16.72,96.67,47.46,15.37,66.67,100.0,0.0
|
17 |
-
idefics-80b-instruct-t0.0--idefics-80b-instruct-t0.0,19.73,46.5,42.44,80.0,37.5,49.19,100.0,31.11,46.42,6.0,58.71,10.65,,,,0.0,,
|
18 |
-
idefics-9b-instruct-t0.0--idefics-9b-instruct-t0.0,7.66,22.56,33.97,100.0,37.5,49.03,12.78,30.43,47.05,0.0,,,0.0,,,0.0,,
|
19 |
-
internlm-xcomposer2d5-7b-t0.0--internlm-xcomposer2d5-7b-t0.0,19.69,25.47,77.32,100.0,62.5,49.03,0.0,,,4.0,69.47,13.4,0.0,,,23.33,100.0,0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/results_1.6.5_multimodal.csv
DELETED
@@ -1,20 +0,0 @@
|
|
1 |
-
,"-, clemscore","all, Average % Played","all, Average Quality Score","matchit, % Played","matchit, Quality Score","matchit, Quality Score (std)","mm_mapworld, % Played","mm_mapworld, Quality Score","mm_mapworld, Quality Score (std)","mm_mapworld_graphs, % Played","mm_mapworld_graphs, Quality Score","mm_mapworld_graphs, Quality Score (std)","mm_mapworld_specificroom, % Played","mm_mapworld_specificroom, Quality Score","mm_mapworld_specificroom, Quality Score (std)","multimodal_referencegame, % Played","multimodal_referencegame, Quality Score","multimodal_referencegame, Quality Score (std)"
|
2 |
-
Idefics3-8B-Llama3-t0.0--Idefics3-8B-Llama3-t0.0,17.52,32.59,53.76,40.0,79.17,41.49,14.0,4.76,12.6,0.0,,,10.0,100.0,0.0,98.97,31.09,46.35
|
3 |
-
InternVL2-26B-t0.0--InternVL2-26B-t0.0,37.45,66.76,56.09,100.0,93.33,25.15,52.0,58.47,20.73,16.67,69.33,16.91,80.0,25.0,44.23,85.13,34.34,47.56
|
4 |
-
InternVL2-40B-t0.0--InternVL2-40B-t0.0,32.23,56.27,57.28,96.67,79.31,40.86,28.0,23.29,35.09,33.33,76.2,20.56,23.33,71.43,48.8,100.0,36.15,48.11
|
5 |
-
InternVL2-8B-t0.0--InternVL2-8B-t0.0,23.17,46.61,49.7,100.0,68.33,46.91,0.0,,,3.33,85.71,,43.33,7.69,27.74,86.41,37.09,48.38
|
6 |
-
InternVL2-Llama3-76B-t0.0--InternVL2-Llama3-76B-t0.0,33.84,54.8,61.76,100.0,90.0,30.25,34.0,57.15,18.59,3.33,54.55,,36.67,72.73,46.71,100.0,34.36,47.55
|
7 |
-
Phi-3-vision-128k-instruct-t0.0--Phi-3-vision-128k-instruct-t0.0,3.34,5.06,65.98,0.0,,,4.0,45.0,7.07,3.33,52.94,,0.0,,,17.95,100.0,0.0
|
8 |
-
Phi-3.5-vision-instruct-t0.0--Phi-3.5-vision-instruct-t0.0,15.64,40.67,38.46,100.0,0.0,0.0,0.0,,,3.33,100.0,,0.0,,,100.0,15.38,36.13
|
9 |
-
Pixtral-12B-2409-t0.0--Pixtral-12B-2409-t0.0,28.64,49.98,57.3,100.0,63.33,48.6,24.0,58.01,29.16,3.33,66.67,,43.33,53.85,51.89,79.23,44.66,49.79
|
10 |
-
claude-3-5-sonnet-20240620-t0.0--claude-3-5-sonnet-20240620-t0.0,80.77,95.33,84.73,100.0,85.0,36.01,100.0,82.41,11.7,76.67,85.23,15.36,100.0,90.0,30.51,100.0,81.03,39.26
|
11 |
-
claude-3-opus-20240229-t0.0--claude-3-opus-20240229-t0.0,68.16,99.33,68.62,100.0,81.67,39.02,100.0,75.79,14.43,96.67,85.12,13.27,100.0,53.33,50.74,100.0,47.18,49.98
|
12 |
-
dolphin-vision-72b-t0.0--dolphin-vision-72b-t0.0,4.65,7.88,58.95,16.67,90.0,31.62,4.0,60.61,8.57,3.33,0.0,,6.67,100.0,0.0,8.72,44.12,50.4
|
13 |
-
gemini-1.5-flash-latest-t0.0--gemini-1.5-flash-latest-t0.0,47.73,85.0,56.15,85.0,84.31,36.73,100.0,60.05,20.46,46.67,62.72,13.21,93.33,32.14,47.56,100.0,41.54,49.34
|
14 |
-
gpt-4-1106-vision-preview-t0.0--gpt-4-1106-vision-preview-t0.0,73.55,97.79,75.21,100.0,80.0,40.34,100.0,73.74,13.24,90.0,77.25,10.74,100.0,76.67,43.02,98.97,68.39,46.55
|
15 |
-
gpt-4o-2024-05-13-t0.0--gpt-4o-2024-05-13-t0.0,69.56,87.73,79.29,100.0,78.33,41.55,52.0,73.58,12.43,90.0,76.06,16.67,96.67,93.1,25.79,100.0,75.38,43.13
|
16 |
-
gpt-4o-2024-08-06-t0.0--gpt-4o-2024-08-06-t0.0,80.04,96.93,82.57,93.33,80.36,40.09,98.0,81.59,12.0,96.67,82.93,11.51,96.67,93.1,25.79,100.0,74.87,43.43
|
17 |
-
gpt-4o-mini-2024-07-18-t0.0--gpt-4o-mini-2024-07-18-t0.0,58.46,90.04,64.93,100.0,86.67,34.28,92.0,64.65,16.71,73.33,59.93,16.17,86.67,65.38,48.52,98.21,48.04,50.03
|
18 |
-
idefics-80b-instruct-t0.0--idefics-80b-instruct-t0.0,29.55,58.29,50.7,88.14,55.77,50.15,20.0,32.78,29.72,50.0,81.36,,33.33,50.0,52.7,100.0,33.59,47.29
|
19 |
-
idefics-9b-instruct-t0.0--idefics-9b-instruct-t0.0,12.29,38.0,32.34,100.0,33.33,47.54,0.0,,,0.0,,,0.0,,,90.0,31.34,46.45
|
20 |
-
internlm-xcomposer2d5-7b-t0.0--internlm-xcomposer2d5-7b-t0.0,16.95,20.18,83.98,98.33,77.97,41.8,0.0,,,0.0,,,0.0,,,2.56,90.0,31.62
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/results_1.6.csv
DELETED
@@ -1,69 +0,0 @@
|
|
1 |
-
,"-, clemscore","all, Average % Played","all, Average Quality Score","imagegame, % Played","imagegame, Quality Score","imagegame, Quality Score (std)","privateshared, % Played","privateshared, Quality Score","privateshared, Quality Score (std)","referencegame, % Played","referencegame, Quality Score","referencegame, Quality Score (std)","taboo, % Played","taboo, Quality Score","taboo, Quality Score (std)","wordle, % Played","wordle, Quality Score","wordle, Quality Score (std)","wordle_withclue, % Played","wordle_withclue, Quality Score","wordle_withclue, Quality Score (std)","wordle_withcritic, % Played","wordle_withcritic, Quality Score","wordle_withcritic, Quality Score (std)"
|
2 |
-
CodeLlama-34b-Instruct-hf-t0.0--CodeLlama-34b-Instruct-hf-t0.0,14.35,33.57,42.76,0.0,,,0.0,,,100.0,94.44,22.97,51.67,51.61,50.8,56.67,0.0,0.0,26.67,25.0,46.29,0.0,,
|
3 |
-
Meta-Llama-3-70B-Instruct-hf-t0.0--Meta-Llama-3-70B-Instruct-hf-t0.0,35.11,80.72,43.5,0.0,,,100.0,84.37,13.69,100.0,64.44,48.0,91.67,70.3,39.37,90.0,1.85,6.81,96.67,14.37,32.34,86.67,25.64,39.08
|
4 |
-
Meta-Llama-3-8B-Instruct-hf-t0.0--Meta-Llama-3-8B-Instruct-hf-t0.0,19.99,76.1,26.27,0.0,,,96.0,58.91,30.05,100.0,46.11,49.99,100.0,37.78,45.08,86.67,0.0,0.0,83.33,14.0,33.91,66.67,0.83,3.73
|
5 |
-
Meta-Llama-3.1-405B-Instruct-Turbo-t0.0--Meta-Llama-3.1-405B-Instruct-Turbo-t0.0,52.11,90.12,57.82,62.5,94.12,12.55,100.0,84.24,29.65,100.0,80.0,40.11,95.0,76.61,36.03,93.33,7.14,12.72,93.33,29.7,34.87,86.67,32.95,35.63
|
6 |
-
Meta-Llama-3.1-70B-Instruct-t0.0--Meta-Llama-3.1-70B-Instruct-t0.0,38.83,82.14,47.27,0.0,,,100.0,85.69,13.94,100.0,72.78,44.63,88.33,75.16,34.69,93.33,1.43,5.25,93.33,19.29,33.55,100.0,29.28,36.5
|
7 |
-
Meta-Llama-3.1-8B-Instruct-t0.0--Meta-Llama-3.1-8B-Instruct-t0.0,18.36,72.91,25.18,0.0,,,82.0,29.31,25.61,100.0,47.78,50.09,88.33,50.0,47.7,96.67,0.0,0.0,76.67,12.32,29.82,66.67,11.67,31.11
|
8 |
-
Mistral-7B-Instruct-v0.1-t0.0--Mistral-7B-Instruct-v0.1-t0.0,8.01,37.14,21.58,0.0,,,20.0,1.21,2.58,100.0,55.0,49.89,100.0,31.67,45.07,0.0,,,23.33,0.0,0.0,16.67,20.0,44.72
|
9 |
-
Mistral-7B-Instruct-v0.2-t0.0--Mistral-7B-Instruct-v0.2-t0.0,9.75,36.91,26.42,0.0,,,0.0,,,100.0,38.33,48.76,65.0,0.0,0.0,50.0,0.0,0.0,26.67,43.75,49.55,16.67,50.0,50.0
|
10 |
-
Mistral-Large-Instruct-2407-t0.0--Mistral-Large-Instruct-2407-t0.0,45.39,82.21,55.21,7.5,100.0,0.0,78.0,81.39,13.12,100.0,81.11,39.25,96.67,71.84,36.57,100.0,0.67,3.65,93.33,21.73,32.97,100.0,29.72,41.79
|
11 |
-
Mixtral-8x22B-Instruct-v0.1-t0.0--Mixtral-8x22B-Instruct-v0.1-t0.0,12.69,52.14,24.33,0.0,,,0.0,,,100.0,36.67,48.32,58.33,40.0,49.71,96.67,0.0,0.0,60.0,15.0,33.3,50.0,30.0,41.4
|
12 |
-
Mixtral-8x7B-Instruct-v0.1-t0.0--Mixtral-8x7B-Instruct-v0.1-t0.0,8.17,47.62,17.15,0.0,,,0.0,,,61.67,41.44,49.49,51.67,9.68,30.05,96.67,0.0,0.0,76.67,19.13,35.28,46.67,15.48,36.08
|
13 |
-
Nous-Hermes-2-Mixtral-8x7B-SFT-t0.0--Nous-Hermes-2-Mixtral-8x7B-SFT-t0.0,11.95,39.68,30.12,0.0,,,0.0,,,97.78,36.93,48.4,93.33,47.92,47.36,0.0,,,53.33,15.62,30.1,33.33,20.0,42.16
|
14 |
-
Phi-3-mini-128k-instruct-t0.0--Phi-3-mini-128k-instruct-t0.0,6.33,34.52,18.34,0.0,,,0.0,,,100.0,36.67,48.32,98.33,0.0,0.0,0.0,,,33.33,20.0,42.16,10.0,16.67,28.87
|
15 |
-
Qwen1.5-0.5B-Chat-t0.0--Qwen1.5-0.5B-Chat-t0.0,0.12,25.72,0.48,0.0,,,0.0,,,0.0,,,86.67,1.92,13.87,46.67,0.0,0.0,40.0,0.0,0.0,6.67,0.0,0.0
|
16 |
-
Qwen1.5-1.8B-Chat-t0.0--Qwen1.5-1.8B-Chat-t0.0,0.0,15.24,0.0,0.0,,,0.0,,,0.0,,,93.33,0.0,0.0,0.0,,,10.0,0.0,0.0,3.33,0.0,
|
17 |
-
Qwen1.5-14B-Chat-t0.0--Qwen1.5-14B-Chat-t0.0,16.8,40.95,41.02,30.0,20.58,14.69,0.0,,,100.0,44.44,49.83,46.67,41.07,47.25,90.0,0.0,0.0,16.67,40.0,54.77,3.33,100.0,
|
18 |
-
Qwen1.5-32B-Chat-t0.0--Qwen1.5-32B-Chat-t0.0,15.41,63.69,24.19,67.5,42.15,29.29,20.0,35.52,9.63,100.0,12.78,33.48,61.67,42.79,47.39,93.33,0.0,0.0,60.0,16.85,33.34,43.33,19.23,38.4
|
19 |
-
Qwen1.5-72B-Chat-t0.0--Qwen1.5-72B-Chat-t0.0,30.37,80.05,37.94,65.0,50.0,25.53,92.0,52.87,20.39,100.0,37.22,48.47,73.33,73.11,43.02,96.67,0.69,3.71,90.0,20.93,39.03,43.33,30.77,48.04
|
20 |
-
Qwen1.5-7B-Chat-t0.0--Qwen1.5-7B-Chat-t0.0,2.58,30.24,8.53,0.0,,,0.0,,,100.0,20.56,40.52,98.33,13.56,33.26,0.0,,,10.0,0.0,0.0,3.33,0.0,
|
21 |
-
Qwen2-72B-Instruct-t0.0--Qwen2-72B-Instruct-t0.0,30.03,74.52,40.3,0.0,,,80.0,65.69,22.85,100.0,67.22,47.07,91.67,70.61,40.31,100.0,2.67,10.48,86.67,12.82,29.56,63.33,22.81,33.31
|
22 |
-
Qwen2-7B-Instruct-t0.0--Qwen2-7B-Instruct-t0.0,6.18,35.32,17.51,5.0,23.0,1.41,0.0,,,98.89,41.01,49.32,86.67,41.03,46.14,26.67,0.0,0.0,26.67,0.0,0.0,3.33,0.0,
|
23 |
-
SUS-Chat-34B-t0.0--SUS-Chat-34B-t0.0,14.11,54.4,25.93,2.5,29.0,,20.0,0.0,0.0,100.0,70.0,45.95,98.33,52.26,45.64,93.33,0.0,0.0,43.33,23.08,43.85,23.33,7.14,18.9
|
24 |
-
Starling-LM-7B-beta-t0.0--Starling-LM-7B-beta-t0.0,6.56,30.89,21.25,0.0,,,4.0,97.12,4.08,62.22,30.36,46.19,46.67,0.0,0.0,66.67,0.0,0.0,33.33,0.0,0.0,3.33,0.0,
|
25 |
-
WizardLM-13b-v1.2-t0.0--WizardLM-13b-v1.2-t0.0,11.48,39.57,29.0,0.0,,,42.0,10.11,21.96,100.0,71.11,45.45,35.0,64.29,45.12,26.67,0.0,0.0,53.33,6.25,25.0,20.0,22.22,40.37
|
26 |
-
WizardLM-70b-v1.0-t0.0--WizardLM-70b-v1.0-t0.0,17.4,46.19,37.66,0.0,,,0.0,,,100.0,81.67,38.8,56.67,70.59,44.58,73.33,0.0,0.0,56.67,17.84,34.09,36.67,18.18,40.45
|
27 |
-
Yi-1.5-34B-Chat-t0.0--Yi-1.5-34B-Chat-t0.0,7.67,52.38,14.65,0.0,,,0.0,,,100.0,43.33,49.69,66.67,0.0,0.0,96.67,0.0,0.0,70.0,18.25,36.48,33.33,11.67,31.48
|
28 |
-
Yi-1.5-6B-Chat-t0.0--Yi-1.5-6B-Chat-t0.0,6.73,41.43,16.25,0.0,,,0.0,,,88.33,34.59,47.72,65.0,0.0,0.0,86.67,0.0,0.0,33.33,20.0,42.16,16.67,26.67,43.46
|
29 |
-
Yi-1.5-9B-Chat-t0.0--Yi-1.5-9B-Chat-t0.0,4.37,38.1,11.48,0.0,,,0.0,,,51.67,41.94,49.61,41.67,0.0,0.0,86.67,0.0,0.0,46.67,7.14,26.73,40.0,8.33,28.87
|
30 |
-
Yi-34B-Chat-t0.0--Yi-34B-Chat-t0.0,8.27,40.86,20.25,35.0,9.07,10.84,26.0,8.02,17.17,3.33,33.33,51.64,68.33,41.46,49.88,83.33,0.0,0.0,43.33,26.92,43.85,26.67,22.92,36.66
|
31 |
-
aya-23-35B-t0.0--aya-23-35B-t0.0,13.35,47.9,27.88,0.0,,,82.0,31.48,15.69,100.0,42.78,49.61,90.0,40.43,45.63,0.0,,,33.33,19.17,31.93,30.0,5.56,16.67
|
32 |
-
aya-23-8B-t0.0--aya-23-8B-t0.0,11.72,45.24,25.9,0.0,,,50.0,35.71,33.91,100.0,35.0,47.83,100.0,22.22,40.44,40.0,0.0,0.0,13.33,50.0,57.74,13.33,12.5,25.0
|
33 |
-
claude-2.1-t0.0--claude-2.1-t0.0,32.5,82.14,39.57,0.0,,,100.0,74.92,26.26,100.0,50.56,50.14,95.0,64.91,45.93,96.67,7.59,21.16,86.67,21.6,39.58,96.67,17.82,35.34
|
34 |
-
claude-3-5-sonnet-20240620-t0.0--claude-3-5-sonnet-20240620-t0.0,57.08,89.64,63.68,97.5,97.1,10.08,100.0,89.57,10.67,100.0,91.11,28.54,33.33,72.5,28.75,100.0,15.0,23.45,100.0,41.44,34.92,96.67,39.02,34.81
|
35 |
-
claude-3-haiku-20240307-t0.0--claude-3-haiku-20240307-t0.0,22.49,79.52,28.28,0.0,,,100.0,50.46,34.83,100.0,17.22,37.86,63.33,78.95,32.11,100.0,0.0,0.0,100.0,8.44,21.27,93.33,14.58,31.64
|
36 |
-
claude-3-opus-20240229-t0.0--claude-3-opus-20240229-t0.0,42.42,83.1,51.05,0.0,,,100.0,95.32,6.4,100.0,29.44,45.71,88.33,83.65,32.11,100.0,20.0,28.65,96.67,46.09,38.59,96.67,31.78,35.15
|
37 |
-
claude-3-sonnet-20240229-t0.0--claude-3-sonnet-20240229-t0.0,30.53,85.24,35.82,0.0,,,100.0,60.81,25.28,100.0,27.22,44.63,100.0,73.61,36.73,100.0,10.67,23.33,100.0,20.5,33.65,96.67,22.13,33.35
|
38 |
-
codegemma-7b-it-t0.0--codegemma-7b-it-t0.0,15.3,51.95,29.45,0.0,,,42.0,0.0,0.0,81.67,96.6,18.19,83.33,26.0,44.31,96.67,0.0,0.0,43.33,14.1,30.31,16.67,40.0,54.77
|
39 |
-
command-r-plus-t0.0--command-r-plus-t0.0,24.94,74.9,33.3,0.0,,,,,,99.44,47.49,50.08,63.33,67.11,45.44,100.0,7.33,19.82,93.33,26.79,37.91,93.33,17.8,32.58
|
40 |
-
command-r-t0.0--command-r-t0.0,14.15,61.67,22.95,0.0,,,,,,100.0,23.33,42.41,63.33,44.74,47.63,93.33,0.0,0.0,66.67,30.0,44.13,46.67,16.67,36.4
|
41 |
-
dolphin-2.5-mixtral-8x7b-t0.0--dolphin-2.5-mixtral-8x7b-t0.0,15.1,46.38,32.55,0.0,,,48.0,58.95,25.96,100.0,35.0,47.83,100.0,41.11,46.79,0.0,,,43.33,7.69,27.74,33.33,20.0,42.16
|
42 |
-
gemini-1.0-pro-t0.0--gemini-1.0-pro-t0.0,26.95,80.14,33.63,30.0,49.08,26.5,76.0,63.7,19.97,100.0,46.11,49.99,85.0,55.23,44.53,90.0,0.74,3.85,86.67,12.82,32.76,93.33,7.74,21.98
|
43 |
-
gemini-1.5-flash-latest-t0.0--gemini-1.5-flash-latest-t0.0,32.0,76.14,42.03,0.0,,,98.0,78.18,20.17,100.0,61.11,48.89,91.67,57.88,43.61,96.67,0.69,3.71,66.67,33.33,38.9,80.0,20.97,31.07
|
44 |
-
gemini-1.5-pro-latest-t0.0--gemini-1.5-pro-latest-t0.0,41.9,81.29,51.55,0.0,,,94.0,88.7,10.41,100.0,65.0,47.83,85.0,70.59,35.84,100.0,10.67,22.43,93.33,41.37,39.25,96.67,32.99,35.32
|
45 |
-
gemma-1.1-2b-it-t0.0--gemma-1.1-2b-it-t0.0,2.91,22.62,12.87,0.0,,,0.0,,,100.0,20.0,40.11,45.0,14.81,36.2,0.0,,,6.67,0.0,0.0,6.67,16.66,23.57
|
46 |
-
gemma-1.1-7b-it-t0.0--gemma-1.1-7b-it-t0.0,14.14,49.67,28.46,0.0,,,6.0,10.83,10.1,100.0,92.22,26.86,35.0,52.38,51.18,73.33,0.0,0.0,76.67,6.52,22.88,56.67,8.82,26.43
|
47 |
-
gemma-2-27b-it-t0.0--gemma-2-27b-it-t0.0,3.51,11.9,29.51,0.0,,,0.0,,,75.0,38.52,48.85,5.0,0.0,0.0,0.0,,,0.0,,,3.33,50.0,
|
48 |
-
gemma-2-2b-it-t0.0--gemma-2-2b-it-t0.0,2.67,38.33,6.96,0.0,,,0.0,,,0.0,,,98.33,0.0,0.0,100.0,0.0,0.0,46.67,10.71,28.95,23.33,17.14,37.29
|
49 |
-
gemma-2-9b-it-t0.0--gemma-2-9b-it-t0.0,27.34,75.48,36.22,0.0,,,70.0,53.52,40.57,100.0,42.22,49.53,78.33,77.66,38.27,100.0,1.67,9.13,93.33,17.26,33.48,86.67,25.0,43.01
|
50 |
-
gemma-7b-it-t0.0--gemma-7b-it-t0.0,1.82,17.78,10.23,0.0,,,0.0,,,97.78,40.91,49.31,0.0,,,3.33,0.0,,3.33,0.0,,20.0,0.0,0.0
|
51 |
-
gpt-3.5-turbo-0125-t0.0--gpt-3.5-turbo-0125-t0.0,27.22,89.67,30.36,70.0,64.18,29.33,96.0,36.7,31.04,100.0,3.33,18.0,68.33,73.17,41.98,100.0,0.0,0.0,96.67,24.25,40.95,96.67,10.92,27.56
|
52 |
-
gpt-4-0125-preview-t0.0--gpt-4-0125-preview-t0.0,52.5,94.92,55.31,100.0,99.6,1.53,100.0,90.22,6.92,99.44,31.84,46.72,75.0,93.33,20.23,100.0,20.67,27.66,100.0,33.17,42.87,90.0,18.33,32.69
|
53 |
-
gpt-4-0613-t0.0--gpt-4-0613-t0.0,51.09,94.88,53.85,77.5,98.19,10.06,100.0,97.33,4.12,100.0,35.56,48.0,86.67,79.81,33.22,100.0,9.0,25.78,100.0,36.78,40.4,100.0,20.28,29.17
|
54 |
-
gpt-4-1106-preview-t0.0--gpt-4-1106-preview-t0.0,51.99,98.1,53.0,95.0,94.34,10.24,100.0,87.08,10.69,100.0,29.44,45.71,91.67,83.94,29.57,100.0,13.0,27.56,100.0,29.0,39.53,100.0,34.22,39.55
|
55 |
-
gpt-4-turbo-2024-04-09-t0.0--gpt-4-turbo-2024-04-09-t0.0,58.3,94.88,61.45,82.5,99.79,1.22,100.0,92.68,9.45,100.0,88.89,31.51,85.0,82.35,30.81,100.0,16.33,31.35,100.0,29.89,39.18,96.67,20.23,28.21
|
56 |
-
gpt-4o-2024-05-13-t0.0--gpt-4o-2024-05-13-t0.0,48.34,85.71,56.4,0.0,,,100.0,94.66,5.56,100.0,90.0,30.08,100.0,75.28,35.19,100.0,19.33,28.52,100.0,28.0,36.38,100.0,31.11,33.14
|
57 |
-
gpt-4o-2024-08-06-t0.0--gpt-4o-2024-08-06-t0.0,47.71,85.71,55.66,0.0,,,100.0,90.36,8.32,100.0,87.78,32.85,100.0,85.28,25.51,100.0,23.0,30.53,100.0,23.94,34.28,100.0,23.61,30.72
|
58 |
-
gpt-4o-mini-2024-07-18-t0.0--gpt-4o-mini-2024-07-18-t0.0,34.64,85.06,40.73,0.0,,,96.0,59.27,19.82,99.44,73.74,44.13,100.0,69.72,40.18,100.0,10.33,23.56,100.0,15.67,31.78,100.0,15.67,31.17
|
59 |
-
llama-2-70b-chat-hf-t0.0--llama-2-70b-chat-hf-t0.0,0.81,7.14,11.31,0.0,,,0.0,,,46.67,22.62,42.09,0.0,,,0.0,,,3.33,0.0,,0.0,,
|
60 |
-
mistral-large-2402-t0.0--mistral-large-2402-t0.0,28.17,66.86,42.14,0.0,,,98.0,77.07,27.28,100.0,25.0,43.42,60.0,88.89,31.87,63.33,5.26,22.94,83.33,26.8,36.21,63.33,29.82,40.57
|
61 |
-
mistral-medium-2312-t0.0--mistral-medium-2312-t0.0,16.43,49.25,33.36,0.0,,,22.0,15.28,24.12,76.11,48.91,50.17,30.0,88.89,32.34,80.0,0.0,0.0,83.33,26.8,42.67,53.33,20.31,40.02
|
62 |
-
openchat-3.5-0106-t0.0--openchat-3.5-0106-t0.0,17.1,52.57,32.52,35.0,0.86,3.21,98.0,56.86,23.59,100.0,93.33,25.01,65.0,64.1,48.6,10.0,0.0,0.0,40.0,12.5,31.08,20.0,0.0,0.0
|
63 |
-
openchat-3.5-1210-t0.0--openchat-3.5-1210-t0.0,18.22,51.19,35.6,15.0,3.17,7.76,80.0,60.82,24.23,100.0,90.56,29.33,40.0,66.67,48.15,46.67,0.0,0.0,53.33,20.83,40.14,23.33,7.14,18.9
|
64 |
-
openchat_3.5-t0.0--openchat_3.5-t0.0,23.64,63.52,37.22,50.0,8.7,11.65,38.0,73.36,22.12,100.0,73.89,44.05,100.0,45.0,49.32,90.0,0.0,0.0,36.67,15.15,31.14,30.0,44.44,52.7
|
65 |
-
sheep-duck-llama-2-13b-t0.0--sheep-duck-llama-2-13b-t0.0,5.39,31.9,16.9,0.0,,,0.0,,,96.67,1.72,13.05,83.33,4.0,19.79,0.0,,,23.33,28.57,48.8,20.0,33.33,51.64
|
66 |
-
sheep-duck-llama-2-70b-v1.1-t0.0--sheep-duck-llama-2-70b-v1.1-t0.0,21.5,41.19,52.2,0.0,,,0.0,,,100.0,83.33,37.37,55.0,90.91,29.19,60.0,0.0,0.0,43.33,42.31,44.94,30.0,44.44,46.4
|
67 |
-
tulu-2-dpo-70b-t0.0--tulu-2-dpo-70b-t0.0,12.62,49.76,25.37,0.0,,,0.0,,,100.0,16.67,37.37,68.33,68.29,47.11,80.0,0.0,0.0,53.33,16.88,29.83,46.67,25.0,42.74
|
68 |
-
vicuna-13b-v1.5-t0.0--vicuna-13b-v1.5-t0.0,7.01,39.52,17.73,0.0,,,20.0,20.27,8.84,100.0,0.0,0.0,46.67,60.71,49.73,53.33,0.0,0.0,36.67,21.21,40.2,20.0,4.17,10.21
|
69 |
-
vicuna-33b-v1.3-t0.0--vicuna-33b-v1.3-t0.0,11.27,23.81,47.32,0.0,,,0.0,,,100.0,0.0,0.0,46.67,89.29,31.5,0.0,,,10.0,16.67,28.87,10.0,83.33,28.87
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/v1.6.5_ascii_latency.csv
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
model,latency
|
2 |
-
InternVL2-Llama3-76B,3.114070534839201
|
3 |
-
InternVL2-26B,1.0386292812723366
|
4 |
-
gpt-4o-2024-05-13,1.0694715724240909
|
5 |
-
claude-3-5-sonnet-20240620,2.1419643781420827
|
6 |
-
idefics-9b-instruct,1.4799789114093953
|
7 |
-
claude-3-opus-20240229,2.6997723769014104
|
8 |
-
gpt-4o-mini-2024-07-18,0.6847286470238105
|
9 |
-
gpt-4-1106-vision-preview,1.9390430972762676
|
10 |
-
Phi-3.5-vision-instruct,0.9667233222621183
|
11 |
-
InternVL2-40B,1.5607129299048943
|
12 |
-
gpt-4o-2024-08-06,0.8286696862643078
|
13 |
-
idefics-80b-instruct,9.909203573061212
|
14 |
-
InternVL2-8B,0.5617990863945588
|
15 |
-
gemini-1.5-flash-latest,13.396903850443566
|
16 |
-
Idefics3-8B-Llama3,0.5553913162090334
|
17 |
-
Phi-3-vision-128k-instruct,1.1679793685649222
|
18 |
-
Pixtral-12B-2409,0.3775700848214281
|
19 |
-
internlm-xcomposer2d5-7b,6.095479326235735
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/v1.6.5_multimodal_latency.csv
DELETED
@@ -1,20 +0,0 @@
|
|
1 |
-
model,latency
|
2 |
-
InternVL2-Llama3-76B,10.660117299385437
|
3 |
-
InternVL2-26B,4.239272214812449
|
4 |
-
dolphin-vision-72b,10.19095800373974
|
5 |
-
gpt-4o-2024-05-13,9.488193374830397
|
6 |
-
claude-3-5-sonnet-20240620,3.2490840805235996
|
7 |
-
idefics-9b-instruct,4.156911970172689
|
8 |
-
claude-3-opus-20240229,4.8763568649807185
|
9 |
-
gpt-4o-mini-2024-07-18,3.638671743317612
|
10 |
-
gpt-4-1106-vision-preview,4.712557435752083
|
11 |
-
Phi-3.5-vision-instruct,1.5404880504707106
|
12 |
-
InternVL2-40B,6.267102418391499
|
13 |
-
gpt-4o-2024-08-06,3.3857084617187416
|
14 |
-
idefics-80b-instruct,6.808930391550246
|
15 |
-
InternVL2-8B,1.9486003278511734
|
16 |
-
gemini-1.5-flash-latest,28.203669643584554
|
17 |
-
Idefics3-8B-Llama3,2.7247848158020056
|
18 |
-
Phi-3-vision-128k-instruct,1.3368420310828857
|
19 |
-
Pixtral-12B-2409,1.4976731684122302
|
20 |
-
internlm-xcomposer2d5-7b,8.438096179522184
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/v1.6_latency.csv
DELETED
@@ -1,69 +0,0 @@
|
|
1 |
-
model,latency
|
2 |
-
Phi-3-mini-128k-instruct,0.6615315832127351
|
3 |
-
Meta-Llama-3.1-70B-Instruct,0.8105055275945272
|
4 |
-
Meta-Llama-3.1-405B-Instruct-Turbo,0.7886103946545832
|
5 |
-
mistral-medium-2312,3.3167870515212137
|
6 |
-
gemma-2-2b-it,0.3139821517919896
|
7 |
-
Meta-Llama-3-8B-Instruct-hf,2.116447543256806
|
8 |
-
Qwen1.5-1.8B-Chat,0.5502705450000004
|
9 |
-
CodeLlama-34b-Instruct-hf,3.851887315425931
|
10 |
-
Qwen2-72B-Instruct,0.9480584860151365
|
11 |
-
Meta-Llama-3.1-8B-Instruct,0.20630574840608434
|
12 |
-
gemma-1.1-7b-it,0.1782953878345513
|
13 |
-
gemma-2-27b-it,0.9922771009345804
|
14 |
-
openchat-3.5-1210,0.28049827691029705
|
15 |
-
codegemma-7b-it,0.30489740508652785
|
16 |
-
dolphin-2.5-mixtral-8x7b,1.100729847312237
|
17 |
-
sheep-duck-llama-2-70b-v1.1,5.524607914346896
|
18 |
-
gpt-4-0613,0.6484411465828742
|
19 |
-
Meta-Llama-3-70B-Instruct-hf,3.348256158662245
|
20 |
-
openchat_3.5,0.31728768684620623
|
21 |
-
Yi-34B-Chat,1.2871676207135385
|
22 |
-
gpt-4o-2024-05-13,0.5570990732389735
|
23 |
-
openchat-3.5-0106,0.2920951450556654
|
24 |
-
Mixtral-8x7B-Instruct-v0.1,0.9392967660636317
|
25 |
-
gpt-4,0.9930876319528372
|
26 |
-
Qwen1.5-14B-Chat,0.37289333481152975
|
27 |
-
mistral-large-2402,0.3967416598893948
|
28 |
-
claude-3-5-sonnet-20240620,0.879929281888541
|
29 |
-
gemma-7b-it,0.6112263564356414
|
30 |
-
gpt-4-0125-preview,1.0418927523113601
|
31 |
-
gpt-4-1106-preview,0.7767265743542753
|
32 |
-
gemini-1.5-pro-latest,82.90283880578006
|
33 |
-
Nous-Hermes-2-Mixtral-8x7B-SFT,0.860892263303385
|
34 |
-
claude-2.1,1.6836316221022491
|
35 |
-
gemma-1.1-2b-it,0.11925699461279458
|
36 |
-
claude-3-opus-20240229,2.955845827917778
|
37 |
-
command-r,0.18832414914586068
|
38 |
-
gpt-4o-mini-2024-07-18,0.5342684150088973
|
39 |
-
sheep-duck-llama-2-13b,2.9462099794520453
|
40 |
-
SUS-Chat-34B,2.27951476106911
|
41 |
-
claude-3-haiku-20240307,0.869549739619107
|
42 |
-
gpt-4o-2024-08-06,0.5169587531894035
|
43 |
-
Yi-1.5-34B-Chat,9.232551962889337
|
44 |
-
Qwen2-7B-Instruct,0.3589407217948713
|
45 |
-
Mixtral-8x22B-Instruct-v0.1,1.0759354563573875
|
46 |
-
vicuna-13b-v1.5,1.4753938719676492
|
47 |
-
gemma-2-9b-it,0.3692553324432539
|
48 |
-
Mistral-7B-Instruct-v0.2,0.7635151196709047
|
49 |
-
Qwen1.5-32B-Chat,0.5092292557397938
|
50 |
-
Qwen1.5-0.5B-Chat,0.173469139375476
|
51 |
-
Starling-LM-7B-beta,1.365002297029707
|
52 |
-
gemini-1.0-pro,0.6346876567117742
|
53 |
-
gemini-1.5-flash-latest,37.2042672100488
|
54 |
-
aya-23-35B,0.5755088395104249
|
55 |
-
llama-2-70b-chat-hf,4.724659620079609
|
56 |
-
Yi-1.5-9B-Chat,8.02352422018858
|
57 |
-
WizardLM-70b-v1.0,3.9249772038834863
|
58 |
-
Yi-1.5-6B-Chat,6.496741103848931
|
59 |
-
tulu-2-dpo-70b,7.848597339328551
|
60 |
-
Qwen1.5-72B-Chat,12.689668927658234
|
61 |
-
Mistral-7B-Instruct-v0.1,0.2828647550771723
|
62 |
-
gpt-3.5,0.6829601016193744
|
63 |
-
aya-23-8B,0.4818848185613409
|
64 |
-
Qwen1.5-7B-Chat,0.38989076908838965
|
65 |
-
claude-3-sonnet-20240229,1.419486012822594
|
66 |
-
vicuna-33b-v1.3,0.8235025152162343
|
67 |
-
WizardLM-13b-v1.2,3.565436762576296
|
68 |
-
Mistral-Large-Instruct-2407,1.2444667688634197
|
69 |
-
command-r-plus,0.3104016019283745
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/__pycache__/filter_utils.cpython-310.pyc
DELETED
Binary file (2.17 kB)
|
|
utils/__pycache__/filter_utils.cpython-313.pyc
DELETED
Binary file (3.87 kB)
|
|
utils/__pycache__/text_content.cpython-313.pyc
DELETED
Binary file (423 Bytes)
|
|
utils/__pycache__/text_utils.cpython-310.pyc
DELETED
Binary file (650 Bytes)
|
|
utils/text_utils.py
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
|
3 |
-
# def context_markdown(context):
|
4 |
-
# return gr.Markdown(f"### Selected Context Range : {min_context}k - {max_context}k")
|
5 |
-
|
6 |
-
|
7 |
-
# def parameter_markdown(parameters):
|
8 |
-
|
9 |
-
# min_p = int(2**parameters[0])
|
10 |
-
# max_p = int(2**parameters[1])
|
11 |
-
|
12 |
-
# return gr.Markdown(f"### Selected Parameter Range : {min_p}B - {max_p}B")
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|