Commit
·
68e6513
1
Parent(s):
54d8c8e
working first draft
Browse files- app.py +107 -7
- requirements.txt +2 -1
- src/combined_data.json +1 -1
- src/main_df.csv +23 -23
- src/process_data.py +48 -0
- utils/__pycache__/filter_utils.cpython-310.pyc +0 -0
- utils/filter_utils.py +29 -0
app.py
CHANGED
|
@@ -3,9 +3,45 @@ import gradio as gr
|
|
| 3 |
import os
|
| 4 |
from gradio_rangeslider import RangeSlider
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
text_leaderboard = pd.read_csv(os.path.join('src', 'main_df.csv'))
|
| 7 |
text = "## The range is: {min} to {max}"
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
llm_calc_app = gr.Blocks()
|
| 11 |
with llm_calc_app:
|
|
@@ -14,12 +50,37 @@ with llm_calc_app:
|
|
| 14 |
"""
|
| 15 |
Main Filters Row
|
| 16 |
"""
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
with gr.Row():
|
|
@@ -28,13 +89,52 @@ with llm_calc_app:
|
|
| 28 |
"""
|
| 29 |
|
| 30 |
leaderboard_table = gr.Dataframe(
|
| 31 |
-
value=
|
| 32 |
elem_id="text-leaderboard-table",
|
| 33 |
interactive=False,
|
| 34 |
visible=True,
|
| 35 |
height=800
|
| 36 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
llm_calc_app.load()
|
| 40 |
llm_calc_app.queue()
|
|
|
|
| 3 |
import os
|
| 4 |
from gradio_rangeslider import RangeSlider
|
| 5 |
|
| 6 |
+
from utils.filter_utils import filter
|
| 7 |
+
|
| 8 |
+
# MAPS = filter_utils.LANG_MAPPING
|
| 9 |
+
|
| 10 |
+
# Main Leaderboard containing everything
|
| 11 |
text_leaderboard = pd.read_csv(os.path.join('src', 'main_df.csv'))
|
| 12 |
text = "## The range is: {min} to {max}"
|
| 13 |
|
| 14 |
+
# Short leaderboard containing fixed columns
|
| 15 |
+
short_leaderboard = text_leaderboard[[
|
| 16 |
+
'model_name',
|
| 17 |
+
'input_price',
|
| 18 |
+
'output_price',
|
| 19 |
+
'release_date',
|
| 20 |
+
'context_size',
|
| 21 |
+
'average_clemscore',
|
| 22 |
+
'average_latency',
|
| 23 |
+
'parameter_size',
|
| 24 |
+
]]
|
| 25 |
+
|
| 26 |
+
## Get Languages
|
| 27 |
+
## Get Languages
# Every distinct language name across all models, sorted so the dropdown
# choices appear in a stable alphabetical order. Each 'languages' cell is a
# comma-separated string; cells with no value are skipped instead of
# crashing on `.split`.
langs = sorted({
    name.strip()
    for cell in text_leaderboard['languages'].dropna()
    for name in cell.split(',')
})

## Get input / output prices
# Read whole columns at once rather than materialising one row per
# iteration with `iloc[i]`.
ip_prices = text_leaderboard['input_price'].tolist()
op_prices = text_leaderboard['output_price'].tolist()

# Upper bounds for the pricing range sliders.
max_input_price = max(ip_prices)
max_output_price = max(op_prices)
|
| 44 |
+
|
| 45 |
|
| 46 |
llm_calc_app = gr.Blocks()
|
| 47 |
with llm_calc_app:
|
|
|
|
| 50 |
"""
|
| 51 |
Main Filters Row
|
| 52 |
"""
|
| 53 |
+
|
| 54 |
+
### Language filter
|
| 55 |
+
with gr.Row():
|
| 56 |
+
lang_dropdown = gr.Dropdown(
|
| 57 |
+
choices=langs,
|
| 58 |
+
value=[],
|
| 59 |
+
multiselect=True,
|
| 60 |
+
label="Select Languages 🕹️"
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
clemscore_slider = RangeSlider(
|
| 64 |
+
minimum=0,
|
| 65 |
+
maximum=100,
|
| 66 |
+
value=(0, 100),
|
| 67 |
+
label="Select Clemscore range"
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
input_pricing_slider = RangeSlider(
|
| 71 |
+
minimum=0,
|
| 72 |
+
maximum=max_input_price,
|
| 73 |
+
value=(0, max_input_price),
|
| 74 |
+
label="Select Price range /1M input tokens"
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
output_pricing_slider = RangeSlider(
|
| 78 |
+
minimum=0,
|
| 79 |
+
maximum=max_output_price,
|
| 80 |
+
value=(0, max_output_price),
|
| 81 |
+
label="Select Price range /1M output tokens"
|
| 82 |
+
)
|
| 83 |
+
|
| 84 |
|
| 85 |
|
| 86 |
with gr.Row():
|
|
|
|
| 89 |
"""
|
| 90 |
|
| 91 |
leaderboard_table = gr.Dataframe(
|
| 92 |
+
value=short_leaderboard,
|
| 93 |
elem_id="text-leaderboard-table",
|
| 94 |
interactive=False,
|
| 95 |
visible=True,
|
| 96 |
height=800
|
| 97 |
)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
dummy_leaderboard_table = gr.Dataframe(
|
| 101 |
+
value=text_leaderboard,
|
| 102 |
+
elem_id="dummy-leaderboard-table",
|
| 103 |
+
interactive=False,
|
| 104 |
+
visible=False
|
| 105 |
+
)
|
| 106 |
|
| 107 |
+
lang_dropdown.change(
|
| 108 |
+
filter,
|
| 109 |
+
[dummy_leaderboard_table, lang_dropdown, clemscore_slider,
|
| 110 |
+
input_pricing_slider, output_pricing_slider],
|
| 111 |
+
[leaderboard_table],
|
| 112 |
+
queue=True
|
| 113 |
+
)
|
| 114 |
+
|
| 115 |
+
clemscore_slider.change(
|
| 116 |
+
filter,
|
| 117 |
+
[dummy_leaderboard_table, lang_dropdown, clemscore_slider,
|
| 118 |
+
input_pricing_slider, output_pricing_slider],
|
| 119 |
+
[leaderboard_table],
|
| 120 |
+
queue=True
|
| 121 |
+
)
|
| 122 |
+
|
| 123 |
+
input_pricing_slider.change(
|
| 124 |
+
filter,
|
| 125 |
+
[dummy_leaderboard_table, lang_dropdown, clemscore_slider,
|
| 126 |
+
input_pricing_slider, output_pricing_slider],
|
| 127 |
+
[leaderboard_table],
|
| 128 |
+
queue=True
|
| 129 |
+
)
|
| 130 |
+
|
| 131 |
+
output_pricing_slider.change(
|
| 132 |
+
filter,
|
| 133 |
+
[dummy_leaderboard_table, lang_dropdown, clemscore_slider,
|
| 134 |
+
input_pricing_slider, output_pricing_slider],
|
| 135 |
+
[leaderboard_table],
|
| 136 |
+
queue=True
|
| 137 |
+
)
|
| 138 |
|
| 139 |
llm_calc_app.load()
|
| 140 |
llm_calc_app.queue()
|
requirements.txt
CHANGED
|
@@ -1,3 +1,4 @@
|
|
| 1 |
beautifulsoup4==4.12.3
|
| 2 |
pandas==2.2.3
|
| 3 |
-
gradio_rangeslider==0.0.7
|
|
|
|
|
|
| 1 |
beautifulsoup4==4.12.3
|
| 2 |
pandas==2.2.3
|
| 3 |
+
gradio_rangeslider==0.0.7
|
| 4 |
+
gradio==4.44.1
|
src/combined_data.json
CHANGED
|
@@ -303,7 +303,7 @@
|
|
| 303 |
"name": "Apache 2.0",
|
| 304 |
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
| 305 |
},
|
| 306 |
-
"languages": ["
|
| 307 |
"release_date": "2024-04-17",
|
| 308 |
"parameters": {
|
| 309 |
"estimated": false,
|
|
|
|
| 303 |
"name": "Apache 2.0",
|
| 304 |
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
| 305 |
},
|
| 306 |
+
"languages": ["en", "fr", "es", "de", "it", "ru"],
|
| 307 |
"release_date": "2024-04-17",
|
| 308 |
"parameters": {
|
| 309 |
"estimated": false,
|
src/main_df.csv
CHANGED
|
@@ -1,24 +1,24 @@
|
|
| 1 |
model_name,input_price,output_price,multimodality_image,multimodality_multiple_image,multimodality_audio,multimodality_video,source,license_name,license_url,languages,release_date,open_weight,context_size,average_clemscore,average_latency,parameter_size,estimated
|
| 2 |
-
Meta-Llama-3-70B-Instruct-hf,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct,Meta Llama 3 License,https://www.llama.com/llama3/license/,
|
| 3 |
-
Meta-Llama-3-8B-Instruct-hf,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct,Meta Llama 3 License,https://www.llama.com/llama3/license/,
|
| 4 |
-
Meta-Llama-3.1-405B-Instruct-Turbo,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"
|
| 5 |
-
Meta-Llama-3.1-70B-Instruct,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"
|
| 6 |
-
Meta-Llama-3.1-8B-Instruct,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"
|
| 7 |
-
InternVL2-40B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-40B,MIT,https://choosealicense.com/licenses/mit/,"
|
| 8 |
-
InternVL2-8B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-8B,MIT,https://choosealicense.com/licenses/mit/,"
|
| 9 |
-
InternVL2-Llama3-76B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B,MIT,https://choosealicense.com/licenses/mit/,"
|
| 10 |
-
InternVL2-26B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,https://choosealicense.com/licenses/mit/,"
|
| 11 |
-
InternVL2-26B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,https://choosealicense.com/licenses/mit/,"
|
| 12 |
-
Mistral-Large-Instruct-2407,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-Large-Instruct-2407,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"
|
| 13 |
-
Mixtral-8x22B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"
|
| 14 |
-
Mistral-7B-Instruct-v0.2,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"
|
| 15 |
-
Mistral-7B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"
|
| 16 |
-
Mixtral-8x7B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"
|
| 17 |
-
openchat-3.5-0106,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-0106,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,
|
| 18 |
-
openchat-3.5-1210,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-1210,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,
|
| 19 |
-
openchat_3.5,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat_3.5,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,
|
| 20 |
-
gpt-4o-mini-2024-07-18,0.15,0.6,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"
|
| 21 |
-
gpt-4o-2024-08-06,2.5,10.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"
|
| 22 |
-
gpt-4o-2024-05-13,2.5,10.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"
|
| 23 |
-
gpt-4-1106-vision-preview,10.0,30.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"
|
| 24 |
-
gemini-1.5-flash-latest,0.075,0.3,True,True,True,True,https://cloud.google.com/vertex-ai/generative-ai/pricing,Commercial License,,"
|
|
|
|
| 1 |
model_name,input_price,output_price,multimodality_image,multimodality_multiple_image,multimodality_audio,multimodality_video,source,license_name,license_url,languages,release_date,open_weight,context_size,average_clemscore,average_latency,parameter_size,estimated
|
| 2 |
+
Meta-Llama-3-70B-Instruct-hf,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct,Meta Llama 3 License,https://www.llama.com/llama3/license/,English,2024-04-18,True,8192,11.703333333333333,1.1160853862207483,70.0,False
|
| 3 |
+
Meta-Llama-3-8B-Instruct-hf,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct,Meta Llama 3 License,https://www.llama.com/llama3/license/,English,2024-04-18,True,8192,6.663333333333333,0.7054825144189354,8.0,False
|
| 4 |
+
Meta-Llama-3.1-405B-Instruct-Turbo,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"English, German, French, Italian, Hindi, Portuguese, Spanish, Thai",2024-07-23,True,131072,17.37,0.2628701315515277,405.0,False
|
| 5 |
+
Meta-Llama-3.1-70B-Instruct,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"English, German, French, Italian, Hindi, Portuguese, Spanish, Thai",2024-07-23,True,131072,12.943333333333333,0.27016850919817575,70.0,False
|
| 6 |
+
Meta-Llama-3.1-8B-Instruct,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"English, German, French, Italian, Hindi, Portuguese, Spanish, Thai",2024-07-23,True,131072,6.12,0.06876858280202812,8.0,False
|
| 7 |
+
InternVL2-40B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-40B,MIT,https://choosealicense.com/licenses/mit/,"Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8192,21.810000000000002,2.609271782765464,40.0,False
|
| 8 |
+
InternVL2-8B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-8B,MIT,https://choosealicense.com/licenses/mit/,"Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8192,19.74,0.8367998047485775,8.0,False
|
| 9 |
+
InternVL2-Llama3-76B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B,MIT,https://choosealicense.com/licenses/mit/,"Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8192,25.709999999999997,4.591395944741546,76.0,False
|
| 10 |
+
InternVL2-26B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,https://choosealicense.com/licenses/mit/,"Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8192,23.24,1.7593004986949285,26.0,False
|
| 11 |
+
InternVL2-26B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,https://choosealicense.com/licenses/mit/,"Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8192,23.24,1.7593004986949285,26.0,False
|
| 12 |
+
Mistral-Large-Instruct-2407,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-Large-Instruct-2407,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"English, French, Spanish, German, Italian, Russian, Chinese, Japanese, Korean",2024-06-12,True,8192,15.13,0.41482225628780656,70.0,False
|
| 13 |
+
Mixtral-8x22B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"English, French, Spanish, German, Italian, Russian",2024-04-17,True,8192,4.2299999999999995,0.3586451521191292,141.0,False
|
| 14 |
+
Mistral-7B-Instruct-v0.2,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"English, French, Spanish, German, Italian, Russian, Chinese",2024-01-15,True,8192,3.25,0.25450503989030154,7.0,False
|
| 15 |
+
Mistral-7B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"English, French, Spanish, German, Italian, Russian, Chinese",2023-12-11,True,8192,2.67,0.09428825169239076,7.0,False
|
| 16 |
+
Mixtral-8x7B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"English, French, Spanish, German, Italian, Russian",2023-12-11,True,8192,2.723333333333333,0.31309892202121054,46.7,False
|
| 17 |
+
openchat-3.5-0106,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-0106,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,English,2024-01-06,True,8192,5.7,0.09736504835188847,7.0,False
|
| 18 |
+
openchat-3.5-1210,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-1210,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,English,2023-12-10,True,8192,6.073333333333333,0.09349942563676568,7.0,False
|
| 19 |
+
openchat_3.5,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat_3.5,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,English,2023-10-30,True,8192,7.88,0.10576256228206875,7.0,False
|
| 20 |
+
gpt-4o-mini-2024-07-18,0.15,0.6,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"English, Spanish, French, German, Chinese, Japanese, Korean, Italian, Portuguese, Dutch, Russian, Arabic, Hindi, Turkish, Vietnamese, Polish, Thai, Swedish, Danish, Norwegian, Finnish, Hungarian, Czech, Slovak, Romanian, Bulgarian, Ukrainian, Lithuanian, Latvian, Estonian, Slovenian, Malay, Indonesian, Tagalog, Swahili, Amharic",2024-07-18,False,131072,52.32333333333333,1.619222935116773,8.0,True
|
| 21 |
+
gpt-4o-2024-08-06,2.5,10.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"English, Spanish, French, German, Chinese, Japanese, Korean, Italian, Portuguese, Dutch, Russian, Arabic, Hindi, Turkish, Vietnamese, Polish, Thai, Swedish, Danish, Norwegian, Finnish, Hungarian, Czech, Slovak, Romanian, Bulgarian, Ukrainian, Lithuanian, Latvian, Estonian, Slovenian, Malay, Indonesian, Tagalog, Swahili, Amharic",2024-08-06,False,131072,69.57000000000001,1.5771123003908176,200.0,True
|
| 22 |
+
gpt-4o-2024-05-13,2.5,10.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"English, Spanish, French, German, Chinese, Japanese, Korean, Italian, Portuguese, Dutch, Russian, Arabic, Hindi, Turkish, Vietnamese, Polish, Thai, Swedish, Danish, Norwegian, Finnish, Hungarian, Czech, Slovak, Romanian, Bulgarian, Ukrainian, Lithuanian, Latvian, Estonian, Slovenian, Malay, Indonesian, Tagalog, Swahili, Amharic",2024-05-13,False,131072,66.87333333333333,3.704921340164487,200.0,True
|
| 23 |
+
gpt-4-1106-vision-preview,10.0,30.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"English, Spanish, French, German, Chinese, Japanese, Korean, Italian, Portuguese, Dutch, Russian, Arabic, Hindi, Turkish, Vietnamese, Polish, Thai, Swedish, Danish, Norwegian, Finnish, Hungarian, Czech, Slovak, Romanian, Bulgarian, Ukrainian, Lithuanian, Latvian, Estonian, Slovenian, Malay, Indonesian, Tagalog, Swahili, Amharic",2023-11-06,False,131072,47.23,2.217200177676117,1760.0,True
|
| 24 |
+
gemini-1.5-flash-latest,0.075,0.3,True,True,True,True,https://cloud.google.com/vertex-ai/generative-ai/pricing,Commercial License,,"Lithuanian, Norwegian, Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovenian, Spanish, Swahili, Swedish, Thai, Turkish, Ukrainian, Vietnamese, Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek, Hebrew, Hindi, Hungarian, Indonesian, Italian, Japanese, Korean, Latvian, Arabic, Bengali, Bulgarian",2024-05-24,False,131072,42.53666666666667,26.268280234692302,1760.0,True
|
src/process_data.py
CHANGED
|
@@ -108,8 +108,56 @@ for col in additional_price_columns:
|
|
| 108 |
# Clean and convert context to integer
|
| 109 |
df['context_size'] = df['context_size'].replace({'k': ''}, regex=True).astype(int)
|
| 110 |
|
|
|
|
|
|
|
| 111 |
df['parameter_size'] = df['parameter_size'].replace({'B': '', '': None}, regex=True).astype(float)
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
# Keep only the specified columns
|
| 114 |
df = df[[
|
| 115 |
'model_name',
|
|
|
|
| 108 |
# Clean and convert context to integer
|
| 109 |
df['context_size'] = df['context_size'].replace({'k': ''}, regex=True).astype(int)
|
| 110 |
|
| 111 |
+
df['context_size'] = df['context_size']*1024
|
| 112 |
+
|
| 113 |
df['parameter_size'] = df['parameter_size'].replace({'B': '', '': None}, regex=True).astype(float)
|
| 114 |
|
| 115 |
+
LANG_MAPPING = {
|
| 116 |
+
'el': 'Greek',
|
| 117 |
+
'id': 'Indonesian',
|
| 118 |
+
'ko': 'Korean',
|
| 119 |
+
'sv': 'Swedish',
|
| 120 |
+
'de': 'German',
|
| 121 |
+
'lv': 'Latvian',
|
| 122 |
+
'am': 'Amharic',
|
| 123 |
+
'fi': 'Finnish',
|
| 124 |
+
'da': 'Danish',
|
| 125 |
+
'pt': 'Portuguese',
|
| 126 |
+
'sw': 'Swahili',
|
| 127 |
+
'es': 'Spanish',
|
| 128 |
+
'it': 'Italian',
|
| 129 |
+
'bn': 'Bengali',
|
| 130 |
+
'nl': 'Dutch',
|
| 131 |
+
'lt': 'Lithuanian',
|
| 132 |
+
'ro': 'Romanian',
|
| 133 |
+
'sl': 'Slovenian',
|
| 134 |
+
'hu': 'Hungarian',
|
| 135 |
+
'hr': 'Croatian',
|
| 136 |
+
'vi': 'Vietnamese',
|
| 137 |
+
'hi': 'Hindi',
|
| 138 |
+
'zh': 'Chinese',
|
| 139 |
+
'pl': 'Polish',
|
| 140 |
+
'ar': 'Arabic',
|
| 141 |
+
'cs': 'Czech',
|
| 142 |
+
'sk': 'Slovak',
|
| 143 |
+
'ja': 'Japanese',
|
| 144 |
+
'no': 'Norwegian',
|
| 145 |
+
'uk': 'Ukrainian',
|
| 146 |
+
'fr': 'French',
|
| 147 |
+
'et': 'Estonian',
|
| 148 |
+
'ru': 'Russian',
|
| 149 |
+
'th': 'Thai',
|
| 150 |
+
'bg': 'Bulgarian',
|
| 151 |
+
'tr': 'Turkish',
|
| 152 |
+
'ms': 'Malay',
|
| 153 |
+
'he': 'Hebrew',
|
| 154 |
+
'tl': 'Tagalog',
|
| 155 |
+
'sr': 'Serbian',
|
| 156 |
+
'en': 'English'
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
df['languages'] = df['languages'].apply(lambda x: ', '.join([LANG_MAPPING.get(lang, lang) for lang in x.split(', ')]))
|
| 160 |
+
|
| 161 |
# Keep only the specified columns
|
| 162 |
df = df[[
|
| 163 |
'model_name',
|
utils/__pycache__/filter_utils.cpython-310.pyc
ADDED
|
Binary file (1.02 kB). View file
|
|
|
utils/filter_utils.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Utility functions for filtering the dataframe
|
| 2 |
+
|
| 3 |
+
# Columns shown in the short leaderboard view, in display order.
_SHORT_COLUMNS = [
    'model_name',
    'input_price',
    'output_price',
    'release_date',
    'context_size',
    'average_clemscore',
    'average_latency',
    'parameter_size',
]


def filter_cols(df):
    """Return *df* restricted to the short-leaderboard columns.

    Args:
        df: DataFrame containing at least the columns in ``_SHORT_COLUMNS``.

    Returns:
        A new DataFrame with only those columns, in that order.

    Raises:
        KeyError: If any of the expected columns is missing from *df*.
    """
    return df[_SHORT_COLUMNS]


def _has_all_languages(cell, wanted):
    """Return True if the comma-separated *cell* names every language in *wanted*."""
    if not isinstance(cell, str):
        # Missing value (None/NaN): the model lists no languages at all.
        return False
    available = {name.strip() for name in cell.split(',')}
    return wanted <= available


def filter(df, language_list, clemscore, input_price, output_price):
    """Filter the leaderboard by languages, clemscore range and price ranges.

    NOTE: the name shadows the ``filter`` builtin; it is kept because callers
    import it by this name.

    Args:
        df: Full leaderboard DataFrame. Must contain 'average_clemscore',
            'input_price', 'output_price' and the short-leaderboard columns;
            'languages' is read only when *language_list* is non-empty.
        language_list: Languages a model must support (all of them). An empty
            or None value disables the language filter.
        clemscore: ``(low, high)`` inclusive bounds on 'average_clemscore'.
        input_price: ``(low, high)`` inclusive bounds on 'input_price'.
        output_price: ``(low, high)`` inclusive bounds on 'output_price'.

    Returns:
        The matching rows, reduced to the short-leaderboard columns.
    """
    # `between` is inclusive on both ends, matching `>= low` and `<= high`.
    keep = (
        df['average_clemscore'].between(clemscore[0], clemscore[1])
        & df['input_price'].between(input_price[0], input_price[1])
        & df['output_price'].between(output_price[0], output_price[1])
    )

    if language_list:
        wanted = set(language_list)
        # Compare whole language names, not substrings of the joined string,
        # so that e.g. "Malay" does not match a model listing "Malayalam".
        # `astype(bool)` keeps the mask boolean even when *df* is empty.
        supports = df['languages'].map(
            lambda cell: _has_all_languages(cell, wanted)
        ).astype(bool)
        keep = keep & supports

    return filter_cols(df.loc[keep])  # Return the filtered dataframe
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
|