Spaces:

Koshti10
/

LLMCalc

Runtime error

App Files Files Community

carbonnnnn committed on Oct 15, 2024

Commit

68e6513

1 Parent(s): 54d8c8e

working first draft

Browse files

Files changed (7) hide show

app.py +107 -7
requirements.txt +2 -1
src/combined_data.json +1 -1
src/main_df.csv +23 -23
src/process_data.py +48 -0
utils/__pycache__/filter_utils.cpython-310.pyc +0 -0
utils/filter_utils.py +29 -0

app.py CHANGED Viewed

@@ -3,9 +3,45 @@ import gradio as gr
 import os
 from gradio_rangeslider import RangeSlider
 text_leaderboard = pd.read_csv(os.path.join('src', 'main_df.csv'))
 text = "## The range is: {min} to {max}"
 llm_calc_app = gr.Blocks()
 with llm_calc_app:
@@ -14,12 +50,37 @@ with llm_calc_app:
         """
         Main Filters Row
         """
-        range_slider = RangeSlider(minimum=0, maximum=100, value=(0, 100))
-        range_ = gr.Markdown(value=text.format(min=0, max=100))
-        range_slider.change(lambda s: text.format(min=s[0], max=s[1]), range_slider, range_,
-                            show_progress="hide", trigger_mode="always_last")
     with gr.Row():
@@ -28,13 +89,52 @@ with llm_calc_app:
         """
         leaderboard_table = gr.Dataframe(
-                                value=text_leaderboard,
                                 elem_id="text-leaderboard-table",
                                 interactive=False,
                                 visible=True,
                                 height=800
                             )
     llm_calc_app.load()
 llm_calc_app.queue()

 import os
 from gradio_rangeslider import RangeSlider
+from utils.filter_utils import filter
+# MAPS = filter_utils.LANG_MAPPING
+# Main Leaderboard containing everything
 text_leaderboard = pd.read_csv(os.path.join('src', 'main_df.csv'))
 text = "## The range is: {min} to {max}"
+# Short leaderboard containing fixed columns
+short_leaderboard = text_leaderboard[[
+    'model_name',
+    'input_price',
+    'output_price',
+    'release_date',
+    'context_size',
+    'average_clemscore',
+    'average_latency',
+    'parameter_size',
+    ]]
+## Get Languages
+langs = []
+for i in range(len(text_leaderboard)):
+    lang_splits = text_leaderboard.iloc[i]['languages'].split(',')
+    lang_splits = [s.strip() for s in lang_splits]
+    langs += lang_splits
+langs = list(set(langs))
+langs.sort()
+## Get the maximum input and output prices (upper bounds of the price sliders)
+ip_prices = []
+op_prices = []
+for i in range(len(text_leaderboard)):
+    ip_prices.append(text_leaderboard.iloc[i]['input_price'])
+    op_prices.append(text_leaderboard.iloc[i]['output_price'])
+max_input_price = max(ip_prices)
+max_output_price = max(op_prices)
 llm_calc_app = gr.Blocks()
 with llm_calc_app:
         """
         Main Filters Row
         """
+    ### Filter controls: language dropdown plus clemscore and price range sliders
+    with gr.Row():
+        lang_dropdown = gr.Dropdown(
+            choices=langs,
+            value=[],
+            multiselect=True,
+            label="Select Languages 🕹️"
+        )
+        clemscore_slider = RangeSlider(
+            minimum=0,
+            maximum=100,
+            value=(0, 100),
+            label="Select Clemscore range"
+        )
+        input_pricing_slider = RangeSlider(
+            minimum=0,
+            maximum=max_input_price,
+            value=(0, max_input_price),
+            label="Select Price range /1M input tokens"
+        )
+        output_pricing_slider = RangeSlider(
+            minimum=0,
+            maximum=max_output_price,
+            value=(0, max_output_price),
+            label="Select Price range /1M output tokens"
+        )
     with gr.Row():
         """
         leaderboard_table = gr.Dataframe(
+                                value=short_leaderboard,
                                 elem_id="text-leaderboard-table",
                                 interactive=False,
                                 visible=True,
                                 height=800
                             )
+        dummy_leaderboard_table = gr.Dataframe(
+                                value=text_leaderboard,
+                                elem_id="dummy-leaderboard-table",
+                                interactive=False,
+                                visible=False
+                            )
+        lang_dropdown.change(
+            filter,
+            [dummy_leaderboard_table, lang_dropdown, clemscore_slider,
+             input_pricing_slider, output_pricing_slider],
+            [leaderboard_table],
+            queue=True
+        )
+        clemscore_slider.change(
+            filter,
+            [dummy_leaderboard_table, lang_dropdown, clemscore_slider,
+             input_pricing_slider, output_pricing_slider],
+            [leaderboard_table],
+            queue=True
+        )
+        input_pricing_slider.change(
+            filter,
+            [dummy_leaderboard_table, lang_dropdown, clemscore_slider,
+             input_pricing_slider, output_pricing_slider],
+            [leaderboard_table],
+            queue=True
+        )
+        output_pricing_slider.change(
+            filter,
+            [dummy_leaderboard_table, lang_dropdown, clemscore_slider,
+             input_pricing_slider, output_pricing_slider],
+            [leaderboard_table],
+            queue=True
+        )
     llm_calc_app.load()
 llm_calc_app.queue()

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 beautifulsoup4==4.12.3
 pandas==2.2.3
-gradio_rangeslider==0.0.7

 beautifulsoup4==4.12.3
 pandas==2.2.3
+gradio_rangeslider==0.0.7
+gradio==4.44.1

src/combined_data.json CHANGED Viewed

@@ -303,7 +303,7 @@
             "name": "Apache 2.0",
             "url": "https://www.apache.org/licenses/LICENSE-2.0"
         },
-        "languages": ["eng", "fra", "spa", "deu", "ita", "rus"],
         "release_date": "2024-04-17",
         "parameters": {
             "estimated": false,

             "name": "Apache 2.0",
             "url": "https://www.apache.org/licenses/LICENSE-2.0"
         },
+        "languages": ["en", "fr", "es", "de", "it", "ru"],
         "release_date": "2024-04-17",
         "parameters": {
             "estimated": false,

src/main_df.csv CHANGED Viewed

@@ -1,24 +1,24 @@
 model_name,input_price,output_price,multimodality_image,multimodality_multiple_image,multimodality_audio,multimodality_video,source,license_name,license_url,languages,release_date,open_weight,context_size,average_clemscore,average_latency,parameter_size,estimated
-Meta-Llama-3-70B-Instruct-hf,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct,Meta Llama 3 License,https://www.llama.com/llama3/license/,en,2024-04-18,True,8,11.703333333333333,1.1160853862207483,70.0,False
-Meta-Llama-3-8B-Instruct-hf,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct,Meta Llama 3 License,https://www.llama.com/llama3/license/,en,2024-04-18,True,8,6.663333333333333,0.7054825144189354,8.0,False
-Meta-Llama-3.1-405B-Instruct-Turbo,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"en, de, fr, it, hi, pt, es, th",2024-07-23,True,128,17.37,0.2628701315515277,405.0,False
-Meta-Llama-3.1-70B-Instruct,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"en, de, fr, it, hi, pt, es, th",2024-07-23,True,128,12.943333333333333,0.27016850919817575,70.0,False
-Meta-Llama-3.1-8B-Instruct,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"en, de, fr, it, hi, pt, es, th",2024-07-23,True,128,6.12,0.06876858280202812,8.0,False
-InternVL2-40B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-40B,MIT,https://choosealicense.com/licenses/mit/,"zh, en, fr, es, pt, de, it, ru, ja, ko, vi, th, ar",2024-07-15,True,8,21.810000000000002,2.609271782765464,40.0,False
-InternVL2-8B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-8B,MIT,https://choosealicense.com/licenses/mit/,"zh, en, fr, es, pt, de, it, ru, ja, ko, vi, th, ar",2024-07-15,True,8,19.74,0.8367998047485775,8.0,False
-InternVL2-Llama3-76B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B,MIT,https://choosealicense.com/licenses/mit/,"zh, en, fr, es, pt, de, it, ru, ja, ko, vi, th, ar",2024-07-15,True,8,25.709999999999997,4.591395944741546,76.0,False
-InternVL2-26B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,https://choosealicense.com/licenses/mit/,"zh, en, fr, es, pt, de, it, ru, ja, ko, vi, th, ar",2024-07-15,True,8,23.24,1.7593004986949285,26.0,False
-InternVL2-26B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,https://choosealicense.com/licenses/mit/,"zh, en, fr, es, pt, de, it, ru, ja, ko, vi, th, ar",2024-07-15,True,8,23.24,1.7593004986949285,26.0,False
-Mistral-Large-Instruct-2407,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-Large-Instruct-2407,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"en, fr, es, de, it, ru, zh, ja, ko",2024-06-12,True,8,15.13,0.41482225628780656,70.0,False
-Mixtral-8x22B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"eng, fra, spa, deu, ita, rus",2024-04-17,True,8,4.2299999999999995,0.3586451521191292,141.0,False
-Mistral-7B-Instruct-v0.2,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"en, fr, es, de, it, ru, zh",2024-01-15,True,8,3.25,0.25450503989030154,7.0,False
-Mistral-7B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"en, fr, es, de, it, ru, zh",2023-12-11,True,8,2.67,0.09428825169239076,7.0,False
-Mixtral-8x7B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"en, fr, es, de, it, ru",2023-12-11,True,8,2.723333333333333,0.31309892202121054,46.7,False
-openchat-3.5-0106,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-0106,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,en,2024-01-06,True,8,5.7,0.09736504835188847,7.0,False
-openchat-3.5-1210,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-1210,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,en,2023-12-10,True,8,6.073333333333333,0.09349942563676568,7.0,False
-openchat_3.5,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat_3.5,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,en,2023-10-30,True,8,7.88,0.10576256228206875,7.0,False
-gpt-4o-mini-2024-07-18,0.15,0.6,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"en, es, fr, de, zh, ja, ko, it, pt, nl, ru, ar, hi, tr, vi, pl, th, sv, da, no, fi, hu, cs, sk, ro, bg, uk, lt, lv, et, sl, ms, id, tl, sw, am",2024-07-18,False,128,52.32333333333333,1.619222935116773,8.0,True
-gpt-4o-2024-08-06,2.5,10.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"en, es, fr, de, zh, ja, ko, it, pt, nl, ru, ar, hi, tr, vi, pl, th, sv, da, no, fi, hu, cs, sk, ro, bg, uk, lt, lv, et, sl, ms, id, tl, sw, am",2024-08-06,False,128,69.57000000000001,1.5771123003908176,200.0,True
-gpt-4o-2024-05-13,2.5,10.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"en, es, fr, de, zh, ja, ko, it, pt, nl, ru, ar, hi, tr, vi, pl, th, sv, da, no, fi, hu, cs, sk, ro, bg, uk, lt, lv, et, sl, ms, id, tl, sw, am",2024-05-13,False,128,66.87333333333333,3.704921340164487,200.0,True
-gpt-4-1106-vision-preview,10.0,30.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"en, es, fr, de, zh, ja, ko, it, pt, nl, ru, ar, hi, tr, vi, pl, th, sv, da, no, fi, hu, cs, sk, ro, bg, uk, lt, lv, et, sl, ms, id, tl, sw, am",2023-11-06,False,128,47.23,2.217200177676117,1760.0,True
-gemini-1.5-flash-latest,0.075,0.3,True,True,True,True,https://cloud.google.com/vertex-ai/generative-ai/pricing,Commercial License,,"lt, no, pl, pt, ro, ru, sr, sk, sl, es, sw, sv, th, tr, uk, vi, zh, hr, cs, da, nl, en, et, fi, fr, de, el, he, hi, hu, id, it, ja, ko, lv, ar, bn, bg",2024-05-24,False,128,42.53666666666667,26.268280234692302,1760.0,True

 model_name,input_price,output_price,multimodality_image,multimodality_multiple_image,multimodality_audio,multimodality_video,source,license_name,license_url,languages,release_date,open_weight,context_size,average_clemscore,average_latency,parameter_size,estimated
+Meta-Llama-3-70B-Instruct-hf,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct,Meta Llama 3 License,https://www.llama.com/llama3/license/,English,2024-04-18,True,8192,11.703333333333333,1.1160853862207483,70.0,False
+Meta-Llama-3-8B-Instruct-hf,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct,Meta Llama 3 License,https://www.llama.com/llama3/license/,English,2024-04-18,True,8192,6.663333333333333,0.7054825144189354,8.0,False
+Meta-Llama-3.1-405B-Instruct-Turbo,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"English, German, French, Italian, Hindi, Portuguese, Spanish, Thai",2024-07-23,True,131072,17.37,0.2628701315515277,405.0,False
+Meta-Llama-3.1-70B-Instruct,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"English, German, French, Italian, Hindi, Portuguese, Spanish, Thai",2024-07-23,True,131072,12.943333333333333,0.27016850919817575,70.0,False
+Meta-Llama-3.1-8B-Instruct,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"English, German, French, Italian, Hindi, Portuguese, Spanish, Thai",2024-07-23,True,131072,6.12,0.06876858280202812,8.0,False
+InternVL2-40B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-40B,MIT,https://choosealicense.com/licenses/mit/,"Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8192,21.810000000000002,2.609271782765464,40.0,False
+InternVL2-8B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-8B,MIT,https://choosealicense.com/licenses/mit/,"Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8192,19.74,0.8367998047485775,8.0,False
+InternVL2-Llama3-76B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B,MIT,https://choosealicense.com/licenses/mit/,"Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8192,25.709999999999997,4.591395944741546,76.0,False
+InternVL2-26B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,https://choosealicense.com/licenses/mit/,"Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8192,23.24,1.7593004986949285,26.0,False
+InternVL2-26B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,https://choosealicense.com/licenses/mit/,"Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8192,23.24,1.7593004986949285,26.0,False
+Mistral-Large-Instruct-2407,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-Large-Instruct-2407,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"English, French, Spanish, German, Italian, Russian, Chinese, Japanese, Korean",2024-06-12,True,8192,15.13,0.41482225628780656,70.0,False
+Mixtral-8x22B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"English, French, Spanish, German, Italian, Russian",2024-04-17,True,8192,4.2299999999999995,0.3586451521191292,141.0,False
+Mistral-7B-Instruct-v0.2,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"English, French, Spanish, German, Italian, Russian, Chinese",2024-01-15,True,8192,3.25,0.25450503989030154,7.0,False
+Mistral-7B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"English, French, Spanish, German, Italian, Russian, Chinese",2023-12-11,True,8192,2.67,0.09428825169239076,7.0,False
+Mixtral-8x7B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"English, French, Spanish, German, Italian, Russian",2023-12-11,True,8192,2.723333333333333,0.31309892202121054,46.7,False
+openchat-3.5-0106,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-0106,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,English,2024-01-06,True,8192,5.7,0.09736504835188847,7.0,False
+openchat-3.5-1210,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-1210,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,English,2023-12-10,True,8192,6.073333333333333,0.09349942563676568,7.0,False
+openchat_3.5,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat_3.5,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,English,2023-10-30,True,8192,7.88,0.10576256228206875,7.0,False
+gpt-4o-mini-2024-07-18,0.15,0.6,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"English, Spanish, French, German, Chinese, Japanese, Korean, Italian, Portuguese, Dutch, Russian, Arabic, Hindi, Turkish, Vietnamese, Polish, Thai, Swedish, Danish, Norwegian, Finnish, Hungarian, Czech, Slovak, Romanian, Bulgarian, Ukrainian, Lithuanian, Latvian, Estonian, Slovenian, Malay, Indonesian, Tagalog, Swahili, Amharic",2024-07-18,False,131072,52.32333333333333,1.619222935116773,8.0,True
+gpt-4o-2024-08-06,2.5,10.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"English, Spanish, French, German, Chinese, Japanese, Korean, Italian, Portuguese, Dutch, Russian, Arabic, Hindi, Turkish, Vietnamese, Polish, Thai, Swedish, Danish, Norwegian, Finnish, Hungarian, Czech, Slovak, Romanian, Bulgarian, Ukrainian, Lithuanian, Latvian, Estonian, Slovenian, Malay, Indonesian, Tagalog, Swahili, Amharic",2024-08-06,False,131072,69.57000000000001,1.5771123003908176,200.0,True
+gpt-4o-2024-05-13,2.5,10.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"English, Spanish, French, German, Chinese, Japanese, Korean, Italian, Portuguese, Dutch, Russian, Arabic, Hindi, Turkish, Vietnamese, Polish, Thai, Swedish, Danish, Norwegian, Finnish, Hungarian, Czech, Slovak, Romanian, Bulgarian, Ukrainian, Lithuanian, Latvian, Estonian, Slovenian, Malay, Indonesian, Tagalog, Swahili, Amharic",2024-05-13,False,131072,66.87333333333333,3.704921340164487,200.0,True
+gpt-4-1106-vision-preview,10.0,30.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"English, Spanish, French, German, Chinese, Japanese, Korean, Italian, Portuguese, Dutch, Russian, Arabic, Hindi, Turkish, Vietnamese, Polish, Thai, Swedish, Danish, Norwegian, Finnish, Hungarian, Czech, Slovak, Romanian, Bulgarian, Ukrainian, Lithuanian, Latvian, Estonian, Slovenian, Malay, Indonesian, Tagalog, Swahili, Amharic",2023-11-06,False,131072,47.23,2.217200177676117,1760.0,True
+gemini-1.5-flash-latest,0.075,0.3,True,True,True,True,https://cloud.google.com/vertex-ai/generative-ai/pricing,Commercial License,,"Lithuanian, Norwegian, Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovenian, Spanish, Swahili, Swedish, Thai, Turkish, Ukrainian, Vietnamese, Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek, Hebrew, Hindi, Hungarian, Indonesian, Italian, Japanese, Korean, Latvian, Arabic, Bengali, Bulgarian",2024-05-24,False,131072,42.53666666666667,26.268280234692302,1760.0,True

src/process_data.py CHANGED Viewed

@@ -108,8 +108,56 @@ for col in additional_price_columns:
 # Clean and convert context to integer
 df['context_size'] = df['context_size'].replace({'k': ''}, regex=True).astype(int)
 df['parameter_size'] = df['parameter_size'].replace({'B': '', '': None}, regex=True).astype(float)
 # Keep only the specified columns
 df = df[[
     'model_name',

 # Clean and convert context to integer
 df['context_size'] = df['context_size'].replace({'k': ''}, regex=True).astype(int)
+df['context_size'] = df['context_size']*1024
 df['parameter_size'] = df['parameter_size'].replace({'B': '', '': None}, regex=True).astype(float)
+LANG_MAPPING = {
+    'el': 'Greek',
+    'id': 'Indonesian',
+    'ko': 'Korean',
+    'sv': 'Swedish',
+    'de': 'German',
+    'lv': 'Latvian',
+    'am': 'Amharic',
+    'fi': 'Finnish',
+    'da': 'Danish',
+    'pt': 'Portuguese',
+    'sw': 'Swahili',
+    'es': 'Spanish',
+    'it': 'Italian',
+    'bn': 'Bengali',
+    'nl': 'Dutch',
+    'lt': 'Lithuanian',
+    'ro': 'Romanian',
+    'sl': 'Slovenian',
+    'hu': 'Hungarian',
+    'hr': 'Croatian',
+    'vi': 'Vietnamese',
+    'hi': 'Hindi',
+    'zh': 'Chinese',
+    'pl': 'Polish',
+    'ar': 'Arabic',
+    'cs': 'Czech',
+    'sk': 'Slovak',
+    'ja': 'Japanese',
+    'no': 'Norwegian',
+    'uk': 'Ukrainian',
+    'fr': 'French',
+    'et': 'Estonian',
+    'ru': 'Russian',
+    'th': 'Thai',
+    'bg': 'Bulgarian',
+    'tr': 'Turkish',
+    'ms': 'Malay',
+    'he': 'Hebrew',
+    'tl': 'Tagalog',
+    'sr': 'Serbian',
+    'en': 'English'
+}
+df['languages'] = df['languages'].apply(lambda x: ', '.join([LANG_MAPPING.get(lang, lang) for lang in x.split(', ')]))
 # Keep only the specified columns
 df = df[[
     'model_name',

utils/__pycache__/filter_utils.cpython-310.pyc ADDED Viewed

Binary file (1.02 kB). View file

utils/filter_utils.py ADDED Viewed

	@@ -0,0 +1,29 @@

# Utility functions for filtering the leaderboard dataframe.

# Columns shown in the short leaderboard table, in display order.
_DISPLAY_COLUMNS = (
    'model_name',
    'input_price',
    'output_price',
    'release_date',
    'context_size',
    'average_clemscore',
    'average_latency',
    'parameter_size',
)


def filter_cols(df):
    """Return ``df`` restricted to the fixed short-leaderboard columns.

    Args:
        df: DataFrame that contains every column in ``_DISPLAY_COLUMNS``.

    Returns:
        A DataFrame with only the display columns, in display order.

    Raises:
        KeyError: If any display column is missing from ``df``.
    """
    return df[list(_DISPLAY_COLUMNS)]


def _supports_all(languages, wanted):
    """Tell whether a comma-separated ``languages`` cell names every entry of ``wanted``."""
    # Missing cells (NaN/None) are not strings; treat them as "no languages"
    # instead of letting the membership test raise TypeError.
    if not isinstance(languages, str):
        return False
    available = {name.strip() for name in languages.split(',')}
    return wanted <= available


# NOTE: the name shadows the builtin ``filter``; it is kept because callers
# import it under this name.
def filter(df, language_list, clemscore, input_price, output_price):  # noqa: A001
    """Filter the leaderboard by languages, clemscore and price ranges.

    Args:
        df: DataFrame with the display columns plus a ``languages`` column
            holding comma-separated language names.
        language_list: Languages a row must ALL support. ``None`` or an empty
            list disables the language filter.
        clemscore: ``(low, high)`` inclusive bounds on ``average_clemscore``.
        input_price: ``(low, high)`` inclusive bounds on ``input_price``.
        output_price: ``(low, high)`` inclusive bounds on ``output_price``.

    Returns:
        The matching rows, reduced to the display columns by ``filter_cols``.
    """
    wanted = set(language_list or ())

    # ``between`` is inclusive on both ends, matching ``>= low`` and ``<= high``.
    keep = (
        df['average_clemscore'].between(clemscore[0], clemscore[1])
        & df['input_price'].between(input_price[0], input_price[1])
        & df['output_price'].between(output_price[0], output_price[1])
    )

    if wanted:
        # Compare whole language names, not substrings of the joined string,
        # so that e.g. "Malay" does not match a row listing only "Malayalam".
        # ``astype(bool)`` keeps the mask boolean even for an empty frame.
        keep &= (
            df['languages']
            .map(lambda cell: _supports_all(cell, wanted))
            .astype(bool)
        )

    return filter_cols(df[keep])