Upload 5 files
Browse files- README.md +13 -13
- elo_results_20240830.pkl +3 -0
- leaderboard_table_20240830.csv +29 -0
- requirements.txt +0 -0
- theme.json +1 -0
README.md
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
-
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 4.44.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
license: apache-2.0
|
11 |
-
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
---
|
2 |
+
title: LLM Arena Leaderboard
|
3 |
+
emoji: 🏆
|
4 |
+
colorFrom: indigo
|
5 |
+
colorTo: pink
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 4.44.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: apache-2.0
|
11 |
+
---
|
12 |
+
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
elo_results_20240830.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:317fe51082f935a603f94f1aa315c696c77eff705857145351397dcc2d4acdbd
|
3 |
+
size 1099231
|
leaderboard_table_20240830.csv
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
rating,variance,rating_q975,rating_q025,num_battles,final_ranking,key,Model,License,Organization,Knowledge cutoff date,Link,MT-bench (score),MMLU
|
2 |
+
1031.7325599881628,1319.4141323598274,1059.409330284346,918.4104238278062,470,1,gpt-4o-2024-05-13,gpt-4o-2024-05-13,Proprietary,OpenAI,10-2023,https://openai.com/api/,70.0,50.0
|
3 |
+
1109.3779556017184,1265.368563728756,1134.7083127122967,999.4535859548364,527,1,Claude 3.5 Sonnet,Claude 3.5 Sonnet,Proprietary,Anthropic,06-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
|
4 |
+
1064.8056481429333,1285.7898798298886,1087.6049607799116,952.7569955972164,610,1,gpt-4-turbo-2024-04-09,gpt-4-turbo-2024-04-09,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
|
5 |
+
911.5137627607211,1154.4994891567385,929.6626728228481,807.3385223388616,809,8,gpt-4o-mini-2024-07-18,gpt-4o-mini-2024-07-18,Proprietary,OpenAI,07-2024,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/,70.0,50.0
|
6 |
+
933.3908256697254,1224.394614864855,953.081459308694,821.2872210234226,681,7,gpt-4-0613,gpt-4-0613,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
|
7 |
+
892.103443075417,1187.2232337574437,910.4756962790046,784.9502063587175,795,12,Llama 3.1 405B Instruct Turbo,Llama 3.1 405B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
|
8 |
+
938.6542846619731,1163.070502656141,956.5326574132677,830.9395648888644,817,7,Gemma 2 27B,Gemma 2 27B,Proprietary,Google,-,https://blog.google/technology/developers/google-gemma-2/,-,-
|
9 |
+
961.3132703667028,1171.367198143491,981.2249348395449,853.9627722960136,761,4,YandexGPT Experimental,YandexGPT Experimental,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
|
10 |
+
1002.1966955839318,1278.607587313062,1026.6023400892482,892.9881182349854,472,1,Claude 3 Haiku,Claude 3 Haiku,Proprietary,Anthropic,03-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-family,-,-
|
11 |
+
954.3499146241044,1237.1096945986797,976.6705044404565,848.1229059834299,472,4,saiga_llama3_70b,saiga_llama3_70b,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_70b_sft_m1_d5_abliterated_awq_4bit,-,-
|
12 |
+
1068.753149420043,1193.431124270226,1089.7256900690663,961.7183024005114,650,1,Qwen 2 Instruct (72B),Qwen 2 Instruct (72B),Open Source,Qwen,12-2023,https://huggingface.co/Qwen/Qwen2-72B-Instruct,-,-
|
13 |
+
1018.8725795421834,1201.604789220443,1038.0471858098285,909.761905160084,683,1,Llama 3.1 70B Instruct Turbo,Llama 3.1 70B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
|
14 |
+
947.2465325678799,1204.8080726635785,967.6962660991635,840.9325115565997,662,6,LLaMA-3 Chat (70B),LLaMA-3 Chat (70B),Proprietary,Meta,12-2023,https://llama.meta.com/llama3/,-,-
|
15 |
+
1020.7781976589234,1241.4480538764974,1045.1491832970908,913.2792086136737,478,1,YandexGPT 3 Pro,YandexGPT 3 Pro,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,65.2,45.2
|
16 |
+
979.1016833538558,1227.9057184379915,1004.9739078756248,871.2268457332833,592,2,gpt-3.5-turbo-0125,gpt-3.5-turbo-0125,Proprietary,OpenAI,09-2021,https://openai.com/api/,65.2,45.2
|
17 |
+
947.1974850841607,1275.1879081556872,973.8868901376999,838.7333160381975,372,4,T-lite-instruct-0.1,T-lite-instruct-0.1,Open Source,t-bank-ai,In training,https://huggingface.co/AnatoliiPotapov/T-lite-instruct-0.1,-,-
|
18 |
+
932.3414984331068,1186.8528437389484,952.9114084922326,823.9194558753339,728,7,saiga_llama3_8b_v7,saiga_llama3_8b_v7,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
|
19 |
+
993.3717309470745,1153.4480847064751,1012.6095760433378,885.4226198248776,740,1,GigaChat-Pro 4.0.26.8,GigaChat-Pro 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
20 |
+
1039.5729042528817,1218.87234324006,1062.8321471985744,926.6228425875917,488,1,saiga_phi3_medium,saiga_phi3_medium,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_phi3_medium_sft_m1_d2_kto_m5_d7,-,-
|
21 |
+
993.2468606050761,1180.5507491178575,1012.9642504117858,884.6140154936015,791,1,LLaMA-3 Chat (8B),LLaMA-3 Chat (8B),Proprietary,Meta,03-2023,https://llama.meta.com/llama3/,-,-
|
22 |
+
1074.2023896278488,1134.305469980829,1094.514886992407,969.4351042979471,886,1,Llama 3.1 8B Instruct Turbo,Llama 3.1 8B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
|
23 |
+
1090.8592801570258,1140.3396239214605,1109.5820557811471,984.6181128879861,930,1,Vikhrmodels/it-5.2-fp16-cp,Vikhrmodels/it-5.2-fp16-cp,Open Source,Vikhrmodels,In training,https://huggingface.co/Vikhrmodels/it-5.2-fp16-cp,-,-
|
24 |
+
1117.0189539475066,1170.6680998872027,1136.6808066769574,1010.1171845107525,908,1,GigaChat-Pro 2.2.25.3,GigaChat-Pro 2.2.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
25 |
+
1080.0632142443226,1250.7374533620869,1101.998696996777,969.5413704610022,664,1,GigaChat 4.0.26.8,GigaChat 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
26 |
+
1023.5502336779675,1233.5530647127014,1046.6585304493597,911.4432496338418,543,1,YandexGPT 3 Lite,YandexGPT 3 Lite,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
|
27 |
+
930.8355157487584,1299.5274174541444,959.0654811276472,817.9483047283851,257,7,saiga_llama3_8b_v6,saiga_llama3_8b_v6,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
|
28 |
+
970.60130272153,1303.4941408237394,996.4235752732167,857.2591240188583,446,3,GigaChat 3.1.25.3,GigaChat 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
29 |
+
960.1369134796026,1215.3897603683151,981.8076801143751,850.9384856108709,496,4,GigaChat-Plus 3.1.25.3,GigaChat-Plus 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
requirements.txt
ADDED
Binary file (2.2 kB). View file
|
|
theme.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"theme": {"text_size": "20px", "background_fill_primary": "white", "background_fill_primary_dark": "*neutral_950", "background_fill_secondary": "*neutral_50", "background_fill_secondary_dark": "*neutral_900", "block_background_fill": "*background_fill_primary", "block_background_fill_dark": "*neutral_800", "block_border_color": "*border_color_primary", "block_border_color_dark": "*border_color_primary", "block_border_width": "1px", "block_border_width_dark": "1px", "block_info_text_color": "*body_text_color_subdued", "block_info_text_color_dark": "*body_text_color_subdued", "block_info_text_size": "*text_sm", "block_info_text_weight": "400", "block_label_background_fill": "*background_fill_primary", "block_label_background_fill_dark": "*background_fill_secondary", "block_label_border_color": "*border_color_primary", "block_label_border_color_dark": "*border_color_primary", "block_label_border_width": "1px", "block_label_border_width_dark": "1px", "block_label_margin": "0", "block_label_padding": "*spacing_sm *spacing_lg", "block_label_radius": "calc(*radius_lg - 1px) 0 calc(*radius_lg - 1px) 0", "block_label_right_radius": "0 calc(*radius_lg - 1px) 0 calc(*radius_lg - 1px)", "block_label_shadow": "*block_shadow", "block_label_text_color": "*neutral_500", "block_label_text_color_dark": "*neutral_200", "block_label_text_size": "*text_sm", "block_label_text_weight": "400", "block_padding": "*spacing_xl calc(*spacing_xl + 2px)", "block_radius": "*radius_lg", "block_shadow": "none", "block_shadow_dark": "none", "block_title_background_fill": "none", "block_title_background_fill_dark": "none", "block_title_border_color": "none", "block_title_border_color_dark": "none", "block_title_border_width": "0px", "block_title_border_width_dark": "0px", "block_title_padding": "0", "block_title_radius": "none", "block_title_text_color": "*neutral_500", "block_title_text_color_dark": "*neutral_200", "block_title_text_size": "*text_md", "block_title_text_weight": "400", 
"body_background_fill": "*background_fill_primary", "body_background_fill_dark": "*background_fill_primary", "body_text_color": "*neutral_700", "body_text_color_dark": "*neutral_200", "body_text_color_subdued": "*neutral_400", "body_text_color_subdued_dark": "*neutral_500", "body_text_size": "*text_md", "body_text_weight": "400", "border_color_accent": "*primary_300", "border_color_accent_dark": "*neutral_600", "border_color_primary": "*neutral_200", "border_color_primary_dark": "*neutral_700", "button_border_width": "*input_border_width", "button_border_width_dark": "*input_border_width", "button_cancel_background_fill": "*button_secondary_background_fill", "button_cancel_background_fill_dark": "*button_secondary_background_fill", "button_cancel_background_fill_hover": "*button_cancel_background_fill", "button_cancel_background_fill_hover_dark": "*button_cancel_background_fill", "button_cancel_border_color": "*button_secondary_border_color", "button_cancel_border_color_dark": "*button_secondary_border_color", "button_cancel_border_color_hover": "*button_cancel_border_color", "button_cancel_border_color_hover_dark": "*button_cancel_border_color", "button_cancel_text_color": "*button_secondary_text_color", "button_cancel_text_color_dark": "*button_secondary_text_color", "button_cancel_text_color_hover": "*button_cancel_text_color", "button_cancel_text_color_hover_dark": "*button_cancel_text_color", "button_large_padding": "*spacing_lg calc(2 * *spacing_lg)", "button_large_radius": "*radius_lg", "button_large_text_size": "*text_lg", "button_large_text_weight": "500", "button_primary_background_fill": "*primary_200", "button_primary_background_fill_dark": "*primary_700", "button_primary_background_fill_hover": "*button_primary_background_fill", "button_primary_background_fill_hover_dark": "*button_primary_background_fill", "button_primary_border_color": "*primary_200", "button_primary_border_color_dark": "*primary_600", "button_primary_border_color_hover": 
"*button_primary_border_color", "button_primary_border_color_hover_dark": "*button_primary_border_color", "button_primary_text_color": "*primary_600", "button_primary_text_color_dark": "white", "button_primary_text_color_hover": "*button_primary_text_color", "button_primary_text_color_hover_dark": "*button_primary_text_color", "button_secondary_background_fill": "*neutral_200", "button_secondary_background_fill_dark": "*neutral_600", "button_secondary_background_fill_hover": "*neutral_300", "button_secondary_background_fill_hover_dark": "*neutral_500", "button_secondary_border_color": "*neutral_200", "button_secondary_border_color_dark": "*neutral_600", "button_secondary_border_color_hover": "*button_secondary_border_color", "button_secondary_border_color_hover_dark": "*button_secondary_border_color", "button_secondary_text_color": "*neutral_700", "button_secondary_text_color_dark": "white", "button_secondary_text_color_hover": "*button_secondary_text_color", "button_secondary_text_color_hover_dark": "*button_secondary_text_color", "button_shadow": "none", "button_shadow_active": "none", "button_shadow_hover": "none", "button_small_padding": "*spacing_sm calc(2 * *spacing_sm)", "button_small_radius": "*radius_lg", "button_small_text_size": "*text_md", "button_small_text_weight": "400", "button_transition": "background-color 0.2s ease", "checkbox_background_color": "*background_fill_primary", "checkbox_background_color_dark": "*neutral_800", "checkbox_background_color_focus": "*checkbox_background_color", "checkbox_background_color_focus_dark": "*checkbox_background_color", "checkbox_background_color_hover": "*checkbox_background_color", "checkbox_background_color_hover_dark": "*checkbox_background_color", "checkbox_background_color_selected": "*secondary_600", "checkbox_background_color_selected_dark": "*secondary_600", "checkbox_border_color": "*neutral_300", "checkbox_border_color_dark": "*neutral_700", "checkbox_border_color_focus": "*secondary_500", 
"checkbox_border_color_focus_dark": "*secondary_500", "checkbox_border_color_hover": "*neutral_300", "checkbox_border_color_hover_dark": "*neutral_600", "checkbox_border_color_selected": "*secondary_600", "checkbox_border_color_selected_dark": "*secondary_600", "checkbox_border_radius": "*radius_sm", "checkbox_border_width": "*input_border_width", "checkbox_border_width_dark": "*input_border_width", "checkbox_check": "url(\"data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3cpath d='M12.207 4.793a1 1 0 010 1.414l-5 5a1 1 0 01-1.414 0l-2-2a1 1 0 011.414-1.414L6.5 9.086l4.293-4.293a1 1 0 011.414 0z'/%3e%3c/svg%3e\")", "checkbox_label_background_fill": "*button_secondary_background_fill", "checkbox_label_background_fill_dark": "*button_secondary_background_fill", "checkbox_label_background_fill_hover": "*button_secondary_background_fill_hover", "checkbox_label_background_fill_hover_dark": "*button_secondary_background_fill_hover", "checkbox_label_background_fill_selected": "*checkbox_label_background_fill", "checkbox_label_background_fill_selected_dark": "*checkbox_label_background_fill", "checkbox_label_border_color": "*border_color_primary", "checkbox_label_border_color_dark": "*border_color_primary", "checkbox_label_border_color_hover": "*checkbox_label_border_color", "checkbox_label_border_color_hover_dark": "*checkbox_label_border_color", "checkbox_label_border_width": "*input_border_width", "checkbox_label_border_width_dark": "*input_border_width", "checkbox_label_gap": "*spacing_lg", "checkbox_label_padding": "*spacing_md calc(2 * *spacing_md)", "checkbox_label_shadow": "none", "checkbox_label_text_color": "*body_text_color", "checkbox_label_text_color_dark": "*body_text_color", "checkbox_label_text_color_selected": "*checkbox_label_text_color", "checkbox_label_text_color_selected_dark": "*checkbox_label_text_color", "checkbox_label_text_size": "*text_md", "checkbox_label_text_weight": "400", "checkbox_shadow": 
"*input_shadow", "color_accent": "*primary_500", "color_accent_soft": "*primary_50", "color_accent_soft_dark": "*neutral_700", "container_radius": "*radius_lg", "embed_radius": "*radius_md", "error_background_fill": "#fee2e2", "error_background_fill_dark": "*background_fill_primary", "error_border_color": "#fecaca", "error_border_color_dark": "*border_color_primary", "error_border_width": "1px", "error_border_width_dark": "1px", "error_text_color": "#ef4444", "error_text_color_dark": "#ef4444", "form_gap_width": "0px", "input_background_fill": "*neutral_100", "input_background_fill_dark": "*neutral_700", "input_background_fill_focus": "*secondary_500", "input_background_fill_focus_dark": "*secondary_600", "input_background_fill_hover": "*input_background_fill", "input_background_fill_hover_dark": "*input_background_fill", "input_border_color": "*border_color_primary", "input_border_color_dark": "*border_color_primary", "input_border_color_focus": "*secondary_300", "input_border_color_focus_dark": "*neutral_700", "input_border_color_hover": "*input_border_color", "input_border_color_hover_dark": "*input_border_color", "input_border_width": "0px", "input_border_width_dark": "0px", "input_padding": "*spacing_xl", "input_placeholder_color": "*neutral_400", "input_placeholder_color_dark": "*neutral_500", "input_radius": "*radius_lg", "input_shadow": "none", "input_shadow_dark": "none", "input_shadow_focus": "*input_shadow", "input_shadow_focus_dark": "*input_shadow", "input_text_size": "*text_md", "input_text_weight": "400", "layout_gap": "*spacing_xxl", "link_text_color": "*secondary_600", "link_text_color_active": "*secondary_600", "link_text_color_active_dark": "*secondary_500", "link_text_color_dark": "*secondary_500", "link_text_color_hover": "*secondary_700", "link_text_color_hover_dark": "*secondary_400", "link_text_color_visited": "*secondary_500", "link_text_color_visited_dark": "*secondary_600", "loader_color": "*color_accent", "loader_color_dark": 
"*color_accent", "name": "base", "neutral_100": "#f5f5f4", "neutral_200": "#e7e5e4", "neutral_300": "#d6d3d1", "neutral_400": "#a8a29e", "neutral_50": "#fafaf9", "neutral_500": "#78716c", "neutral_600": "#57534e", "neutral_700": "#44403c", "neutral_800": "#292524", "neutral_900": "#1c1917", "neutral_950": "#0f0e0d", "panel_background_fill": "*background_fill_secondary", "panel_background_fill_dark": "*background_fill_secondary", "panel_border_color": "*border_color_primary", "panel_border_color_dark": "*border_color_primary", "panel_border_width": "0", "panel_border_width_dark": "0", "primary_100": "#e0f2fe", "primary_200": "#bae6fd", "primary_300": "#7dd3fc", "primary_400": "#38bdf8", "primary_50": "#f0f9ff", "primary_500": "#0ea5e9", "primary_600": "#0284c7", "primary_700": "#0369a1", "primary_800": "#075985", "primary_900": "#0c4a6e", "primary_950": "#0b4165", "prose_header_text_weight": "500", "prose_text_size": "*text_md", "prose_text_weight": "400", "radio_circle": "url(\"data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e\")", "radius_lg": "3px", "radius_md": "3px", "radius_sm": "3px", "radius_xl": "3px", "radius_xs": "3px", "radius_xxl": "3px", "radius_xxs": "3px", "secondary_100": "#e0f2fe", "secondary_200": "#bae6fd", "secondary_300": "#7dd3fc", "secondary_400": "#38bdf8", "secondary_50": "#f0f9ff", "secondary_500": "#0ea5e9", "secondary_600": "#0284c7", "secondary_700": "#0369a1", "secondary_800": "#075985", "secondary_900": "#0c4a6e", "secondary_950": "#0b4165", "section_header_text_size": "*text_md", "section_header_text_weight": "400", "shadow_drop": "rgba(0,0,0,0.05) 0px 1px 2px 0px", "shadow_drop_lg": "0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1)", "shadow_inset": "rgba(0,0,0,0.05) 0px 2px 4px 0px inset", "shadow_spread": "3px", "shadow_spread_dark": "1px", "slider_color": "*primary_600", "slider_color_dark": "*primary_600", "spacing_lg": "8px", 
"spacing_md": "6px", "spacing_sm": "4px", "spacing_xl": "10px", "spacing_xs": "2px", "spacing_xxl": "16px", "spacing_xxs": "1px", "stat_background_fill": "*primary_300", "stat_background_fill_dark": "*primary_500", "table_border_color": "*neutral_300", "table_border_color_dark": "*neutral_700", "table_even_background_fill": "white", "table_even_background_fill_dark": "*neutral_950", "table_odd_background_fill": "*neutral_50", "table_odd_background_fill_dark": "*neutral_900", "table_radius": "*radius_lg", "table_row_focus": "*color_accent_soft", "table_row_focus_dark": "*color_accent_soft", "text_lg": "20px", "text_md": "16px", "text_sm": "14px", "text_xl": "24px", "text_xs": "12px", "text_xxl": "28px", "text_xxs": "10px"}, "version": "0.0.1"}
|