Link to discussion with custom URL
app.py  (CHANGED)

@@ -12,7 +12,7 @@ from transformers import AutoConfig
 
 from content import *
 from elo_utils import get_elo_plots, get_elo_results_dicts
-from utils import get_eval_results_dicts, make_clickable_model
+from utils import get_eval_results_dicts, make_clickable_model, get_window_url_params
 
 # clone / pull the lmeh eval data
 H4_TOKEN = os.environ.get("H4_TOKEN", None)

@@ -25,7 +25,9 @@ api = HfApi()
 
 
 def restart_space():
-    api.restart_space(repo_id="HuggingFaceH4/open_llm_leaderboard", token=H4_TOKEN)
+    api.restart_space(
+        repo_id="HuggingFaceH4/open_llm_leaderboard", token=H4_TOKEN
+    )
 
 
 def get_all_requested_models(requested_models_dir):

@@ -203,7 +205,7 @@ def get_leaderboard_df():
 def get_evaluation_queue_df():
     if repo:
         print("Pulling changes for the evaluation queue.")
-        repo.git_pull()
+        # repo.git_pull()
 
     entries = [
         entry

@@ -396,6 +398,9 @@ def search_table(df, query):
     filtered_df = df[df["model_name_for_query"].str.contains(query, case=False)]
     return filtered_df
 
+def change_tab(query_param):
+    if query_param == "{'tab': 'evaluation'}":
+        return gr.Tabs.update(selected=1)
 
 custom_css = """
 #changelog-text {

@@ -410,6 +415,10 @@ custom_css = """
     font-size: 16px !important;
 }
 
+#models-to-add-text {
+    font-size: 18px !important;
+}
+
 #citation-button span {
     font-size: 16px !important;
 }

@@ -452,7 +461,7 @@ table th:first-child {
 }
 
 .tab-buttons button {
-    font-size:
+    font-size: 20px;
 }
 
 #scale-logo {

@@ -475,7 +484,7 @@ with demo:
     gr.HTML(TITLE)
     with gr.Row():
         gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
+
     with gr.Row():
         with gr.Column():
             with gr.Accordion("📙 Citation", open=False):

@@ -488,8 +497,8 @@ with demo:
             with gr.Accordion("✨ CHANGELOG", open=False):
                 changelog = gr.Markdown(CHANGELOG_TEXT, elem_id="changelog-text")
 
-    with gr.Tabs(elem_classes="tab-buttons"):
-        with gr.TabItem("🏅 LLM Benchmarks", elem_id="llm-benchmark-tab-table"):
+    with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        with gr.TabItem("🏅 LLM Benchmarks", elem_id="llm-benchmark-tab-table", id=0):
             with gr.Column():
                 gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
                 with gr.Box(elem_id="search-bar-table-box"):

@@ -598,7 +607,7 @@ with demo:
                 submission_result,
             )
         with gr.TabItem(
-            "🧑‍⚖️ Human & GPT-4 Evaluations 🤖", elem_id="human-gpt-tab-table"
+            "🧑‍⚖️ Human & GPT-4 Evaluations 🤖", elem_id="human-gpt-tab-table", id=1
        ):
             with gr.Row():
                 with gr.Column(scale=2):

@@ -623,7 +632,25 @@ with demo:
                         max_rows=5,
                     )
 
-                    gr.Markdown("\* Results when the scores of 4 and 5 were treated as ties.", elem_classes="markdown-text")
+                    gr.Markdown(
+                        "\* Results when the scores of 4 and 5 were treated as ties.",
+                        elem_classes="markdown-text",
+                    )
+
+        gr.Markdown(
+            "Let us know in [this discussion](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/65) which models we should add!",
+            elem_id="models-to-add-text",
+        )
+
+
+
+        dummy = gr.Textbox(visible=False)
+        demo.load(
+            change_tab,
+            dummy,
+            tabs,
+            _js=get_window_url_params,
+        )
         # with gr.Box():
         #     visualization_title = gr.HTML(VISUALIZATION_TITLE)
         # with gr.Row():
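Taken together, the app.py changes wire up tab selection from the page URL: a hidden gr.Textbox is filled on page load by a JavaScript snippet (get_window_url_params, added in utils.py below) that reads the query string, change_tab compares the stringified parameters against {'tab': 'evaluation'}, and the returned gr.Tabs.update(selected=1) switches to the second tab. Below is a minimal, self-contained sketch of that pattern rather than the leaderboard code itself; it assumes Gradio 3.x (where Blocks.load still accepts _js and gr.Tabs.update exists), and the tab labels and the no-op fallback return are illustrative additions.

import gradio as gr

# JavaScript run in the browser on page load: it returns the URL query
# parameters as a plain object, which Gradio hands to the Python callback.
get_window_url_params = """
    function(url_params) {
        const params = new URLSearchParams(window.location.search);
        url_params = Object.fromEntries(params);
        return url_params;
    }
"""

def change_tab(query_param):
    # The hidden Textbox stringifies the incoming object, so the comparison
    # is against its string form, exactly as in the diff above.
    if query_param == "{'tab': 'evaluation'}":
        return gr.Tabs.update(selected=1)
    # Fallback added for this sketch: leave the tab selection unchanged.
    return gr.Tabs.update()

with gr.Blocks() as demo:
    with gr.Tabs() as tabs:
        with gr.TabItem("Benchmarks", id=0):  # placeholder label
            gr.Markdown("Benchmark table goes here.")
        with gr.TabItem("Evaluations", id=1):  # placeholder label
            gr.Markdown("Human & GPT-4 evaluations go here.")

    dummy = gr.Textbox(visible=False)
    # On load: run the JS, drop its result into the hidden textbox, pass it
    # to change_tab, and apply the returned update to `tabs`.
    demo.load(change_tab, dummy, tabs, _js=get_window_url_params)

if __name__ == "__main__":
    demo.launch()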
utils.py  (CHANGED)

@@ -139,3 +139,11 @@ def get_eval_results_dicts(is_public=True) -> List[Dict]:
     eval_results = get_eval_results(is_public)
 
     return [e.to_dict() for e in eval_results]
+
+get_window_url_params = """
+    function(url_params) {
+        const params = new URLSearchParams(window.location.search);
+        url_params = Object.fromEntries(params);
+        return url_params;
+    }
+    """
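With get_window_url_params reading window.location.search in the browser and change_tab checking for {'tab': 'evaluation'}, opening the Space with that query string appended, for example https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?tab=evaluation, should land directly on the "🧑‍⚖️ Human & GPT-4 Evaluations 🤖" tab (id=1); any other value, or no parameter, leaves the default first tab selected. The example URL is inferred from the parameter name checked in change_tab and the Space id in the discussion link above, not stated in the commit itself.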