Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Link to discussion with custom url
Browse files
    	
        app.py
    CHANGED
    
    | @@ -12,7 +12,7 @@ from transformers import AutoConfig | |
| 12 |  | 
| 13 | 
             
            from content import *
         | 
| 14 | 
             
            from elo_utils import get_elo_plots, get_elo_results_dicts
         | 
| 15 | 
            -
            from utils import get_eval_results_dicts, make_clickable_model
         | 
| 16 |  | 
| 17 | 
             
            # clone / pull the lmeh eval data
         | 
| 18 | 
             
            H4_TOKEN = os.environ.get("H4_TOKEN", None)
         | 
| @@ -25,7 +25,9 @@ api = HfApi() | |
| 25 |  | 
| 26 |  | 
| 27 | 
             
            def restart_space():
         | 
| 28 | 
            -
                api.restart_space( | 
|  | |
|  | |
| 29 |  | 
| 30 |  | 
| 31 | 
             
            def get_all_requested_models(requested_models_dir):
         | 
| @@ -203,7 +205,7 @@ def get_leaderboard_df(): | |
| 203 | 
             
            def get_evaluation_queue_df():
         | 
| 204 | 
             
                if repo:
         | 
| 205 | 
             
                    print("Pulling changes for the evaluation queue.")
         | 
| 206 | 
            -
                    repo.git_pull()
         | 
| 207 |  | 
| 208 | 
             
                entries = [
         | 
| 209 | 
             
                    entry
         | 
| @@ -396,6 +398,9 @@ def search_table(df, query): | |
| 396 | 
             
                filtered_df = df[df["model_name_for_query"].str.contains(query, case=False)]
         | 
| 397 | 
             
                return filtered_df
         | 
| 398 |  | 
|  | |
|  | |
|  | |
| 399 |  | 
| 400 | 
             
            custom_css = """
         | 
| 401 | 
             
            #changelog-text {
         | 
| @@ -410,6 +415,10 @@ custom_css = """ | |
| 410 | 
             
                font-size: 16px !important;
         | 
| 411 | 
             
            }
         | 
| 412 |  | 
|  | |
|  | |
|  | |
|  | |
| 413 | 
             
            #citation-button span {
         | 
| 414 | 
             
                font-size: 16px !important;
         | 
| 415 | 
             
            }
         | 
| @@ -452,7 +461,7 @@ table th:first-child { | |
| 452 | 
             
            }
         | 
| 453 |  | 
| 454 | 
             
            .tab-buttons button {
         | 
| 455 | 
            -
                font-size:  | 
| 456 | 
             
            }
         | 
| 457 |  | 
| 458 | 
             
            #scale-logo {
         | 
| @@ -475,7 +484,7 @@ with demo: | |
| 475 | 
             
                gr.HTML(TITLE)
         | 
| 476 | 
             
                with gr.Row():
         | 
| 477 | 
             
                    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
         | 
| 478 | 
            -
             | 
| 479 | 
             
                with gr.Row():
         | 
| 480 | 
             
                    with gr.Column():
         | 
| 481 | 
             
                        with gr.Accordion("📙 Citation", open=False):
         | 
| @@ -488,8 +497,8 @@ with demo: | |
| 488 | 
             
                        with gr.Accordion("✨ CHANGELOG", open=False):
         | 
| 489 | 
             
                            changelog = gr.Markdown(CHANGELOG_TEXT, elem_id="changelog-text")
         | 
| 490 |  | 
| 491 | 
            -
                with gr.Tabs(elem_classes="tab-buttons"):
         | 
| 492 | 
            -
                    with gr.TabItem("📊 LLM Benchmarks", elem_id="llm-benchmark-tab-table"):
         | 
| 493 | 
             
                        with gr.Column():
         | 
| 494 | 
             
                            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
         | 
| 495 | 
             
                            with gr.Box(elem_id="search-bar-table-box"):
         | 
| @@ -598,7 +607,7 @@ with demo: | |
| 598 | 
             
                                    submission_result,
         | 
| 599 | 
             
                                )
         | 
| 600 | 
             
                    with gr.TabItem(
         | 
| 601 | 
            -
                        "🧑‍⚖️ Human & GPT-4 Evaluations 🤖", elem_id="human-gpt-tab-table"
         | 
| 602 | 
             
                    ):
         | 
| 603 | 
             
                        with gr.Row():
         | 
| 604 | 
             
                            with gr.Column(scale=2):
         | 
| @@ -623,7 +632,25 @@ with demo: | |
| 623 | 
             
                            max_rows=5,
         | 
| 624 | 
             
                        )
         | 
| 625 |  | 
| 626 | 
            -
                        gr.Markdown( | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 627 | 
             
                    # with gr.Box():
         | 
| 628 | 
             
                    #     visualization_title = gr.HTML(VISUALIZATION_TITLE)
         | 
| 629 | 
             
                    #     with gr.Row():
         | 
|  | |
| 12 |  | 
| 13 | 
             
            from content import *
         | 
| 14 | 
             
            from elo_utils import get_elo_plots, get_elo_results_dicts
         | 
| 15 | 
            +
            from utils import get_eval_results_dicts, make_clickable_model, get_window_url_params
         | 
| 16 |  | 
| 17 | 
             
            # clone / pull the lmeh eval data
         | 
| 18 | 
             
            H4_TOKEN = os.environ.get("H4_TOKEN", None)
         | 
|  | |
| 25 |  | 
| 26 |  | 
| 27 | 
             
            def restart_space():
         | 
| 28 | 
            +
                api.restart_space(
         | 
| 29 | 
            +
                    repo_id="HuggingFaceH4/open_llm_leaderboard", token=H4_TOKEN
         | 
| 30 | 
            +
                )
         | 
| 31 |  | 
| 32 |  | 
| 33 | 
             
            def get_all_requested_models(requested_models_dir):
         | 
|  | |
| 205 | 
             
            def get_evaluation_queue_df():
         | 
| 206 | 
             
                if repo:
         | 
| 207 | 
             
                    print("Pulling changes for the evaluation queue.")
         | 
| 208 | 
            +
                    # repo.git_pull()
         | 
| 209 |  | 
| 210 | 
             
                entries = [
         | 
| 211 | 
             
                    entry
         | 
|  | |
| 398 | 
             
                filtered_df = df[df["model_name_for_query"].str.contains(query, case=False)]
         | 
| 399 | 
             
                return filtered_df
         | 
| 400 |  | 
| 401 | 
            +
            def change_tab(query_param):
         | 
| 402 | 
            +
                if query_param == "{'tab': 'evaluation'}":
         | 
| 403 | 
            +
                    return gr.Tabs.update(selected=1)
         | 
| 404 |  | 
| 405 | 
             
            custom_css = """
         | 
| 406 | 
             
            #changelog-text {
         | 
|  | |
| 415 | 
             
                font-size: 16px !important;
         | 
| 416 | 
             
            }
         | 
| 417 |  | 
| 418 | 
            +
            #models-to-add-text {
         | 
| 419 | 
            +
                font-size: 18px !important;
         | 
| 420 | 
            +
            }
         | 
| 421 | 
            +
             | 
| 422 | 
             
            #citation-button span {
         | 
| 423 | 
             
                font-size: 16px !important;
         | 
| 424 | 
             
            }
         | 
|  | |
| 461 | 
             
            }
         | 
| 462 |  | 
| 463 | 
             
            .tab-buttons button {
         | 
| 464 | 
            +
                font-size: 20px;
         | 
| 465 | 
             
            }
         | 
| 466 |  | 
| 467 | 
             
            #scale-logo {
         | 
|  | |
| 484 | 
             
                gr.HTML(TITLE)
         | 
| 485 | 
             
                with gr.Row():
         | 
| 486 | 
             
                    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
         | 
| 487 | 
            +
                
         | 
| 488 | 
             
                with gr.Row():
         | 
| 489 | 
             
                    with gr.Column():
         | 
| 490 | 
             
                        with gr.Accordion("📙 Citation", open=False):
         | 
|  | |
| 497 | 
             
                        with gr.Accordion("✨ CHANGELOG", open=False):
         | 
| 498 | 
             
                            changelog = gr.Markdown(CHANGELOG_TEXT, elem_id="changelog-text")
         | 
| 499 |  | 
| 500 | 
            +
                with gr.Tabs(elem_classes="tab-buttons") as tabs:
         | 
| 501 | 
            +
                    with gr.TabItem("📊 LLM Benchmarks", elem_id="llm-benchmark-tab-table", id=0):
         | 
| 502 | 
             
                        with gr.Column():
         | 
| 503 | 
             
                            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
         | 
| 504 | 
             
                            with gr.Box(elem_id="search-bar-table-box"):
         | 
|  | |
| 607 | 
             
                                    submission_result,
         | 
| 608 | 
             
                                )
         | 
| 609 | 
             
                    with gr.TabItem(
         | 
| 610 | 
            +
                        "🧑‍⚖️ Human & GPT-4 Evaluations 🤖", elem_id="human-gpt-tab-table", id=1
         | 
| 611 | 
             
                    ):
         | 
| 612 | 
             
                        with gr.Row():
         | 
| 613 | 
             
                            with gr.Column(scale=2):
         | 
|  | |
| 632 | 
             
                            max_rows=5,
         | 
| 633 | 
             
                        )
         | 
| 634 |  | 
| 635 | 
            +
                        gr.Markdown(
         | 
| 636 | 
            +
                            "\* Results when the scores of 4 and 5 were treated as ties.",
         | 
| 637 | 
            +
                            elem_classes="markdown-text",
         | 
| 638 | 
            +
                        )
         | 
| 639 | 
            +
             | 
| 640 | 
            +
                        gr.Markdown(
         | 
| 641 | 
            +
                            "Let us know in [this discussion](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/65) which models we should add!",
         | 
| 642 | 
            +
                            elem_id="models-to-add-text",
         | 
| 643 | 
            +
                        )
         | 
| 644 | 
            +
             | 
| 645 | 
            +
                        
         | 
| 646 | 
            +
                    
         | 
| 647 | 
            +
                dummy = gr.Textbox(visible=False)
         | 
| 648 | 
            +
                demo.load(
         | 
| 649 | 
            +
                    change_tab,
         | 
| 650 | 
            +
                    dummy,
         | 
| 651 | 
            +
                    tabs,
         | 
| 652 | 
            +
                    _js=get_window_url_params,
         | 
| 653 | 
            +
                )
         | 
| 654 | 
             
                    # with gr.Box():
         | 
| 655 | 
             
                    #     visualization_title = gr.HTML(VISUALIZATION_TITLE)
         | 
| 656 | 
             
                    #     with gr.Row():
         | 
    	
        utils.py
    CHANGED
    
    | @@ -139,3 +139,11 @@ def get_eval_results_dicts(is_public=True) -> List[Dict]: | |
| 139 | 
             
                eval_results = get_eval_results(is_public)
         | 
| 140 |  | 
| 141 | 
             
                return [e.to_dict() for e in eval_results]
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 139 | 
             
                eval_results = get_eval_results(is_public)
         | 
| 140 |  | 
| 141 | 
             
                return [e.to_dict() for e in eval_results]
         | 
| 142 | 
            +
             | 
| 143 | 
            +
            get_window_url_params = """
         | 
| 144 | 
            +
                function(url_params) {
         | 
| 145 | 
            +
                    const params = new URLSearchParams(window.location.search);
         | 
| 146 | 
            +
                    url_params = Object.fromEntries(params);
         | 
| 147 | 
            +
                    return url_params;
         | 
| 148 | 
            +
                    }
         | 
| 149 | 
            +
                """
         | 

