Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -25,7 +25,7 @@ def make_leaderboard_md(elo_results): | |
| 25 | 
             
            - [MT-Bench](https://arxiv.org/abs/2306.05685) - a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.
         | 
| 26 | 
             
            - [MMLU](https://arxiv.org/abs/2009.03300) (5-shot) - a test to measure a model's multitask accuracy on 57 tasks.
         | 
| 27 |  | 
| 28 | 
            -
            💻 Code: The Arena Elo ratings are computed by this [notebook]({notebook_url}). The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge). The MMLU scores are computed by [InstructEval](https://github.com/declare-lab/instruct-eval) and [Chain-of-Thought Hub](https://github.com/FranxYao/chain-of-thought-hub). Higher values are better for all benchmarks. Empty cells mean not available.
         | 
| 29 | 
             
            """
         | 
| 30 | 
             
                return leaderboard_md
         | 
| 31 |  | 
| @@ -233,6 +233,9 @@ Please note that you may see different orders from different ranking methods. Th | |
| 233 | 
             
                            "#### Figure 4: Average Win Rate Against All Other Models (Assuming Uniform Sampling and No Ties)"
         | 
| 234 | 
             
                        )
         | 
| 235 | 
             
                        plot_4 = gr.Plot(p4, show_label=False)
         | 
|  | |
|  | |
|  | |
| 236 | 
             
                return [md_1, plot_1, plot_2, plot_3, plot_4]
         | 
| 237 |  | 
| 238 | 
             
            block_css = """
         | 
| @@ -294,7 +297,6 @@ def build_demo(elo_results_file, leaderboard_table_file): | |
| 294 | 
             
                    leader_components = build_leaderboard_tab(
         | 
| 295 | 
             
                        elo_results_file, leaderboard_table_file
         | 
| 296 | 
             
                    )
         | 
| 297 | 
            -
                    gr.Markdown(acknowledgment_md)
         | 
| 298 |  | 
| 299 | 
             
                return demo
         | 
| 300 |  | 
|  | |
| 25 | 
             
            - [MT-Bench](https://arxiv.org/abs/2306.05685) - a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.
         | 
| 26 | 
             
            - [MMLU](https://arxiv.org/abs/2009.03300) (5-shot) - a test to measure a model's multitask accuracy on 57 tasks.
         | 
| 27 |  | 
| 28 | 
            +
            💻 Code: The Arena Elo ratings are computed by this [notebook]({notebook_url}). The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge). The MMLU scores are computed by [InstructEval](https://github.com/declare-lab/instruct-eval) and [Chain-of-Thought Hub](https://github.com/FranxYao/chain-of-thought-hub). Higher values are better for all benchmarks. Empty cells mean not available. Last updated: Sept, 2023.
         | 
| 29 | 
             
            """
         | 
| 30 | 
             
                return leaderboard_md
         | 
| 31 |  | 
|  | |
| 233 | 
             
                            "#### Figure 4: Average Win Rate Against All Other Models (Assuming Uniform Sampling and No Ties)"
         | 
| 234 | 
             
                        )
         | 
| 235 | 
             
                        plot_4 = gr.Plot(p4, show_label=False)
         | 
| 236 | 
            +
             | 
| 237 | 
            +
                gr.Markdown(acknowledgment_md)
         | 
| 238 | 
            +
             | 
| 239 | 
             
                return [md_1, plot_1, plot_2, plot_3, plot_4]
         | 
| 240 |  | 
| 241 | 
             
            block_css = """
         | 
|  | |
| 297 | 
             
                    leader_components = build_leaderboard_tab(
         | 
| 298 | 
             
                        elo_results_file, leaderboard_table_file
         | 
| 299 | 
             
                    )
         | 
|  | |
| 300 |  | 
| 301 | 
             
                return demo
         | 
| 302 |  | 
 
			

