	Add a baseline

app.py CHANGED

@@ -1,13 +1,11 @@
 import os
-import shutil
 import numpy as np
 import gradio as gr
 from huggingface_hub import Repository, HfApi
 from transformers import AutoConfig
 import json
-from apscheduler.schedulers.background import BackgroundScheduler
 import pandas as pd
-import
+from content import CHANGELOG_TEXT
 from utils import get_eval_results_dicts, make_clickable_model
 
 # clone / pull the lmeh eval data
@@ -140,6 +138,19 @@ def get_leaderboard():
         }
     all_data.append(gpt35_values)
 
+    base_line = {
+            "Model": '<p>Baseline</p>',
+            "Revision": "N/A",
+            "8bit": None,
+            "Average ⬆️": 25.0,
+            "ARC (25-shot) ⬆️": 25.0,
+            "HellaSwag (10-shot) ⬆️": 25.0,
+            "MMLU (5-shot) ⬆️": 25.0,
+            "TruthfulQA (0-shot) ⬆️": 25.0,
+        }
+
+    all_data.append(base_line)
+
     df = pd.DataFrame.from_records(all_data)
     df = df.sort_values(by=["Average ⬆️"], ascending=False)
     df = df[COLS]
@@ -323,7 +334,7 @@ We chose these benchmarks as they test a variety of reasoning and general knowle
 
     """
         )
-    with gr.Accordion("Finished Evaluations", open=False):
+    with gr.Accordion("✅ Finished Evaluations", open=False):
         with gr.Row():
             finished_eval_table = gr.components.Dataframe(
                 value=finished_eval_queue,
@@ -331,7 +342,7 @@ We chose these benchmarks as they test a variety of reasoning and general knowle
                 datatype=EVAL_TYPES,
                 max_rows=5,
             )
-    with gr.Accordion("Running Evaluation Queue", open=False):
+    with gr.Accordion("🔄 Running Evaluation Queue", open=False):
         with gr.Row():
             running_eval_table = gr.components.Dataframe(
                 value=running_eval_queue,
@@ -340,7 +351,7 @@ We chose these benchmarks as they test a variety of reasoning and general knowle
                 max_rows=5,
            )
 
-    with gr.Accordion("Pending Evaluation Queue", open=False):
+    with gr.Accordion("⏳ Pending Evaluation Queue", open=False):
         with gr.Row():
             pending_eval_table = gr.components.Dataframe(
                 value=pending_eval_queue,
@@ -378,6 +389,7 @@ We chose these benchmarks as they test a variety of reasoning and general knowle
 
         with gr.Row():
            submit_button = gr.Button("Submit Eval")
+
         with gr.Row():
            submission_result = gr.Markdown()
            submit_button.click(
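
For readers skimming the diff: the baseline is just one more record appended to all_data before the DataFrame is built, so it gets sorted into the leaderboard by its average like any model row. The following is a minimal sketch of that flow, not the app's actual code; the model rows and their scores are invented for illustration, and only the Model and Average columns from the diff are used.

import pandas as pd

# Hypothetical model rows for illustration only (not real leaderboard scores).
all_data = [
    {"Model": "<p>model-a</p>", "Average ⬆️": 52.3},
    {"Model": "<p>model-b</p>", "Average ⬆️": 21.7},
]

# The baseline added in this commit: 25.0 across the board, as in the diff above.
base_line = {
    "Model": "<p>Baseline</p>",
    "Average ⬆️": 25.0,
}
all_data.append(base_line)

# Same construction as the context lines in get_leaderboard(): build the frame,
# then sort so the baseline lands wherever its average puts it.
df = pd.DataFrame.from_records(all_data)
df = df.sort_values(by=["Average ⬆️"], ascending=False)
print(df["Model"].tolist())
# ['<p>model-a</p>', '<p>Baseline</p>', '<p>model-b</p>']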