Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	add t4 to leaderboard (#30)
Browse files- add t4 to leaderboard (1b7fb055871b54c99cf75616570506f50c7e9322)
- fix readme (38a9948acfac829033f4aa926a80abb5fab74cc8)
- .gitignore +2 -1
- README.md +59 -1
- app.py +1 -0
- src/llm_perf.py +8 -3
    	
        .gitignore
    CHANGED
    
    | @@ -4,4 +4,5 @@ __pycache__/ | |
| 4 | 
             
            *ipynb
         | 
| 5 | 
             
            .vscode/
         | 
| 6 |  | 
| 7 | 
            -
            dataset/
         | 
|  | 
|  | |
| 4 | 
             
            *ipynb
         | 
| 5 | 
             
            .vscode/
         | 
| 6 |  | 
| 7 | 
            +
            dataset/
         | 
| 8 | 
            +
            .venv
         | 
    	
        README.md
    CHANGED
    
    | @@ -11,4 +11,62 @@ license: apache-2.0 | |
| 11 | 
             
            tags: [llm perf leaderboard, llm performance leaderboard, llm, performance, leaderboard]
         | 
| 12 | 
             
            ---
         | 
| 13 |  | 
| 14 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 11 | 
             
            tags: [llm perf leaderboard, llm performance leaderboard, llm, performance, leaderboard]
         | 
| 12 | 
             
            ---
         | 
| 13 |  | 
| 14 | 
            +
            # LLM-perf leaderboard
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            ## 📝 About
         | 
| 17 | 
            +
            The 🤗 LLM-Perf Leaderboard 🏋️ is a leaderboard at the intersection of quality and performance.
         | 
| 18 | 
            +
            Its aim is to benchmark the performance (latency, throughput, memory & energy) 
         | 
| 19 | 
            +
            of Large Language Models (LLMs) with different hardware, backends and optimizations 
         | 
| 20 | 
            +
            using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark).
         | 
| 21 | 
            +
             | 
| 22 | 
            +
            Anyone from the community can request a new base model or hardware/backend/optimization 
         | 
| 23 | 
            +
            configuration for automated benchmarking:
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            - Model evaluation requests should be made in the 
         | 
| 26 | 
            +
            [🤗 Open LLM Leaderboard 🏅](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard);
         | 
| 27 | 
            +
            we scrape the [list of canonical base models](https://github.com/huggingface/optimum-benchmark/blob/main/llm_perf/utils.py) from there.
         | 
| 28 | 
            +
            - Hardware/Backend/Optimization configuration requests should be made in the 
         | 
| 29 | 
            +
            [🤗 LLM-Perf Leaderboard 🏋️](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) or 
         | 
| 30 | 
            +
            [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) repository (where the code is hosted).
         | 
| 31 | 
            +
             | 
| 32 | 
            +
            ## ✍️ Details
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            - To avoid communication-dependent results, only one GPU is used.
         | 
| 35 | 
            +
            - Score is the average evaluation score obtained from the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
         | 
| 36 | 
            +
            - LLMs are run on a singleton batch with a prompt size of 256, generating 64 tokens, for at least 10 iterations and 10 seconds.
         | 
| 37 | 
            +
            - Energy consumption is measured in kWh using CodeCarbon and taking into consideration the GPU, CPU, RAM and location of the machine.
         | 
| 38 | 
            +
            - We measure three types of memory: Max Allocated Memory, Max Reserved Memory and Max Used Memory. The first two are reported by PyTorch and the last one is observed using PyNVML.
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            All of our benchmarks are run by this single script
         | 
| 41 | 
            +
            [benchmark_cuda_pytorch.py](https://github.com/huggingface/optimum-benchmark/blob/llm-perf/llm-perf/benchmark_cuda_pytorch.py)
         | 
| 42 | 
            +
            using the power of [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) to guarantee reproducibility and consistency.
         | 
| 43 | 
            +
             | 
| 44 | 
            +
            ## 🏃 How to run locally 
         | 
| 45 | 
            +
             | 
| 46 | 
            +
            To run the LLM-Perf Leaderboard locally on your machine, follow these steps:
         | 
| 47 | 
            +
             | 
| 48 | 
            +
            ### 1. Clone the Repository
         | 
| 49 | 
            +
             | 
| 50 | 
            +
            First, clone the repository to your local machine:
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            ```bash
         | 
| 53 | 
            +
            git clone https://huggingface.co/spaces/optimum/llm-perf-leaderboard
         | 
| 54 | 
            +
            cd llm-perf-leaderboard
         | 
| 55 | 
            +
            ```
         | 
| 56 | 
            +
             | 
| 57 | 
            +
            ### 2. Install the Required Dependencies
         | 
| 58 | 
            +
             | 
| 59 | 
            +
            Install the necessary Python packages listed in the requirements.txt file:
         | 
| 60 | 
            +
            `pip install -r requirements.txt`
         | 
| 61 | 
            +
             | 
| 62 | 
            +
            ###  3. Run the Application
         | 
| 63 | 
            +
             | 
| 64 | 
            +
            You can run the Gradio application in one of the following ways:
         | 
| 65 | 
            +
            - Option 1: Using Python
         | 
| 66 | 
            +
            `python app.py`
         | 
| 67 | 
            +
            - Option 2: Using Gradio CLI (includes hot-reload)
         | 
| 68 | 
            +
            `gradio app.py`
         | 
| 69 | 
            +
             | 
| 70 | 
            +
            ### 4. Access the Application
         | 
| 71 | 
            +
             | 
| 72 | 
            +
            Once the application is running, you can access it locally in your web browser at http://127.0.0.1:7860/
         | 
    	
        app.py
    CHANGED
    
    | @@ -18,6 +18,7 @@ from src.panel import ( | |
| 18 | 
             
            MACHINE_TO_HARDWARE = {
         | 
| 19 | 
             
                "1xA10": "A10-24GB-150W 🖥️",
         | 
| 20 | 
             
                "1xA100": "A100-80GB-275W 🖥️",
         | 
|  | |
| 21 | 
             
                # "1xH100": "H100-80GB-700W 🖥️",
         | 
| 22 | 
             
            }
         | 
| 23 |  | 
|  | |
| 18 | 
             
            MACHINE_TO_HARDWARE = {
         | 
| 19 | 
             
                "1xA10": "A10-24GB-150W 🖥️",
         | 
| 20 | 
             
                "1xA100": "A100-80GB-275W 🖥️",
         | 
| 21 | 
            +
                "1xT4": "T4-16GB-70W 🖥️",
         | 
| 22 | 
             
                # "1xH100": "H100-80GB-700W 🖥️",
         | 
| 23 | 
             
            }
         | 
| 24 |  | 
    	
        src/llm_perf.py
    CHANGED
    
    | @@ -4,6 +4,8 @@ import pandas as pd | |
| 4 |  | 
| 5 | 
             
            from .utils import process_kernels, process_quantizations
         | 
| 6 |  | 
|  | |
|  | |
| 7 | 
             
            COLUMNS_MAPPING = {
         | 
| 8 | 
             
                "config.name": "Experiment 🧪",
         | 
| 9 | 
             
                "config.backend.model": "Model 🤗",
         | 
| @@ -109,11 +111,14 @@ def processed_llm_perf_df(llm_perf_df): | |
| 109 |  | 
| 110 |  | 
| 111 | 
             
            def get_llm_perf_df(machine: str = "1xA10"):
         | 
| 112 | 
            -
                if os.path.exists( | 
| 113 | 
            -
                     | 
|  | |
|  | |
|  | |
| 114 | 
             
                else:
         | 
| 115 | 
             
                    llm_perf_df = get_raw_llm_perf_df(machine)
         | 
| 116 | 
             
                    llm_perf_df = processed_llm_perf_df(llm_perf_df)
         | 
| 117 | 
            -
                    llm_perf_df.to_csv(f"llm-perf-leaderboard-{machine}.csv", index=False)
         | 
| 118 |  | 
| 119 | 
             
                return llm_perf_df
         | 
|  | |
| 4 |  | 
| 5 | 
             
            from .utils import process_kernels, process_quantizations
         | 
| 6 |  | 
| 7 | 
            +
            DATASET_DIRECTORY = "dataset"
         | 
| 8 | 
            +
             | 
| 9 | 
             
            COLUMNS_MAPPING = {
         | 
| 10 | 
             
                "config.name": "Experiment 🧪",
         | 
| 11 | 
             
                "config.backend.model": "Model 🤗",
         | 
|  | |
| 111 |  | 
| 112 |  | 
| 113 | 
             
            def get_llm_perf_df(machine: str = "1xA10"):
         | 
| 114 | 
            +
                if not os.path.exists(DATASET_DIRECTORY):
         | 
| 115 | 
            +
                    os.makedirs(DATASET_DIRECTORY)
         | 
| 116 | 
            +
             | 
| 117 | 
            +
                if os.path.exists(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv"):
         | 
| 118 | 
            +
                    llm_perf_df = pd.read_csv(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv")
         | 
| 119 | 
             
                else:
         | 
| 120 | 
             
                    llm_perf_df = get_raw_llm_perf_df(machine)
         | 
| 121 | 
             
                    llm_perf_df = processed_llm_perf_df(llm_perf_df)
         | 
| 122 | 
            +
                    llm_perf_df.to_csv(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv", index=False)
         | 
| 123 |  | 
| 124 | 
             
                return llm_perf_df
         | 

