inference-benchmarking-results-phi4-8000-tokens

Sleeping

App Files Files Community

loghugging25 committed on May 21

Commit

3ccce1a

1 Parent(s): c8d26ac

update

Browse files

Files changed (5) hide show

README.md +1 -3
dashboard/app.py +13 -192
results/{RedHatAI_phi-4-FP8-dynamic_2025-05-21-09-15-05.json → RedHatAI_phi-4-FP8-dynamic_2025-05-21-13-56-47.json} +164 -164
results/microsoft_phi-4_2025-05-21-12-47-52.json +0 -296
results/microsoft_phi-4_2025-05-21-13-17-26.json +0 -296

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Inference Fp8 Results
 emoji: 📊
 colorFrom: gray
 colorTo: green
@@ -9,8 +9,6 @@ app_file: dashboard/app.py
 pinned: false
 license: mit
-env:
-  DASHBOARD_FROM_RESULTS_DIR: results
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Inference Benchmarking Results Phi-4 (8000 Tokens)
 emoji: 📊
 colorFrom: gray
 colorTo: green
 pinned: false
 license: mit
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

dashboard/app.py CHANGED Viewed

@@ -3,14 +3,9 @@ from dataclasses import dataclass
 from typing import List
 import click
-import os
 import gradio as gr
 import pandas as pd
-import traceback
-import glob
-import json
 from parse_results import build_results
@@ -21,22 +16,8 @@ class PlotConfig:
     title: str
     percentiles: List[float] = None
-def check_file_exists(path, label=""):
-    if os.path.exists(path):
-        print(f"✅ {label} file exists: {path}")
-        print(f"   File size: {os.path.getsize(path)} bytes")
-        print(f"   Absolute path: {os.path.abspath(path)}")
-    else:
-        print(f"❌ {label} file NOT found: {path}")
-        print(f"   Current working directory: {os.getcwd()}")
-        print(f"   Directory contents: {os.listdir(os.path.dirname(path) if os.path.dirname(path) else '.')}")
 def run(from_results_dir, datasource, port):
-    print(f"💡 Debug - from_results_dir: {from_results_dir}")
-    print(f"💡 Debug - datasource: {datasource}")
-    print(f"💡 Debug - current directory: {os.getcwd()}")
     css = '''
     .summary span {
         font-size: 10px;
@@ -48,17 +29,17 @@ def run(from_results_dir, datasource, port):
     summary_desc = '''
     ## Summary
     This table shows the average of the metrics for each model and QPS rate.
     The metrics are:
-    * Inter token latency: Time to generate a new output token for each user querying the system.
       It translates as the “speed” perceived by the end-user. We aim for at least 300 words per minute (average reading speed), so ITL<150ms
-    * Time to First Token: Time the user has to wait before seeing the first token of its answer.
       Lower waiting time are essential for real-time interactions, less so for offline workloads.
     * End-to-end latency: The overall time the system took to generate the full response to the user.
     * Throughput: The number of tokens per second the system can generate across all requests
     * Successful requests: The number of requests the system was able to honor in the benchmark timeframe
-    * Error rate: The percentage of requests that ended up in error, as the system could not process them in time or failed to process them.
     '''
     df_bench = pd.DataFrame()
@@ -129,76 +110,17 @@ def run(from_results_dir, datasource, port):
         return res
     def load_datasource(datasource, fn):
-        print(f"💡 Debug - load_datasource called with: {datasource}")
         if datasource.startswith('file://'):
-            local_path = datasource[len('file://'):]
-            print(f"💡 Debug - Extracted local path: {local_path}")
-            check_file_exists(local_path, "Local")
-            return fn(local_path)
         elif datasource.startswith('s3://'):
             return fn(datasource)
         else:
-            # If no scheme is provided, assume it's a local path.
-            print(f"💡 Debug - Using path as-is: {datasource}")
-            check_file_exists(datasource, "Direct")
-            return fn(datasource)
-    parquet_file_to_load = None
     if from_results_dir is not None:
-        # If from_results_dir is specified, results are built into 'benchmarks.parquet'
-        # within that directory.
-        output_filename = 'benchmarks.parquet'
-        print(f"💡 Debug - Building results from directory: {from_results_dir}")
-        # Check if results directory exists
-        check_file_exists(from_results_dir, "Results directory")
-        # Create absolute path for results directory
-        abs_results_dir = os.path.abspath(from_results_dir)
-        print(f"💡 Debug - Absolute results directory: {abs_results_dir}")
-        # Create the results directory if it doesn't exist
-        if not os.path.exists(abs_results_dir):
-            print(f"💡 Debug - Creating results directory: {abs_results_dir}")
-            os.makedirs(abs_results_dir, exist_ok=True)
-        # Call build_results with absolute paths
-        full_output_path = os.path.join(abs_results_dir, output_filename)
-        print(f"💡 Debug - Expected output path: {full_output_path}")
-        build_results(abs_results_dir, output_filename, None)
-        # Check if the file was created
-        check_file_exists(full_output_path, "Generated parquet")
-        # The file to load is now in from_results_dir/output_filename
-        parquet_file_to_load = full_output_path
-    else:
-        # If not building from results_dir, use the provided datasource directly.
-        parquet_file_to_load = datasource
-    print(f"💡 Debug - Final parquet_file_to_load: {parquet_file_to_load}")
     # Load data
-    try:
-        df_bench = load_datasource(parquet_file_to_load, load_bench_results)
-        print(f"✅ Successfully loaded data with {len(df_bench)} rows")
-    except Exception as e:
-        print(f"❌ Error loading data: {str(e)}")
-        print(f"Stack trace: {traceback.format_exc()}")
-        # Create a minimal DataFrame to prevent further errors
-        df_bench = pd.DataFrame({
-            "model": ["error"],
-            "run_id": ["error"],
-            "rate": [0],
-            "inter_token_latency_ms_p90": [0],
-            "time_to_first_token_ms_p90": [0],
-            "e2e_latency_ms_p90": [0],
-            "token_throughput_secs": [0],
-            "successful_requests": [0],
-            "error_rate": [0]
-        })
     # Define metrics
     metrics = {
@@ -276,112 +198,11 @@ def run(from_results_dir, datasource, port):
 @click.command()
-@click.option('--from-results-dir', 'cli_from_results_dir', default=None, help='Load inference-benchmarker results from this directory. Overrides DASHBOARD_FROM_RESULTS_DIR.')
-@click.option('--datasource', 'cli_datasource', default='file://benchmarks.parquet', help='Load this Parquet file directly if not building from a results directory.')
 @click.option('--port', default=7860, help='Port to run the dashboard')
-def main(cli_from_results_dir, cli_datasource, port):
-    print("===== Starting Application =====")
-    # print(f"Environment variables: {os.environ}") # Already in user's code or logs
-    # Determine the directory from which to process JSON results
-    # Priority: 1. CLI option, 2. Env Var, 3. Default to 'results' dir
-    processing_dir = cli_from_results_dir
-    if processing_dir is None:
-        env_var_value = os.environ.get('DASHBOARD_FROM_RESULTS_DIR')
-        if env_var_value:
-            print(f"Using environment variable DASHBOARD_FROM_RESULTS_DIR='{env_var_value}' for processing.")
-            processing_dir = env_var_value
-        elif os.path.exists('results') and os.path.isdir('results'):
-            print(f"No --from-results-dir option or DASHBOARD_FROM_RESULTS_DIR env var. Defaulting to 'results' directory for processing as it exists.")
-            processing_dir = 'results'
-        else:
-            print(f"No directory specified for processing (no --from-results-dir, no DASHBOARD_FROM_RESULTS_DIR env var, and 'results' dir not found).")
-            # processing_dir remains None
-    path_to_load_by_run_function = None # This will be the path to the .parquet file
-    if processing_dir:
-        # A directory for processing JSONs has been determined.
-        # Use the existing logic to build/fallback and generate benchmarks.parquet.
-        output_filename = 'benchmarks.parquet'
-        abs_processing_dir = os.path.abspath(processing_dir)
-        print(f"💡 Debug - Will process JSONs from directory: {abs_processing_dir}")
-        check_file_exists(abs_processing_dir, "Source directory for JSONs")
-        # Ensure the directory exists (it might be 'results' or user-provided)
-        # build_results might expect the output directory to exist.
-        if not os.path.exists(abs_processing_dir):
-            print(f"💡 Debug - Creating directory for processing/output: {abs_processing_dir}")
-            os.makedirs(abs_processing_dir, exist_ok=True)
-        # The generated parquet file will be placed inside the abs_processing_dir
-        generated_parquet_filepath = os.path.join(abs_processing_dir, output_filename)
-        print(f"💡 Debug - Expected path for generated parquet file: {generated_parquet_filepath}")
-        try:
-            build_results(abs_processing_dir, output_filename, None) # output_filename is relative to abs_processing_dir
-            print("✅ Build results completed using build_results.")
-        except Exception as e_build:
-            print(f"❌ Error in build_results: {str(e_build)}")
-            print(f"Stack trace: {traceback.format_exc()}")
-            print("⚠️ Attempting fallback method: direct JSON processing")
-            try:
-                json_files = glob.glob(os.path.join(abs_processing_dir, "*.json"))
-                print(f"Found {len(json_files)} JSON files for fallback: {json_files}")
-                if not json_files:
-                    raise FileNotFoundError("Fallback: No JSON files found in results directory")
-                combined_data = []
-                for json_file in json_files:
-                    try:
-                        with open(json_file, 'r') as f:
-                            data = json.load(f)
-                        filename = os.path.basename(json_file)
-                        model_name_parts = filename.split('_')
-                        model_name = f"{model_name_parts[0]}_{model_name_parts[1]}" if len(model_name_parts) > 1 else model_name_parts[0]
-                        if 'benchmarks' in data:
-                            for benchmark in data['benchmarks']:
-                                benchmark['model'] = model_name
-                                benchmark['run_id'] = os.path.splitext(filename)[0]
-                                combined_data.append(benchmark)
-                        else:
-                            print(f"⚠️ Fallback: No 'benchmarks' key in {json_file}")
-                    except Exception as json_err:
-                        print(f"❌ Fallback: Error processing {json_file}: {str(json_err)}")
-                if combined_data:
-                    df_direct = pd.DataFrame(combined_data)
-                    df_direct.to_parquet(generated_parquet_filepath)
-                    print(f"✅ Created parquet file via fallback method: {generated_parquet_filepath}")
-                else:
-                    raise ValueError("Fallback: No data could be extracted from JSON files")
-            except Exception as e_fallback:
-                print(f"❌ Fallback method failed: {str(e_fallback)}")
-                print(f"Stack trace: {traceback.format_exc()}")
-        # After attempting to build/generate, check if the file exists
-        check_file_exists(generated_parquet_filepath, "Parquet file after build/fallback attempts")
-        if os.path.exists(generated_parquet_filepath):
-            path_to_load_by_run_function = generated_parquet_filepath
-        else:
-            print(f"❌ CRITICAL: Failed to generate or find parquet file at '{generated_parquet_filepath}' after all attempts.")
-            # path_to_load_by_run_function remains None here, will be handled below.
-    # If path_to_load_by_run_function is still None at this point
-    # (either because processing_dir was not set, or all generation attempts failed),
-    # default to the original cli_datasource.
-    if path_to_load_by_run_function is None:
-        print(f"⚠️ Defaulting to cli_datasource '{cli_datasource}' as parquet generation failed or was skipped.")
-        path_to_load_by_run_function = cli_datasource
-    print(f"💡 Final path to be loaded by run() function: '{path_to_load_by_run_function}'")
-    # Call run(). The first argument (from_results_dir for run()) is None because main handles processing.
-    # The second argument (datasource for run()) is the actual file path to load.
-    run(None, path_to_load_by_run_function, port)
 if __name__ == '__main__':

 from typing import List
 import click
 import gradio as gr
 import pandas as pd
 from parse_results import build_results
     title: str
     percentiles: List[float] = None
 def run(from_results_dir, datasource, port):
     css = '''
     .summary span {
         font-size: 10px;
     summary_desc = '''
     ## Summary
     This table shows the average of the metrics for each model and QPS rate.
     The metrics are:
+    * Inter token latency: Time to generate a new output token for each user querying the system.
       It translates as the “speed” perceived by the end-user. We aim for at least 300 words per minute (average reading speed), so ITL<150ms
+    * Time to First Token: Time the user has to wait before seeing the first token of its answer.
       Lower waiting time are essential for real-time interactions, less so for offline workloads.
     * End-to-end latency: The overall time the system took to generate the full response to the user.
     * Throughput: The number of tokens per second the system can generate across all requests
     * Successful requests: The number of requests the system was able to honor in the benchmark timeframe
+    * Error rate: The percentage of requests that ended up in error, as the system could not process them in time or failed to process them.
     '''
     df_bench = pd.DataFrame()
         return res
     def load_datasource(datasource, fn):
         if datasource.startswith('file://'):
+            return fn(datasource)
         elif datasource.startswith('s3://'):
             return fn(datasource)
         else:
+            raise ValueError(f"Unknown datasource: {datasource}")
     if from_results_dir is not None:
+        build_results(from_results_dir, 'benchmarks.parquet', None)
     # Load data
+    df_bench = load_datasource(datasource, load_bench_results)
     # Define metrics
     metrics = {
 @click.command()
+@click.option('--from-results-dir', default=None, help='Load inference-benchmarker results from a directory')
+@click.option('--datasource', default='file://benchmarks.parquet', help='Load a Parquet file already generated')
 @click.option('--port', default=7860, help='Port to run the dashboard')
+def main(from_results_dir, datasource, port):
+    run(from_results_dir, datasource, port)
 if __name__ == '__main__':

results/{RedHatAI_phi-4-FP8-dynamic_2025-05-21-09-15-05.json → RedHatAI_phi-4-FP8-dynamic_2025-05-21-13-56-47.json} RENAMED Viewed

@@ -12,22 +12,22 @@
 		],
 		"num_rates": 10,
 		"prompt_options": {
-			"num_tokens": 200,
-			"min_tokens": 180,
-			"max_tokens": 220,
 			"variance": 10
 		},
 		"decode_options": {
-			"num_tokens": 200,
-			"min_tokens": 180,
-			"max_tokens": 220,
 			"variance": 10
 		},
 		"tokenizer": "RedHatAI/phi-4-FP8-dynamic",
 		"model_name": "phi-4",
 		"profile": null,
 		"meta": null,
-		"run_id": "vLLM: RedHatAI/phi-4-FP8-dynamic (200 tokens)"
 	},
 	"results": [
 		{
@@ -38,43 +38,43 @@
 				"duration_secs": 30,
 				"rate": null
 			},
-			"total_requests": 7,
-			"total_tokens": 1401,
-			"token_throughput_secs": 41.207311909734074,
-			"duration_ms": 33998,
 			"time_to_first_token_ms": {
-				"p50": 30.74,
-				"p60": 30.848,
-				"p70": 31.032,
-				"p80": 31.367,
-				"p90": 600.369,
-				"p95": 1027.036,
-				"p99": 1368.37,
-				"avg": 233.964
 			},
 			"inter_token_latency_ms": {
-				"p50": 23.217,
-				"p60": 23.222,
-				"p70": 23.228,
-				"p80": 23.236,
-				"p90": 23.248,
-				"p95": 23.254,
-				"p99": 23.26,
-				"avg": 23.213
 			},
 			"failed_requests": 0,
-			"successful_requests": 7,
-			"request_rate": 0.2058894956232252,
-			"total_tokens_sent": 1400,
 			"e2e_latency_ms": {
-				"p50": 4743.409,
-				"p60": 4751.971,
-				"p70": 4775.205,
-				"p80": 4827.785,
-				"p90": 5318.839,
-				"p95": 5673.985,
-				"p99": 5958.102,
-				"avg": 4856.823
 			}
 		},
 		{
@@ -85,43 +85,43 @@
 				"duration_secs": 120,
 				"rate": 1.0
 			},
-			"total_requests": 115,
-			"total_tokens": 22163,
-			"token_throughput_secs": 186.64991064360598,
-			"duration_ms": 118741,
 			"time_to_first_token_ms": {
-				"p50": 43.445,
-				"p60": 45.341,
-				"p70": 47.407,
-				"p80": 50.324,
-				"p90": 53.509,
-				"p95": 54.94,
-				"p99": 57.022,
-				"avg": 43.314
 			},
 			"inter_token_latency_ms": {
-				"p50": 24.082,
-				"p60": 24.1,
-				"p70": 24.124,
-				"p80": 24.146,
-				"p90": 24.21,
-				"p95": 24.288,
-				"p99": 24.376,
-				"avg": 24.09
 			},
 			"failed_requests": 0,
-			"successful_requests": 115,
-			"request_rate": 0.9684943249566704,
-			"total_tokens_sent": 23000,
 			"e2e_latency_ms": {
-				"p50": 4814.201,
-				"p60": 4873.26,
-				"p70": 4947.365,
-				"p80": 5011.934,
-				"p90": 5104.903,
-				"p95": 5182.844,
-				"p99": 5309.301,
-				"avg": 4665.197
 			}
 		},
 		{
@@ -132,43 +132,43 @@
 				"duration_secs": 120,
 				"rate": 10.0
 			},
-			"total_requests": 1149,
-			"total_tokens": 217686,
-			"token_throughput_secs": 1837.4411468828155,
-			"duration_ms": 118472,
 			"time_to_first_token_ms": {
-				"p50": 55.249,
-				"p60": 57.796,
-				"p70": 60.296,
-				"p80": 63.162,
-				"p90": 66.14,
-				"p95": 67.799,
-				"p99": 70.85,
-				"avg": 55.52
 			},
 			"inter_token_latency_ms": {
-				"p50": 28.914,
-				"p60": 28.973,
-				"p70": 29.029,
-				"p80": 29.089,
-				"p90": 29.168,
-				"p95": 29.211,
-				"p99": 29.331,
-				"avg": 28.737
 			},
 			"failed_requests": 0,
-			"successful_requests": 1149,
-			"request_rate": 9.698464199665366,
-			"total_tokens_sent": 229800,
 			"e2e_latency_ms": {
-				"p50": 5707.118,
-				"p60": 5793.95,
-				"p70": 5885.254,
-				"p80": 5983.201,
-				"p90": 6126.889,
-				"p95": 6219.476,
-				"p99": 6386.803,
-				"avg": 5477.946
 			}
 		},
 		{
@@ -179,43 +179,43 @@
 				"duration_secs": 120,
 				"rate": 30.0
 			},
-			"total_requests": 1889,
-			"total_tokens": 348708,
-			"token_throughput_secs": 2911.7479692043544,
-			"duration_ms": 119758,
 			"time_to_first_token_ms": {
-				"p50": 22192.744,
-				"p60": 26837.194,
-				"p70": 29205.612,
-				"p80": 33069.312,
-				"p90": 35968.562,
-				"p95": 36825.858,
-				"p99": 37298.867,
-				"avg": 19829.052
 			},
 			"inter_token_latency_ms": {
-				"p50": 64.987,
-				"p60": 66.093,
-				"p70": 67.344,
-				"p80": 72.108,
-				"p90": 90.713,
-				"p95": 98.38,
-				"p99": 177.348,
-				"avg": 69.926
 			},
 			"failed_requests": 0,
-			"successful_requests": 1889,
-			"request_rate": 15.77334593363796,
-			"total_tokens_sent": 377800,
 			"e2e_latency_ms": {
-				"p50": 33837.749,
-				"p60": 38364.805,
-				"p70": 42612.972,
-				"p80": 45779.935,
-				"p90": 48249.655,
-				"p95": 49268.594,
-				"p99": 50884.661,
-				"avg": 32263.266
 			}
 		},
 		{
@@ -226,48 +226,48 @@
 				"duration_secs": 120,
 				"rate": 100.0
 			},
-			"total_requests": 1923,
-			"total_tokens": 355495,
-			"token_throughput_secs": 2963.3510051149824,
-			"duration_ms": 119963,
 			"time_to_first_token_ms": {
-				"p50": 30849.07,
-				"p60": 32647.17,
-				"p70": 35695.762,
-				"p80": 36657.309,
-				"p90": 37063.893,
-				"p95": 37265.804,
-				"p99": 37693.244,
-				"avg": 25983.203
 			},
 			"inter_token_latency_ms": {
-				"p50": 64.756,
-				"p60": 66.434,
-				"p70": 68.803,
-				"p80": 83.204,
-				"p90": 96.295,
-				"p95": 103.874,
-				"p99": 163.895,
-				"avg": 73.033
 			},
 			"failed_requests": 0,
-			"successful_requests": 1923,
-			"request_rate": 16.02982878194099,
-			"total_tokens_sent": 384600,
 			"e2e_latency_ms": {
-				"p50": 44432.763,
-				"p60": 46273.082,
-				"p70": 47729.904,
-				"p80": 48714.768,
-				"p90": 49917.33,
-				"p95": 50686.527,
-				"p99": 51992.951,
-				"avg": 38685.294
 			}
 		}
 	],
-	"start_time": "2025-05-21T09:04:59.479961191+00:00",
-	"end_time": "2025-05-21T09:15:05.115323148+00:00",
 	"system": {
 		"cpu": [
 			"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",

 		],
 		"num_rates": 10,
 		"prompt_options": {
+			"num_tokens": 8000,
+			"min_tokens": 7980,
+			"max_tokens": 8020,
 			"variance": 10
 		},
 		"decode_options": {
+			"num_tokens": 8000,
+			"min_tokens": 7980,
+			"max_tokens": 8020,
 			"variance": 10
 		},
 		"tokenizer": "RedHatAI/phi-4-FP8-dynamic",
 		"model_name": "phi-4",
 		"profile": null,
 		"meta": null,
+		"run_id": "vLLM: RedHatAI/phi-4-FP8-dynamic (8000 tokens)"
 	},
 	"results": [
 		{
 				"duration_secs": 30,
 				"rate": null
 			},
+			"total_requests": 2,
+			"total_tokens": 1643,
+			"token_throughput_secs": 38.490013255851395,
+			"duration_ms": 42686,
 			"time_to_first_token_ms": {
+				"p50": 1276.801,
+				"p60": 1388.913,
+				"p70": 1501.026,
+				"p80": 1613.139,
+				"p90": 1725.252,
+				"p95": 1781.309,
+				"p99": 1826.154,
+				"avg": 1276.801
 			},
 			"inter_token_latency_ms": {
+				"p50": 24.424,
+				"p60": 24.432,
+				"p70": 24.44,
+				"p80": 24.448,
+				"p90": 24.456,
+				"p95": 24.46,
+				"p99": 24.463,
+				"avg": 24.424
 			},
 			"failed_requests": 0,
+			"successful_requests": 2,
+			"request_rate": 0.0468533332390157,
+			"total_tokens_sent": 16000,
 			"e2e_latency_ms": {
+				"p50": 21343.075,
+				"p60": 21391.438,
+				"p70": 21439.801,
+				"p80": 21488.164,
+				"p90": 21536.527,
+				"p95": 21560.709,
+				"p99": 21580.054,
+				"avg": 21343.075
 			}
 		},
 		{
 				"duration_secs": 120,
 				"rate": 1.0
 			},
+			"total_requests": 90,
+			"total_tokens": 55892,
+			"token_throughput_secs": 478.696852515677,
+			"duration_ms": 116758,
 			"time_to_first_token_ms": {
+				"p50": 118.856,
+				"p60": 124.707,
+				"p70": 131.654,
+				"p80": 135.562,
+				"p90": 145.529,
+				"p95": 150.366,
+				"p99": 715.649,
+				"avg": 128.611
 			},
 			"inter_token_latency_ms": {
+				"p50": 45.758,
+				"p60": 46.229,
+				"p70": 46.314,
+				"p80": 46.373,
+				"p90": 46.483,
+				"p95": 46.581,
+				"p99": 46.871,
+				"avg": 43.271
 			},
 			"failed_requests": 0,
+			"successful_requests": 90,
+			"request_rate": 0.7708208102485317,
+			"total_tokens_sent": 720000,
 			"e2e_latency_ms": {
+				"p50": 27887.256,
+				"p60": 30188.411,
+				"p70": 31661.903,
+				"p80": 35685.812,
+				"p90": 45661.636,
+				"p95": 50093.628,
+				"p99": 59727.184,
+				"avg": 27093.895
 			}
 		},
 		{
 				"duration_secs": 120,
 				"rate": 10.0
 			},
+			"total_requests": 97,
+			"total_tokens": 45779,
+			"token_throughput_secs": 385.8671945353039,
+			"duration_ms": 118639,
 			"time_to_first_token_ms": {
+				"p50": 264.625,
+				"p60": 314.639,
+				"p70": 341.786,
+				"p80": 416.021,
+				"p90": 502.604,
+				"p95": 608.336,
+				"p99": 712.908,
+				"avg": 278.878
 			},
 			"inter_token_latency_ms": {
+				"p50": 152.068,
+				"p60": 183.639,
+				"p70": 208.294,
+				"p80": 210.057,
+				"p90": 211.894,
+				"p95": 421.244,
+				"p99": 436.578,
+				"avg": 190.502
 			},
 			"failed_requests": 0,
+			"successful_requests": 97,
+			"request_rate": 0.8176045319890011,
+			"total_tokens_sent": 776000,
 			"e2e_latency_ms": {
+				"p50": 89809.719,
+				"p60": 90599.198,
+				"p70": 97086.861,
+				"p80": 97763.592,
+				"p90": 102705.608,
+				"p95": 105891.319,
+				"p99": 109209.372,
+				"avg": 80168.287
 			}
 		},
 		{
 				"duration_secs": 120,
 				"rate": 30.0
 			},
+			"total_requests": 108,
+			"total_tokens": 48755,
+			"token_throughput_secs": 408.5182278415837,
+			"duration_ms": 119345,
 			"time_to_first_token_ms": {
+				"p50": 315.639,
+				"p60": 364.113,
+				"p70": 440.936,
+				"p80": 517.15,
+				"p90": 635.496,
+				"p95": 743.467,
+				"p99": 886.077,
+				"avg": 348.945
 			},
 			"inter_token_latency_ms": {
+				"p50": 172.827,
+				"p60": 189.057,
+				"p70": 196.538,
+				"p80": 201.266,
+				"p90": 442.975,
+				"p95": 465.991,
+				"p99": 473.842,
+				"avg": 207.845
 			},
 			"failed_requests": 0,
+			"successful_requests": 108,
+			"request_rate": 0.9049321835071489,
+			"total_tokens_sent": 864000,
 			"e2e_latency_ms": {
+				"p50": 89868.756,
+				"p60": 96902.23,
+				"p70": 98937.333,
+				"p80": 102789.849,
+				"p90": 109541.9,
+				"p95": 111388.456,
+				"p99": 114281.927,
+				"avg": 82072.638
 			}
 		},
 		{
 				"duration_secs": 120,
 				"rate": 100.0
 			},
+			"total_requests": 125,
+			"total_tokens": 57918,
+			"token_throughput_secs": 485.359321343381,
+			"duration_ms": 119330,
 			"time_to_first_token_ms": {
+				"p50": 1154.434,
+				"p60": 1276.393,
+				"p70": 1440.368,
+				"p80": 1604.069,
+				"p90": 1768.54,
+				"p95": 1850.13,
+				"p99": 1919.678,
+				"avg": 1208.132
 			},
 			"inter_token_latency_ms": {
+				"p50": 166.875,
+				"p60": 166.884,
+				"p70": 167.245,
+				"p80": 188.28,
+				"p90": 350.172,
+				"p95": 417.485,
+				"p99": 437.566,
+				"avg": 186.06
 			},
 			"failed_requests": 0,
+			"successful_requests": 125,
+			"request_rate": 1.047513988188864,
+			"total_tokens_sent": 1000000,
 			"e2e_latency_ms": {
+				"p50": 82803.004,
+				"p60": 89976.229,
+				"p70": 90374.914,
+				"p80": 99727.225,
+				"p90": 108866.194,
+				"p95": 113444.528,
+				"p99": 116545.189,
+				"avg": 77917.015
 			}
 		}
 	],
+	"start_time": "2025-05-21T13:41:44.260015742+00:00",
+	"end_time": "2025-05-21T13:56:47.150683889+00:00",
 	"system": {
 		"cpu": [
 			"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",

results/microsoft_phi-4_2025-05-21-12-47-52.json DELETED Viewed

@@ -1,296 +0,0 @@
-{
-	"config": {
-		"max_vus": 800,
-		"duration_secs": 120,
-		"benchmark_kind": "Rate",
-		"warmup_duration_secs": 30,
-		"rates": [
-			1.0,
-			10.0,
-			30.0,
-			100.0
-		],
-		"num_rates": 10,
-		"prompt_options": {
-			"num_tokens": 200,
-			"min_tokens": 180,
-			"max_tokens": 220,
-			"variance": 10
-		},
-		"decode_options": {
-			"num_tokens": 200,
-			"min_tokens": 180,
-			"max_tokens": 220,
-			"variance": 10
-		},
-		"tokenizer": "microsoft/phi-4",
-		"model_name": "phi-4",
-		"profile": null,
-		"meta": null,
-		"run_id": "Ollama: unsloth/phi-4-GGUF:Q8_0 (200 tokens)"
-	},
-	"results": [
-		{
-			"id": "warmup",
-			"executor_type": "ConstantVUs",
-			"config": {
-				"max_vus": 1,
-				"duration_secs": 30,
-				"rate": null
-			},
-			"total_requests": 17,
-			"total_tokens": 2560,
-			"token_throughput_secs": 81.92346820970964,
-			"duration_ms": 31248,
-			"time_to_first_token_ms": {
-				"p50": 48.023,
-				"p60": 48.316,
-				"p70": 48.704,
-				"p80": 49.172,
-				"p90": 50.133,
-				"p95": 79.141,
-				"p99": 171.884,
-				"avg": 56.904
-			},
-			"inter_token_latency_ms": {
-				"p50": 11.835,
-				"p60": 11.849,
-				"p70": 11.866,
-				"p80": 11.888,
-				"p90": 11.999,
-				"p95": 12.031,
-				"p99": 12.057,
-				"avg": 11.863
-			},
-			"failed_requests": 0,
-			"successful_requests": 17,
-			"request_rate": 0.5440230310801031,
-			"total_tokens_sent": 3400,
-			"e2e_latency_ms": {
-				"p50": 2193.161,
-				"p60": 2256.189,
-				"p70": 2409.636,
-				"p80": 2503.287,
-				"p90": 2558.373,
-				"p95": 2565.267,
-				"p99": 2582.093,
-				"avg": 1837.986
-			}
-		},
-		{
-			"id": "[email protected]/s",
-			"executor_type": "ConstantArrivalRate",
-			"config": {
-				"max_vus": 800,
-				"duration_secs": 120,
-				"rate": 1.0
-			},
-			"total_requests": 68,
-			"total_tokens": 13393,
-			"token_throughput_secs": 113.50678834081126,
-			"duration_ms": 117992,
-			"time_to_first_token_ms": {
-				"p50": 23628.355,
-				"p60": 28364.866,
-				"p70": 33468.314,
-				"p80": 37116.28,
-				"p90": 42197.075,
-				"p95": 44792.584,
-				"p99": 46808.871,
-				"avg": 23527.531
-			},
-			"inter_token_latency_ms": {
-				"p50": 17.148,
-				"p60": 17.164,
-				"p70": 17.183,
-				"p80": 17.199,
-				"p90": 17.22,
-				"p95": 17.235,
-				"p99": 17.256,
-				"avg": 17.123
-			},
-			"failed_requests": 0,
-			"successful_requests": 68,
-			"request_rate": 0.5763056527421164,
-			"total_tokens_sent": 13600,
-			"e2e_latency_ms": {
-				"p50": 26918.292,
-				"p60": 31837.746,
-				"p70": 36426.629,
-				"p80": 40565.391,
-				"p90": 45507.834,
-				"p95": 48259.487,
-				"p99": 50280.92,
-				"avg": 26884.974
-			}
-		},
-		{
-			"id": "[email protected]/s",
-			"executor_type": "ConstantArrivalRate",
-			"config": {
-				"max_vus": 800,
-				"duration_secs": 120,
-				"rate": 10.0
-			},
-			"total_requests": 69,
-			"total_tokens": 13411,
-			"token_throughput_secs": 112.91469560470007,
-			"duration_ms": 118771,
-			"time_to_first_token_ms": {
-				"p50": 54889.419,
-				"p60": 66226.724,
-				"p70": 77657.43,
-				"p80": 87194.269,
-				"p90": 97361.153,
-				"p95": 102660.303,
-				"p99": 106894.626,
-				"avg": 54527.075
-			},
-			"inter_token_latency_ms": {
-				"p50": 17.284,
-				"p60": 17.295,
-				"p70": 17.305,
-				"p80": 17.328,
-				"p90": 17.385,
-				"p95": 17.394,
-				"p99": 17.447,
-				"avg": 17.279
-			},
-			"failed_requests": 0,
-			"successful_requests": 69,
-			"request_rate": 0.5809495188072705,
-			"total_tokens_sent": 13800,
-			"e2e_latency_ms": {
-				"p50": 58021.804,
-				"p60": 69751.13,
-				"p70": 80116.293,
-				"p80": 90587.03,
-				"p90": 100535.513,
-				"p95": 105903.68,
-				"p99": 110535.65,
-				"avg": 57868.946
-			}
-		},
-		{
-			"id": "[email protected]/s",
-			"executor_type": "ConstantArrivalRate",
-			"config": {
-				"max_vus": 800,
-				"duration_secs": 120,
-				"rate": 30.0
-			},
-			"total_requests": 70,
-			"total_tokens": 13581,
-			"token_throughput_secs": 113.61611267427078,
-			"duration_ms": 119534,
-			"time_to_first_token_ms": {
-				"p50": 56313.526,
-				"p60": 68465.8,
-				"p70": 78580.113,
-				"p80": 90639.114,
-				"p90": 102040.301,
-				"p95": 108031.928,
-				"p99": 112499.04,
-				"avg": 56639.341
-			},
-			"inter_token_latency_ms": {
-				"p50": 17.172,
-				"p60": 17.182,
-				"p70": 17.217,
-				"p80": 17.235,
-				"p90": 17.256,
-				"p95": 17.31,
-				"p99": 17.346,
-				"avg": 17.18
-			},
-			"failed_requests": 0,
-			"successful_requests": 70,
-			"request_rate": 0.5856069425814708,
-			"total_tokens_sent": 14000,
-			"e2e_latency_ms": {
-				"p50": 59683.651,
-				"p60": 71746.875,
-				"p70": 81953.181,
-				"p80": 94277.653,
-				"p90": 105378.271,
-				"p95": 111453.36,
-				"p99": 115949.496,
-				"avg": 59958.385
-			}
-		},
-		{
-			"id": "[email protected]/s",
-			"executor_type": "ConstantArrivalRate",
-			"config": {
-				"max_vus": 800,
-				"duration_secs": 120,
-				"rate": 100.0
-			},
-			"total_requests": 70,
-			"total_tokens": 13359,
-			"token_throughput_secs": 114.42379660997986,
-			"duration_ms": 116750,
-			"time_to_first_token_ms": {
-				"p50": 57218.949,
-				"p60": 67960.841,
-				"p70": 79764.715,
-				"p80": 91579.471,
-				"p90": 102620.956,
-				"p95": 107961.016,
-				"p99": 112866.279,
-				"avg": 56772.876
-			},
-			"inter_token_latency_ms": {
-				"p50": 17.171,
-				"p60": 17.189,
-				"p70": 17.201,
-				"p80": 17.215,
-				"p90": 17.245,
-				"p95": 17.299,
-				"p99": 17.353,
-				"avg": 17.179
-			},
-			"failed_requests": 0,
-			"successful_requests": 70,
-			"request_rate": 0.5995707584922966,
-			"total_tokens_sent": 14000,
-			"e2e_latency_ms": {
-				"p50": 60551.916,
-				"p60": 71380.408,
-				"p70": 83198.203,
-				"p80": 93909.886,
-				"p90": 105788.774,
-				"p95": 111364.807,
-				"p99": 115968.729,
-				"avg": 60037.39
-			}
-		}
-	],
-	"start_time": "2025-05-21T12:32:04.299141299+00:00",
-	"end_time": "2025-05-21T12:47:52.695866821+00:00",
-	"system": {
-		"cpu": [
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
-		],
-		"memory": "83.47 GB",
-		"os_name": "Debian GNU/Linux",
-		"os_version": "11",
-		"kernel": "5.15.167.4-microsoft-standard-WSL2",
-		"hostname": "computer"
-	}
-}

results/microsoft_phi-4_2025-05-21-13-17-26.json DELETED Viewed

@@ -1,296 +0,0 @@
-{
-	"config": {
-		"max_vus": 800,
-		"duration_secs": 120,
-		"benchmark_kind": "Rate",
-		"warmup_duration_secs": 30,
-		"rates": [
-			1.0,
-			10.0,
-			30.0,
-			100.0
-		],
-		"num_rates": 10,
-		"prompt_options": {
-			"num_tokens": 200,
-			"min_tokens": 180,
-			"max_tokens": 220,
-			"variance": 10
-		},
-		"decode_options": {
-			"num_tokens": 200,
-			"min_tokens": 180,
-			"max_tokens": 220,
-			"variance": 10
-		},
-		"tokenizer": "microsoft/phi-4",
-		"model_name": "phi-4",
-		"profile": null,
-		"meta": null,
-		"run_id": "LM Studio: lmstudio-community/phi-4-GGUF:Q8_0 (200 tokens)"
-	},
-	"results": [
-		{
-			"id": "warmup",
-			"executor_type": "ConstantVUs",
-			"config": {
-				"max_vus": 1,
-				"duration_secs": 30,
-				"rate": null
-			},
-			"total_requests": 13,
-			"total_tokens": 2610,
-			"token_throughput_secs": 83.60700961692694,
-			"duration_ms": 31217,
-			"time_to_first_token_ms": {
-				"p50": 90.517,
-				"p60": 93.25,
-				"p70": 102.443,
-				"p80": 109.227,
-				"p90": 130.959,
-				"p95": 207.294,
-				"p99": 293.629,
-				"avg": 108.58
-			},
-			"inter_token_latency_ms": {
-				"p50": 11.513,
-				"p60": 11.519,
-				"p70": 11.534,
-				"p80": 11.548,
-				"p90": 11.559,
-				"p95": 11.574,
-				"p99": 11.589,
-				"avg": 11.472
-			},
-			"failed_requests": 0,
-			"successful_requests": 13,
-			"request_rate": 0.41643338123373574,
-			"total_tokens_sent": 2600,
-			"e2e_latency_ms": {
-				"p50": 2419.372,
-				"p60": 2423.796,
-				"p70": 2432.426,
-				"p80": 2458.236,
-				"p90": 2525.006,
-				"p95": 2596.86,
-				"p99": 2667.757,
-				"avg": 2401.195
-			}
-		},
-		{
-			"id": "[email protected]/s",
-			"executor_type": "ConstantArrivalRate",
-			"config": {
-				"max_vus": 800,
-				"duration_secs": 120,
-				"rate": 1.0
-			},
-			"total_requests": 52,
-			"total_tokens": 9915,
-			"token_throughput_secs": 84.1224984364473,
-			"duration_ms": 117863,
-			"time_to_first_token_ms": {
-				"p50": 31149.018,
-				"p60": 38159.307,
-				"p70": 44798.95,
-				"p80": 51599.01,
-				"p90": 58334.517,
-				"p95": 61414.588,
-				"p99": 63915.289,
-				"avg": 32379.62
-			},
-			"inter_token_latency_ms": {
-				"p50": 11.473,
-				"p60": 11.501,
-				"p70": 11.517,
-				"p80": 11.529,
-				"p90": 11.563,
-				"p95": 11.598,
-				"p99": 11.775,
-				"avg": 11.254
-			},
-			"failed_requests": 0,
-			"successful_requests": 52,
-			"request_rate": 0.4411870820670963,
-			"total_tokens_sent": 10400,
-			"e2e_latency_ms": {
-				"p50": 33388.263,
-				"p60": 40395.415,
-				"p70": 47230.795,
-				"p80": 53979.194,
-				"p90": 60382.07,
-				"p95": 63519.032,
-				"p99": 66184.234,
-				"avg": 34556.301
-			}
-		},
-		{
-			"id": "[email protected]/s",
-			"executor_type": "ConstantArrivalRate",
-			"config": {
-				"max_vus": 800,
-				"duration_secs": 120,
-				"rate": 10.0
-			},
-			"total_requests": 51,
-			"total_tokens": 10041,
-			"token_throughput_secs": 84.04049965954646,
-			"duration_ms": 119478,
-			"time_to_first_token_ms": {
-				"p50": 55889.645,
-				"p60": 67098.347,
-				"p70": 78905.359,
-				"p80": 90289.182,
-				"p90": 101201.112,
-				"p95": 106805.272,
-				"p99": 111193.127,
-				"avg": 56139.066
-			},
-			"inter_token_latency_ms": {
-				"p50": 11.487,
-				"p60": 11.498,
-				"p70": 11.51,
-				"p80": 11.536,
-				"p90": 11.584,
-				"p95": 11.638,
-				"p99": 11.883,
-				"avg": 11.474
-			},
-			"failed_requests": 0,
-			"successful_requests": 51,
-			"request_rate": 0.4268564368725096,
-			"total_tokens_sent": 10200,
-			"e2e_latency_ms": {
-				"p50": 58084.912,
-				"p60": 69432.711,
-				"p70": 81080.254,
-				"p80": 92442.614,
-				"p90": 103527.041,
-				"p95": 108999.672,
-				"p99": 113397.637,
-				"avg": 58387.662
-			}
-		},
-		{
-			"id": "[email protected]/s",
-			"executor_type": "ConstantArrivalRate",
-			"config": {
-				"max_vus": 800,
-				"duration_secs": 120,
-				"rate": 30.0
-			},
-			"total_requests": 51,
-			"total_tokens": 9889,
-			"token_throughput_secs": 84.08188681268076,
-			"duration_ms": 117611,
-			"time_to_first_token_ms": {
-				"p50": 55982.506,
-				"p60": 68000.692,
-				"p70": 79600.152,
-				"p80": 91108.706,
-				"p90": 101995.453,
-				"p95": 107929.312,
-				"p99": 112340.212,
-				"avg": 56754.648
-			},
-			"inter_token_latency_ms": {
-				"p50": 11.503,
-				"p60": 11.515,
-				"p70": 11.531,
-				"p80": 11.564,
-				"p90": 11.589,
-				"p95": 11.633,
-				"p99": 11.795,
-				"avg": 11.477
-			},
-			"failed_requests": 0,
-			"successful_requests": 51,
-			"request_rate": 0.43363092602353315,
-			"total_tokens_sent": 10200,
-			"e2e_latency_ms": {
-				"p50": 58352.067,
-				"p60": 70321.743,
-				"p70": 81960.377,
-				"p80": 93288.338,
-				"p90": 104277.554,
-				"p95": 110084.734,
-				"p99": 114675.842,
-				"avg": 58969.412
-			}
-		},
-		{
-			"id": "[email protected]/s",
-			"executor_type": "ConstantArrivalRate",
-			"config": {
-				"max_vus": 800,
-				"duration_secs": 120,
-				"rate": 100.0
-			},
-			"total_requests": 57,
-			"total_tokens": 9983,
-			"token_throughput_secs": 83.83914212119033,
-			"duration_ms": 119073,
-			"time_to_first_token_ms": {
-				"p50": 60425.652,
-				"p60": 73426.16,
-				"p70": 83375.468,
-				"p80": 96034.495,
-				"p90": 104082.959,
-				"p95": 110616.366,
-				"p99": 114826.821,
-				"avg": 59050.64
-			},
-			"inter_token_latency_ms": {
-				"p50": 11.528,
-				"p60": 11.552,
-				"p70": 11.577,
-				"p80": 11.595,
-				"p90": 11.625,
-				"p95": 11.656,
-				"p99": 11.7,
-				"avg": 11.281
-			},
-			"failed_requests": 0,
-			"successful_requests": 57,
-			"request_rate": 0.4786968948119652,
-			"total_tokens_sent": 11400,
-			"e2e_latency_ms": {
-				"p50": 62519.008,
-				"p60": 74991.853,
-				"p70": 85562.76,
-				"p80": 96625.366,
-				"p90": 106351.421,
-				"p95": 112531.399,
-				"p99": 117196.304,
-				"avg": 61050.657
-			}
-		}
-	],
-	"start_time": "2025-05-21T13:01:17.074891817+00:00",
-	"end_time": "2025-05-21T13:17:26.396424745+00:00",
-	"system": {
-		"cpu": [
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
-			"AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
-		],
-		"memory": "83.47 GB",
-		"os_name": "Debian GNU/Linux",
-		"os_version": "11",
-		"kernel": "5.15.167.4-microsoft-standard-WSL2",
-		"hostname": "computer"
-	}
-}