Commit
·
3ccce1a
1
Parent(s):
c8d26ac
update
Browse files
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title: Inference
|
3 |
emoji: ๐
|
4 |
colorFrom: gray
|
5 |
colorTo: green
|
@@ -9,8 +9,6 @@ app_file: dashboard/app.py
|
|
9 |
pinned: false
|
10 |
license: mit
|
11 |
|
12 |
-
env:
|
13 |
-
DASHBOARD_FROM_RESULTS_DIR: results
|
14 |
---
|
15 |
|
16 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: Inference Benchmarking Results Phi-4 (3000 Tokens)
|
3 |
emoji: ๐
|
4 |
colorFrom: gray
|
5 |
colorTo: green
|
|
|
9 |
pinned: false
|
10 |
license: mit
|
11 |
|
|
|
|
|
12 |
---
|
13 |
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
dashboard/app.py
CHANGED
@@ -3,14 +3,9 @@ from dataclasses import dataclass
|
|
3 |
from typing import List
|
4 |
|
5 |
import click
|
6 |
-
import os
|
7 |
import gradio as gr
|
8 |
import pandas as pd
|
9 |
|
10 |
-
import traceback
|
11 |
-
import glob
|
12 |
-
import json
|
13 |
-
|
14 |
from parse_results import build_results
|
15 |
|
16 |
|
@@ -21,22 +16,8 @@ class PlotConfig:
|
|
21 |
title: str
|
22 |
percentiles: List[float] = None
|
23 |
|
24 |
-
def check_file_exists(path, label=""):
|
25 |
-
if os.path.exists(path):
|
26 |
-
print(f"โ
{label} file exists: {path}")
|
27 |
-
print(f" File size: {os.path.getsize(path)} bytes")
|
28 |
-
print(f" Absolute path: {os.path.abspath(path)}")
|
29 |
-
else:
|
30 |
-
print(f"โ {label} file NOT found: {path}")
|
31 |
-
print(f" Current working directory: {os.getcwd()}")
|
32 |
-
print(f" Directory contents: {os.listdir(os.path.dirname(path) if os.path.dirname(path) else '.')}")
|
33 |
-
|
34 |
|
35 |
def run(from_results_dir, datasource, port):
|
36 |
-
print(f"๐ก Debug - from_results_dir: {from_results_dir}")
|
37 |
-
print(f"๐ก Debug - datasource: {datasource}")
|
38 |
-
print(f"๐ก Debug - current directory: {os.getcwd()}")
|
39 |
-
|
40 |
css = '''
|
41 |
.summary span {
|
42 |
font-size: 10px;
|
@@ -48,17 +29,17 @@ def run(from_results_dir, datasource, port):
|
|
48 |
summary_desc = '''
|
49 |
## Summary
|
50 |
This table shows the average of the metrics for each model and QPS rate.
|
51 |
-
|
52 |
The metrics are:
|
53 |
-
* Inter token latency: Time to generate a new output token for each user querying the system.
|
54 |
It translates as the “speed” perceived by the end-user. We aim for at least 300 words per minute (average reading speed), so ITL<150ms
|
55 |
-
* Time to First Token: Time the user has to wait before seeing the first token of its answer.
|
56 |
Lower waiting time are essential for real-time interactions, less so for offline workloads.
|
57 |
* End-to-end latency: The overall time the system took to generate the full response to the user.
|
58 |
* Throughput: The number of tokens per second the system can generate across all requests
|
59 |
* Successful requests: The number of requests the system was able to honor in the benchmark timeframe
|
60 |
-
* Error rate: The percentage of requests that ended up in error, as the system could not process them in time or failed to process them.
|
61 |
-
|
62 |
'''
|
63 |
|
64 |
df_bench = pd.DataFrame()
|
@@ -129,76 +110,17 @@ def run(from_results_dir, datasource, port):
|
|
129 |
return res
|
130 |
|
131 |
def load_datasource(datasource, fn):
|
132 |
-
print(f"๐ก Debug - load_datasource called with: {datasource}")
|
133 |
if datasource.startswith('file://'):
|
134 |
-
|
135 |
-
print(f"๐ก Debug - Extracted local path: {local_path}")
|
136 |
-
check_file_exists(local_path, "Local")
|
137 |
-
return fn(local_path)
|
138 |
elif datasource.startswith('s3://'):
|
139 |
return fn(datasource)
|
140 |
else:
|
141 |
-
|
142 |
-
print(f"๐ก Debug - Using path as-is: {datasource}")
|
143 |
-
check_file_exists(datasource, "Direct")
|
144 |
-
return fn(datasource)
|
145 |
-
|
146 |
-
parquet_file_to_load = None
|
147 |
|
148 |
if from_results_dir is not None:
|
149 |
-
|
150 |
-
# within that directory.
|
151 |
-
output_filename = 'benchmarks.parquet'
|
152 |
-
print(f"๐ก Debug - Building results from directory: {from_results_dir}")
|
153 |
-
|
154 |
-
# Check if results directory exists
|
155 |
-
check_file_exists(from_results_dir, "Results directory")
|
156 |
-
|
157 |
-
# Create absolute path for results directory
|
158 |
-
abs_results_dir = os.path.abspath(from_results_dir)
|
159 |
-
print(f"๐ก Debug - Absolute results directory: {abs_results_dir}")
|
160 |
-
|
161 |
-
# Create the results directory if it doesn't exist
|
162 |
-
if not os.path.exists(abs_results_dir):
|
163 |
-
print(f"๐ก Debug - Creating results directory: {abs_results_dir}")
|
164 |
-
os.makedirs(abs_results_dir, exist_ok=True)
|
165 |
-
|
166 |
-
# Call build_results with absolute paths
|
167 |
-
full_output_path = os.path.join(abs_results_dir, output_filename)
|
168 |
-
print(f"๐ก Debug - Expected output path: {full_output_path}")
|
169 |
-
|
170 |
-
build_results(abs_results_dir, output_filename, None)
|
171 |
-
|
172 |
-
# Check if the file was created
|
173 |
-
check_file_exists(full_output_path, "Generated parquet")
|
174 |
-
|
175 |
-
# The file to load is now in from_results_dir/output_filename
|
176 |
-
parquet_file_to_load = full_output_path
|
177 |
-
else:
|
178 |
-
# If not building from results_dir, use the provided datasource directly.
|
179 |
-
parquet_file_to_load = datasource
|
180 |
-
|
181 |
-
print(f"๐ก Debug - Final parquet_file_to_load: {parquet_file_to_load}")
|
182 |
-
|
183 |
# Load data
|
184 |
-
|
185 |
-
df_bench = load_datasource(parquet_file_to_load, load_bench_results)
|
186 |
-
print(f"โ
Successfully loaded data with {len(df_bench)} rows")
|
187 |
-
except Exception as e:
|
188 |
-
print(f"โ Error loading data: {str(e)}")
|
189 |
-
print(f"Stack trace: {traceback.format_exc()}")
|
190 |
-
# Create a minimal DataFrame to prevent further errors
|
191 |
-
df_bench = pd.DataFrame({
|
192 |
-
"model": ["error"],
|
193 |
-
"run_id": ["error"],
|
194 |
-
"rate": [0],
|
195 |
-
"inter_token_latency_ms_p90": [0],
|
196 |
-
"time_to_first_token_ms_p90": [0],
|
197 |
-
"e2e_latency_ms_p90": [0],
|
198 |
-
"token_throughput_secs": [0],
|
199 |
-
"successful_requests": [0],
|
200 |
-
"error_rate": [0]
|
201 |
-
})
|
202 |
|
203 |
# Define metrics
|
204 |
metrics = {
|
@@ -276,112 +198,11 @@ def run(from_results_dir, datasource, port):
|
|
276 |
|
277 |
|
278 |
@click.command()
|
279 |
-
@click.option('--from-results-dir',
|
280 |
-
@click.option('--datasource',
|
281 |
@click.option('--port', default=7860, help='Port to run the dashboard')
|
282 |
-
def main(
|
283 |
-
|
284 |
-
# print(f"Environment variables: {os.environ}") # Already in user's code or logs
|
285 |
-
|
286 |
-
# Determine the directory from which to process JSON results
|
287 |
-
# Priority: 1. CLI option, 2. Env Var, 3. Default to 'results' dir
|
288 |
-
processing_dir = cli_from_results_dir
|
289 |
-
|
290 |
-
if processing_dir is None:
|
291 |
-
env_var_value = os.environ.get('DASHBOARD_FROM_RESULTS_DIR')
|
292 |
-
if env_var_value:
|
293 |
-
print(f"Using environment variable DASHBOARD_FROM_RESULTS_DIR='{env_var_value}' for processing.")
|
294 |
-
processing_dir = env_var_value
|
295 |
-
elif os.path.exists('results') and os.path.isdir('results'):
|
296 |
-
print(f"No --from-results-dir option or DASHBOARD_FROM_RESULTS_DIR env var. Defaulting to 'results' directory for processing as it exists.")
|
297 |
-
processing_dir = 'results'
|
298 |
-
else:
|
299 |
-
print(f"No directory specified for processing (no --from-results-dir, no DASHBOARD_FROM_RESULTS_DIR env var, and 'results' dir not found).")
|
300 |
-
# processing_dir remains None
|
301 |
-
|
302 |
-
path_to_load_by_run_function = None # This will be the path to the .parquet file
|
303 |
-
|
304 |
-
if processing_dir:
|
305 |
-
# A directory for processing JSONs has been determined.
|
306 |
-
# Use the existing logic to build/fallback and generate benchmarks.parquet.
|
307 |
-
output_filename = 'benchmarks.parquet'
|
308 |
-
abs_processing_dir = os.path.abspath(processing_dir)
|
309 |
-
|
310 |
-
print(f"๐ก Debug - Will process JSONs from directory: {abs_processing_dir}")
|
311 |
-
check_file_exists(abs_processing_dir, "Source directory for JSONs")
|
312 |
-
|
313 |
-
# Ensure the directory exists (it might be 'results' or user-provided)
|
314 |
-
# build_results might expect the output directory to exist.
|
315 |
-
if not os.path.exists(abs_processing_dir):
|
316 |
-
print(f"๐ก Debug - Creating directory for processing/output: {abs_processing_dir}")
|
317 |
-
os.makedirs(abs_processing_dir, exist_ok=True)
|
318 |
-
|
319 |
-
# The generated parquet file will be placed inside the abs_processing_dir
|
320 |
-
generated_parquet_filepath = os.path.join(abs_processing_dir, output_filename)
|
321 |
-
print(f"๐ก Debug - Expected path for generated parquet file: {generated_parquet_filepath}")
|
322 |
-
|
323 |
-
try:
|
324 |
-
build_results(abs_processing_dir, output_filename, None) # output_filename is relative to abs_processing_dir
|
325 |
-
print("โ
Build results completed using build_results.")
|
326 |
-
except Exception as e_build:
|
327 |
-
print(f"โ Error in build_results: {str(e_build)}")
|
328 |
-
print(f"Stack trace: {traceback.format_exc()}")
|
329 |
-
print("โ ๏ธ Attempting fallback method: direct JSON processing")
|
330 |
-
try:
|
331 |
-
json_files = glob.glob(os.path.join(abs_processing_dir, "*.json"))
|
332 |
-
print(f"Found {len(json_files)} JSON files for fallback: {json_files}")
|
333 |
-
if not json_files:
|
334 |
-
raise FileNotFoundError("Fallback: No JSON files found in results directory")
|
335 |
-
|
336 |
-
combined_data = []
|
337 |
-
for json_file in json_files:
|
338 |
-
try:
|
339 |
-
with open(json_file, 'r') as f:
|
340 |
-
data = json.load(f)
|
341 |
-
filename = os.path.basename(json_file)
|
342 |
-
model_name_parts = filename.split('_')
|
343 |
-
model_name = f"{model_name_parts[0]}_{model_name_parts[1]}" if len(model_name_parts) > 1 else model_name_parts[0]
|
344 |
-
|
345 |
-
if 'benchmarks' in data:
|
346 |
-
for benchmark in data['benchmarks']:
|
347 |
-
benchmark['model'] = model_name
|
348 |
-
benchmark['run_id'] = os.path.splitext(filename)[0]
|
349 |
-
combined_data.append(benchmark)
|
350 |
-
else:
|
351 |
-
print(f"โ ๏ธ Fallback: No 'benchmarks' key in {json_file}")
|
352 |
-
except Exception as json_err:
|
353 |
-
print(f"โ Fallback: Error processing {json_file}: {str(json_err)}")
|
354 |
-
|
355 |
-
if combined_data:
|
356 |
-
df_direct = pd.DataFrame(combined_data)
|
357 |
-
df_direct.to_parquet(generated_parquet_filepath)
|
358 |
-
print(f"โ
Created parquet file via fallback method: {generated_parquet_filepath}")
|
359 |
-
else:
|
360 |
-
raise ValueError("Fallback: No data could be extracted from JSON files")
|
361 |
-
except Exception as e_fallback:
|
362 |
-
print(f"โ Fallback method failed: {str(e_fallback)}")
|
363 |
-
print(f"Stack trace: {traceback.format_exc()}")
|
364 |
-
|
365 |
-
# After attempting to build/generate, check if the file exists
|
366 |
-
check_file_exists(generated_parquet_filepath, "Parquet file after build/fallback attempts")
|
367 |
-
if os.path.exists(generated_parquet_filepath):
|
368 |
-
path_to_load_by_run_function = generated_parquet_filepath
|
369 |
-
else:
|
370 |
-
print(f"โ CRITICAL: Failed to generate or find parquet file at '{generated_parquet_filepath}' after all attempts.")
|
371 |
-
# path_to_load_by_run_function remains None here, will be handled below.
|
372 |
-
|
373 |
-
# If path_to_load_by_run_function is still None at this point
|
374 |
-
# (either because processing_dir was not set, or all generation attempts failed),
|
375 |
-
# default to the original cli_datasource.
|
376 |
-
if path_to_load_by_run_function is None:
|
377 |
-
print(f"โ ๏ธ Defaulting to cli_datasource '{cli_datasource}' as parquet generation failed or was skipped.")
|
378 |
-
path_to_load_by_run_function = cli_datasource
|
379 |
-
|
380 |
-
print(f"๐ก Final path to be loaded by run() function: '{path_to_load_by_run_function}'")
|
381 |
-
|
382 |
-
# Call run(). The first argument (from_results_dir for run()) is None because main handles processing.
|
383 |
-
# The second argument (datasource for run()) is the actual file path to load.
|
384 |
-
run(None, path_to_load_by_run_function, port)
|
385 |
|
386 |
|
387 |
if __name__ == '__main__':
|
|
|
3 |
from typing import List
|
4 |
|
5 |
import click
|
|
|
6 |
import gradio as gr
|
7 |
import pandas as pd
|
8 |
|
|
|
|
|
|
|
|
|
9 |
from parse_results import build_results
|
10 |
|
11 |
|
|
|
16 |
title: str
|
17 |
percentiles: List[float] = None
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
def run(from_results_dir, datasource, port):
|
|
|
|
|
|
|
|
|
21 |
css = '''
|
22 |
.summary span {
|
23 |
font-size: 10px;
|
|
|
29 |
summary_desc = '''
|
30 |
## Summary
|
31 |
This table shows the average of the metrics for each model and QPS rate.
|
32 |
+
|
33 |
The metrics are:
|
34 |
+
* Inter token latency: Time to generate a new output token for each user querying the system.
|
35 |
It translates as the “speed” perceived by the end-user. We aim for at least 300 words per minute (average reading speed), so ITL<150ms
|
36 |
+
* Time to First Token: Time the user has to wait before seeing the first token of its answer.
|
37 |
Lower waiting time are essential for real-time interactions, less so for offline workloads.
|
38 |
* End-to-end latency: The overall time the system took to generate the full response to the user.
|
39 |
* Throughput: The number of tokens per second the system can generate across all requests
|
40 |
* Successful requests: The number of requests the system was able to honor in the benchmark timeframe
|
41 |
+
* Error rate: The percentage of requests that ended up in error, as the system could not process them in time or failed to process them.
|
42 |
+
|
43 |
'''
|
44 |
|
45 |
df_bench = pd.DataFrame()
|
|
|
110 |
return res
|
111 |
|
112 |
def load_datasource(datasource, fn):
|
|
|
113 |
if datasource.startswith('file://'):
|
114 |
+
return fn(datasource)
|
|
|
|
|
|
|
115 |
elif datasource.startswith('s3://'):
|
116 |
return fn(datasource)
|
117 |
else:
|
118 |
+
raise ValueError(f"Unknown datasource: {datasource}")
|
|
|
|
|
|
|
|
|
|
|
119 |
|
120 |
if from_results_dir is not None:
|
121 |
+
build_results(from_results_dir, 'benchmarks.parquet', None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
# Load data
|
123 |
+
df_bench = load_datasource(datasource, load_bench_results)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
|
125 |
# Define metrics
|
126 |
metrics = {
|
|
|
198 |
|
199 |
|
200 |
@click.command()
|
201 |
+
@click.option('--from-results-dir', default=None, help='Load inference-benchmarker results from a directory')
|
202 |
+
@click.option('--datasource', default='file://benchmarks.parquet', help='Load a Parquet file already generated')
|
203 |
@click.option('--port', default=7860, help='Port to run the dashboard')
|
204 |
+
def main(from_results_dir, datasource, port):
|
205 |
+
run(from_results_dir, datasource, port)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
|
207 |
|
208 |
if __name__ == '__main__':
|
results/{RedHatAI_phi-4-FP8-dynamic_2025-05-21-09-15-05.json → RedHatAI_phi-4-FP8-dynamic_2025-05-21-13-56-47.json}
RENAMED
@@ -12,22 +12,22 @@
|
|
12 |
],
|
13 |
"num_rates": 10,
|
14 |
"prompt_options": {
|
15 |
-
"num_tokens":
|
16 |
-
"min_tokens":
|
17 |
-
"max_tokens":
|
18 |
"variance": 10
|
19 |
},
|
20 |
"decode_options": {
|
21 |
-
"num_tokens":
|
22 |
-
"min_tokens":
|
23 |
-
"max_tokens":
|
24 |
"variance": 10
|
25 |
},
|
26 |
"tokenizer": "RedHatAI/phi-4-FP8-dynamic",
|
27 |
"model_name": "phi-4",
|
28 |
"profile": null,
|
29 |
"meta": null,
|
30 |
-
"run_id": "vLLM: RedHatAI/phi-4-FP8-dynamic (
|
31 |
},
|
32 |
"results": [
|
33 |
{
|
@@ -38,43 +38,43 @@
|
|
38 |
"duration_secs": 30,
|
39 |
"rate": null
|
40 |
},
|
41 |
-
"total_requests":
|
42 |
-
"total_tokens":
|
43 |
-
"token_throughput_secs":
|
44 |
-
"duration_ms":
|
45 |
"time_to_first_token_ms": {
|
46 |
-
"p50":
|
47 |
-
"p60":
|
48 |
-
"p70":
|
49 |
-
"p80":
|
50 |
-
"p90":
|
51 |
-
"p95":
|
52 |
-
"p99":
|
53 |
-
"avg":
|
54 |
},
|
55 |
"inter_token_latency_ms": {
|
56 |
-
"p50":
|
57 |
-
"p60":
|
58 |
-
"p70":
|
59 |
-
"p80":
|
60 |
-
"p90":
|
61 |
-
"p95":
|
62 |
-
"p99":
|
63 |
-
"avg":
|
64 |
},
|
65 |
"failed_requests": 0,
|
66 |
-
"successful_requests":
|
67 |
-
"request_rate": 0.
|
68 |
-
"total_tokens_sent":
|
69 |
"e2e_latency_ms": {
|
70 |
-
"p50":
|
71 |
-
"p60":
|
72 |
-
"p70":
|
73 |
-
"p80":
|
74 |
-
"p90":
|
75 |
-
"p95":
|
76 |
-
"p99":
|
77 |
-
"avg":
|
78 |
}
|
79 |
},
|
80 |
{
|
@@ -85,43 +85,43 @@
|
|
85 |
"duration_secs": 120,
|
86 |
"rate": 1.0
|
87 |
},
|
88 |
-
"total_requests":
|
89 |
-
"total_tokens":
|
90 |
-
"token_throughput_secs":
|
91 |
-
"duration_ms":
|
92 |
"time_to_first_token_ms": {
|
93 |
-
"p50":
|
94 |
-
"p60":
|
95 |
-
"p70":
|
96 |
-
"p80":
|
97 |
-
"p90":
|
98 |
-
"p95":
|
99 |
-
"p99":
|
100 |
-
"avg":
|
101 |
},
|
102 |
"inter_token_latency_ms": {
|
103 |
-
"p50":
|
104 |
-
"p60":
|
105 |
-
"p70":
|
106 |
-
"p80":
|
107 |
-
"p90":
|
108 |
-
"p95":
|
109 |
-
"p99":
|
110 |
-
"avg":
|
111 |
},
|
112 |
"failed_requests": 0,
|
113 |
-
"successful_requests":
|
114 |
-
"request_rate": 0.
|
115 |
-
"total_tokens_sent":
|
116 |
"e2e_latency_ms": {
|
117 |
-
"p50":
|
118 |
-
"p60":
|
119 |
-
"p70":
|
120 |
-
"p80":
|
121 |
-
"p90":
|
122 |
-
"p95":
|
123 |
-
"p99":
|
124 |
-
"avg":
|
125 |
}
|
126 |
},
|
127 |
{
|
@@ -132,43 +132,43 @@
|
|
132 |
"duration_secs": 120,
|
133 |
"rate": 10.0
|
134 |
},
|
135 |
-
"total_requests":
|
136 |
-
"total_tokens":
|
137 |
-
"token_throughput_secs":
|
138 |
-
"duration_ms":
|
139 |
"time_to_first_token_ms": {
|
140 |
-
"p50":
|
141 |
-
"p60":
|
142 |
-
"p70":
|
143 |
-
"p80":
|
144 |
-
"p90":
|
145 |
-
"p95":
|
146 |
-
"p99":
|
147 |
-
"avg":
|
148 |
},
|
149 |
"inter_token_latency_ms": {
|
150 |
-
"p50":
|
151 |
-
"p60":
|
152 |
-
"p70":
|
153 |
-
"p80":
|
154 |
-
"p90":
|
155 |
-
"p95":
|
156 |
-
"p99":
|
157 |
-
"avg":
|
158 |
},
|
159 |
"failed_requests": 0,
|
160 |
-
"successful_requests":
|
161 |
-
"request_rate":
|
162 |
-
"total_tokens_sent":
|
163 |
"e2e_latency_ms": {
|
164 |
-
"p50":
|
165 |
-
"p60":
|
166 |
-
"p70":
|
167 |
-
"p80":
|
168 |
-
"p90":
|
169 |
-
"p95":
|
170 |
-
"p99":
|
171 |
-
"avg":
|
172 |
}
|
173 |
},
|
174 |
{
|
@@ -179,43 +179,43 @@
|
|
179 |
"duration_secs": 120,
|
180 |
"rate": 30.0
|
181 |
},
|
182 |
-
"total_requests":
|
183 |
-
"total_tokens":
|
184 |
-
"token_throughput_secs":
|
185 |
-
"duration_ms":
|
186 |
"time_to_first_token_ms": {
|
187 |
-
"p50":
|
188 |
-
"p60":
|
189 |
-
"p70":
|
190 |
-
"p80":
|
191 |
-
"p90":
|
192 |
-
"p95":
|
193 |
-
"p99":
|
194 |
-
"avg":
|
195 |
},
|
196 |
"inter_token_latency_ms": {
|
197 |
-
"p50":
|
198 |
-
"p60":
|
199 |
-
"p70":
|
200 |
-
"p80":
|
201 |
-
"p90":
|
202 |
-
"p95":
|
203 |
-
"p99":
|
204 |
-
"avg":
|
205 |
},
|
206 |
"failed_requests": 0,
|
207 |
-
"successful_requests":
|
208 |
-
"request_rate":
|
209 |
-
"total_tokens_sent":
|
210 |
"e2e_latency_ms": {
|
211 |
-
"p50":
|
212 |
-
"p60":
|
213 |
-
"p70":
|
214 |
-
"p80":
|
215 |
-
"p90":
|
216 |
-
"p95":
|
217 |
-
"p99":
|
218 |
-
"avg":
|
219 |
}
|
220 |
},
|
221 |
{
|
@@ -226,48 +226,48 @@
|
|
226 |
"duration_secs": 120,
|
227 |
"rate": 100.0
|
228 |
},
|
229 |
-
"total_requests":
|
230 |
-
"total_tokens":
|
231 |
-
"token_throughput_secs":
|
232 |
-
"duration_ms":
|
233 |
"time_to_first_token_ms": {
|
234 |
-
"p50":
|
235 |
-
"p60":
|
236 |
-
"p70":
|
237 |
-
"p80":
|
238 |
-
"p90":
|
239 |
-
"p95":
|
240 |
-
"p99":
|
241 |
-
"avg":
|
242 |
},
|
243 |
"inter_token_latency_ms": {
|
244 |
-
"p50":
|
245 |
-
"p60":
|
246 |
-
"p70":
|
247 |
-
"p80":
|
248 |
-
"p90":
|
249 |
-
"p95":
|
250 |
-
"p99":
|
251 |
-
"avg":
|
252 |
},
|
253 |
"failed_requests": 0,
|
254 |
-
"successful_requests":
|
255 |
-
"request_rate":
|
256 |
-
"total_tokens_sent":
|
257 |
"e2e_latency_ms": {
|
258 |
-
"p50":
|
259 |
-
"p60":
|
260 |
-
"p70":
|
261 |
-
"p80":
|
262 |
-
"p90":
|
263 |
-
"p95":
|
264 |
-
"p99":
|
265 |
-
"avg":
|
266 |
}
|
267 |
}
|
268 |
],
|
269 |
-
"start_time": "2025-05-
|
270 |
-
"end_time": "2025-05-
|
271 |
"system": {
|
272 |
"cpu": [
|
273 |
"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
|
|
|
12 |
],
|
13 |
"num_rates": 10,
|
14 |
"prompt_options": {
|
15 |
+
"num_tokens": 8000,
|
16 |
+
"min_tokens": 7980,
|
17 |
+
"max_tokens": 8020,
|
18 |
"variance": 10
|
19 |
},
|
20 |
"decode_options": {
|
21 |
+
"num_tokens": 8000,
|
22 |
+
"min_tokens": 7980,
|
23 |
+
"max_tokens": 8020,
|
24 |
"variance": 10
|
25 |
},
|
26 |
"tokenizer": "RedHatAI/phi-4-FP8-dynamic",
|
27 |
"model_name": "phi-4",
|
28 |
"profile": null,
|
29 |
"meta": null,
|
30 |
+
"run_id": "vLLM: RedHatAI/phi-4-FP8-dynamic (8000 tokens)"
|
31 |
},
|
32 |
"results": [
|
33 |
{
|
|
|
38 |
"duration_secs": 30,
|
39 |
"rate": null
|
40 |
},
|
41 |
+
"total_requests": 2,
|
42 |
+
"total_tokens": 1643,
|
43 |
+
"token_throughput_secs": 38.490013255851395,
|
44 |
+
"duration_ms": 42686,
|
45 |
"time_to_first_token_ms": {
|
46 |
+
"p50": 1276.801,
|
47 |
+
"p60": 1388.913,
|
48 |
+
"p70": 1501.026,
|
49 |
+
"p80": 1613.139,
|
50 |
+
"p90": 1725.252,
|
51 |
+
"p95": 1781.309,
|
52 |
+
"p99": 1826.154,
|
53 |
+
"avg": 1276.801
|
54 |
},
|
55 |
"inter_token_latency_ms": {
|
56 |
+
"p50": 24.424,
|
57 |
+
"p60": 24.432,
|
58 |
+
"p70": 24.44,
|
59 |
+
"p80": 24.448,
|
60 |
+
"p90": 24.456,
|
61 |
+
"p95": 24.46,
|
62 |
+
"p99": 24.463,
|
63 |
+
"avg": 24.424
|
64 |
},
|
65 |
"failed_requests": 0,
|
66 |
+
"successful_requests": 2,
|
67 |
+
"request_rate": 0.0468533332390157,
|
68 |
+
"total_tokens_sent": 16000,
|
69 |
"e2e_latency_ms": {
|
70 |
+
"p50": 21343.075,
|
71 |
+
"p60": 21391.438,
|
72 |
+
"p70": 21439.801,
|
73 |
+
"p80": 21488.164,
|
74 |
+
"p90": 21536.527,
|
75 |
+
"p95": 21560.709,
|
76 |
+
"p99": 21580.054,
|
77 |
+
"avg": 21343.075
|
78 |
}
|
79 |
},
|
80 |
{
|
|
|
85 |
"duration_secs": 120,
|
86 |
"rate": 1.0
|
87 |
},
|
88 |
+
"total_requests": 90,
|
89 |
+
"total_tokens": 55892,
|
90 |
+
"token_throughput_secs": 478.696852515677,
|
91 |
+
"duration_ms": 116758,
|
92 |
"time_to_first_token_ms": {
|
93 |
+
"p50": 118.856,
|
94 |
+
"p60": 124.707,
|
95 |
+
"p70": 131.654,
|
96 |
+
"p80": 135.562,
|
97 |
+
"p90": 145.529,
|
98 |
+
"p95": 150.366,
|
99 |
+
"p99": 715.649,
|
100 |
+
"avg": 128.611
|
101 |
},
|
102 |
"inter_token_latency_ms": {
|
103 |
+
"p50": 45.758,
|
104 |
+
"p60": 46.229,
|
105 |
+
"p70": 46.314,
|
106 |
+
"p80": 46.373,
|
107 |
+
"p90": 46.483,
|
108 |
+
"p95": 46.581,
|
109 |
+
"p99": 46.871,
|
110 |
+
"avg": 43.271
|
111 |
},
|
112 |
"failed_requests": 0,
|
113 |
+
"successful_requests": 90,
|
114 |
+
"request_rate": 0.7708208102485317,
|
115 |
+
"total_tokens_sent": 720000,
|
116 |
"e2e_latency_ms": {
|
117 |
+
"p50": 27887.256,
|
118 |
+
"p60": 30188.411,
|
119 |
+
"p70": 31661.903,
|
120 |
+
"p80": 35685.812,
|
121 |
+
"p90": 45661.636,
|
122 |
+
"p95": 50093.628,
|
123 |
+
"p99": 59727.184,
|
124 |
+
"avg": 27093.895
|
125 |
}
|
126 |
},
|
127 |
{
|
|
|
132 |
"duration_secs": 120,
|
133 |
"rate": 10.0
|
134 |
},
|
135 |
+
"total_requests": 97,
|
136 |
+
"total_tokens": 45779,
|
137 |
+
"token_throughput_secs": 385.8671945353039,
|
138 |
+
"duration_ms": 118639,
|
139 |
"time_to_first_token_ms": {
|
140 |
+
"p50": 264.625,
|
141 |
+
"p60": 314.639,
|
142 |
+
"p70": 341.786,
|
143 |
+
"p80": 416.021,
|
144 |
+
"p90": 502.604,
|
145 |
+
"p95": 608.336,
|
146 |
+
"p99": 712.908,
|
147 |
+
"avg": 278.878
|
148 |
},
|
149 |
"inter_token_latency_ms": {
|
150 |
+
"p50": 152.068,
|
151 |
+
"p60": 183.639,
|
152 |
+
"p70": 208.294,
|
153 |
+
"p80": 210.057,
|
154 |
+
"p90": 211.894,
|
155 |
+
"p95": 421.244,
|
156 |
+
"p99": 436.578,
|
157 |
+
"avg": 190.502
|
158 |
},
|
159 |
"failed_requests": 0,
|
160 |
+
"successful_requests": 97,
|
161 |
+
"request_rate": 0.8176045319890011,
|
162 |
+
"total_tokens_sent": 776000,
|
163 |
"e2e_latency_ms": {
|
164 |
+
"p50": 89809.719,
|
165 |
+
"p60": 90599.198,
|
166 |
+
"p70": 97086.861,
|
167 |
+
"p80": 97763.592,
|
168 |
+
"p90": 102705.608,
|
169 |
+
"p95": 105891.319,
|
170 |
+
"p99": 109209.372,
|
171 |
+
"avg": 80168.287
|
172 |
}
|
173 |
},
|
174 |
{
|
|
|
179 |
"duration_secs": 120,
|
180 |
"rate": 30.0
|
181 |
},
|
182 |
+
"total_requests": 108,
|
183 |
+
"total_tokens": 48755,
|
184 |
+
"token_throughput_secs": 408.5182278415837,
|
185 |
+
"duration_ms": 119345,
|
186 |
"time_to_first_token_ms": {
|
187 |
+
"p50": 315.639,
|
188 |
+
"p60": 364.113,
|
189 |
+
"p70": 440.936,
|
190 |
+
"p80": 517.15,
|
191 |
+
"p90": 635.496,
|
192 |
+
"p95": 743.467,
|
193 |
+
"p99": 886.077,
|
194 |
+
"avg": 348.945
|
195 |
},
|
196 |
"inter_token_latency_ms": {
|
197 |
+
"p50": 172.827,
|
198 |
+
"p60": 189.057,
|
199 |
+
"p70": 196.538,
|
200 |
+
"p80": 201.266,
|
201 |
+
"p90": 442.975,
|
202 |
+
"p95": 465.991,
|
203 |
+
"p99": 473.842,
|
204 |
+
"avg": 207.845
|
205 |
},
|
206 |
"failed_requests": 0,
|
207 |
+
"successful_requests": 108,
|
208 |
+
"request_rate": 0.9049321835071489,
|
209 |
+
"total_tokens_sent": 864000,
|
210 |
"e2e_latency_ms": {
|
211 |
+
"p50": 89868.756,
|
212 |
+
"p60": 96902.23,
|
213 |
+
"p70": 98937.333,
|
214 |
+
"p80": 102789.849,
|
215 |
+
"p90": 109541.9,
|
216 |
+
"p95": 111388.456,
|
217 |
+
"p99": 114281.927,
|
218 |
+
"avg": 82072.638
|
219 |
}
|
220 |
},
|
221 |
{
|
|
|
226 |
"duration_secs": 120,
|
227 |
"rate": 100.0
|
228 |
},
|
229 |
+
"total_requests": 125,
|
230 |
+
"total_tokens": 57918,
|
231 |
+
"token_throughput_secs": 485.359321343381,
|
232 |
+
"duration_ms": 119330,
|
233 |
"time_to_first_token_ms": {
|
234 |
+
"p50": 1154.434,
|
235 |
+
"p60": 1276.393,
|
236 |
+
"p70": 1440.368,
|
237 |
+
"p80": 1604.069,
|
238 |
+
"p90": 1768.54,
|
239 |
+
"p95": 1850.13,
|
240 |
+
"p99": 1919.678,
|
241 |
+
"avg": 1208.132
|
242 |
},
|
243 |
"inter_token_latency_ms": {
|
244 |
+
"p50": 166.875,
|
245 |
+
"p60": 166.884,
|
246 |
+
"p70": 167.245,
|
247 |
+
"p80": 188.28,
|
248 |
+
"p90": 350.172,
|
249 |
+
"p95": 417.485,
|
250 |
+
"p99": 437.566,
|
251 |
+
"avg": 186.06
|
252 |
},
|
253 |
"failed_requests": 0,
|
254 |
+
"successful_requests": 125,
|
255 |
+
"request_rate": 1.047513988188864,
|
256 |
+
"total_tokens_sent": 1000000,
|
257 |
"e2e_latency_ms": {
|
258 |
+
"p50": 82803.004,
|
259 |
+
"p60": 89976.229,
|
260 |
+
"p70": 90374.914,
|
261 |
+
"p80": 99727.225,
|
262 |
+
"p90": 108866.194,
|
263 |
+
"p95": 113444.528,
|
264 |
+
"p99": 116545.189,
|
265 |
+
"avg": 77917.015
|
266 |
}
|
267 |
}
|
268 |
],
|
269 |
+
"start_time": "2025-05-21T13:41:44.260015742+00:00",
|
270 |
+
"end_time": "2025-05-21T13:56:47.150683889+00:00",
|
271 |
"system": {
|
272 |
"cpu": [
|
273 |
"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
|
results/microsoft_phi-4_2025-05-21-12-47-52.json
DELETED
@@ -1,296 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"config": {
|
3 |
-
"max_vus": 800,
|
4 |
-
"duration_secs": 120,
|
5 |
-
"benchmark_kind": "Rate",
|
6 |
-
"warmup_duration_secs": 30,
|
7 |
-
"rates": [
|
8 |
-
1.0,
|
9 |
-
10.0,
|
10 |
-
30.0,
|
11 |
-
100.0
|
12 |
-
],
|
13 |
-
"num_rates": 10,
|
14 |
-
"prompt_options": {
|
15 |
-
"num_tokens": 200,
|
16 |
-
"min_tokens": 180,
|
17 |
-
"max_tokens": 220,
|
18 |
-
"variance": 10
|
19 |
-
},
|
20 |
-
"decode_options": {
|
21 |
-
"num_tokens": 200,
|
22 |
-
"min_tokens": 180,
|
23 |
-
"max_tokens": 220,
|
24 |
-
"variance": 10
|
25 |
-
},
|
26 |
-
"tokenizer": "microsoft/phi-4",
|
27 |
-
"model_name": "phi-4",
|
28 |
-
"profile": null,
|
29 |
-
"meta": null,
|
30 |
-
"run_id": "Ollama: unsloth/phi-4-GGUF:Q8_0 (200 tokens)"
|
31 |
-
},
|
32 |
-
"results": [
|
33 |
-
{
|
34 |
-
"id": "warmup",
|
35 |
-
"executor_type": "ConstantVUs",
|
36 |
-
"config": {
|
37 |
-
"max_vus": 1,
|
38 |
-
"duration_secs": 30,
|
39 |
-
"rate": null
|
40 |
-
},
|
41 |
-
"total_requests": 17,
|
42 |
-
"total_tokens": 2560,
|
43 |
-
"token_throughput_secs": 81.92346820970964,
|
44 |
-
"duration_ms": 31248,
|
45 |
-
"time_to_first_token_ms": {
|
46 |
-
"p50": 48.023,
|
47 |
-
"p60": 48.316,
|
48 |
-
"p70": 48.704,
|
49 |
-
"p80": 49.172,
|
50 |
-
"p90": 50.133,
|
51 |
-
"p95": 79.141,
|
52 |
-
"p99": 171.884,
|
53 |
-
"avg": 56.904
|
54 |
-
},
|
55 |
-
"inter_token_latency_ms": {
|
56 |
-
"p50": 11.835,
|
57 |
-
"p60": 11.849,
|
58 |
-
"p70": 11.866,
|
59 |
-
"p80": 11.888,
|
60 |
-
"p90": 11.999,
|
61 |
-
"p95": 12.031,
|
62 |
-
"p99": 12.057,
|
63 |
-
"avg": 11.863
|
64 |
-
},
|
65 |
-
"failed_requests": 0,
|
66 |
-
"successful_requests": 17,
|
67 |
-
"request_rate": 0.5440230310801031,
|
68 |
-
"total_tokens_sent": 3400,
|
69 |
-
"e2e_latency_ms": {
|
70 |
-
"p50": 2193.161,
|
71 |
-
"p60": 2256.189,
|
72 |
-
"p70": 2409.636,
|
73 |
-
"p80": 2503.287,
|
74 |
-
"p90": 2558.373,
|
75 |
-
"p95": 2565.267,
|
76 |
-
"p99": 2582.093,
|
77 |
-
"avg": 1837.986
|
78 |
-
}
|
79 |
-
},
|
80 |
-
{
|
81 |
-
"id": "[email protected]/s",
|
82 |
-
"executor_type": "ConstantArrivalRate",
|
83 |
-
"config": {
|
84 |
-
"max_vus": 800,
|
85 |
-
"duration_secs": 120,
|
86 |
-
"rate": 1.0
|
87 |
-
},
|
88 |
-
"total_requests": 68,
|
89 |
-
"total_tokens": 13393,
|
90 |
-
"token_throughput_secs": 113.50678834081126,
|
91 |
-
"duration_ms": 117992,
|
92 |
-
"time_to_first_token_ms": {
|
93 |
-
"p50": 23628.355,
|
94 |
-
"p60": 28364.866,
|
95 |
-
"p70": 33468.314,
|
96 |
-
"p80": 37116.28,
|
97 |
-
"p90": 42197.075,
|
98 |
-
"p95": 44792.584,
|
99 |
-
"p99": 46808.871,
|
100 |
-
"avg": 23527.531
|
101 |
-
},
|
102 |
-
"inter_token_latency_ms": {
|
103 |
-
"p50": 17.148,
|
104 |
-
"p60": 17.164,
|
105 |
-
"p70": 17.183,
|
106 |
-
"p80": 17.199,
|
107 |
-
"p90": 17.22,
|
108 |
-
"p95": 17.235,
|
109 |
-
"p99": 17.256,
|
110 |
-
"avg": 17.123
|
111 |
-
},
|
112 |
-
"failed_requests": 0,
|
113 |
-
"successful_requests": 68,
|
114 |
-
"request_rate": 0.5763056527421164,
|
115 |
-
"total_tokens_sent": 13600,
|
116 |
-
"e2e_latency_ms": {
|
117 |
-
"p50": 26918.292,
|
118 |
-
"p60": 31837.746,
|
119 |
-
"p70": 36426.629,
|
120 |
-
"p80": 40565.391,
|
121 |
-
"p90": 45507.834,
|
122 |
-
"p95": 48259.487,
|
123 |
-
"p99": 50280.92,
|
124 |
-
"avg": 26884.974
|
125 |
-
}
|
126 |
-
},
|
127 |
-
{
|
128 |
-
"id": "[email protected]/s",
|
129 |
-
"executor_type": "ConstantArrivalRate",
|
130 |
-
"config": {
|
131 |
-
"max_vus": 800,
|
132 |
-
"duration_secs": 120,
|
133 |
-
"rate": 10.0
|
134 |
-
},
|
135 |
-
"total_requests": 69,
|
136 |
-
"total_tokens": 13411,
|
137 |
-
"token_throughput_secs": 112.91469560470007,
|
138 |
-
"duration_ms": 118771,
|
139 |
-
"time_to_first_token_ms": {
|
140 |
-
"p50": 54889.419,
|
141 |
-
"p60": 66226.724,
|
142 |
-
"p70": 77657.43,
|
143 |
-
"p80": 87194.269,
|
144 |
-
"p90": 97361.153,
|
145 |
-
"p95": 102660.303,
|
146 |
-
"p99": 106894.626,
|
147 |
-
"avg": 54527.075
|
148 |
-
},
|
149 |
-
"inter_token_latency_ms": {
|
150 |
-
"p50": 17.284,
|
151 |
-
"p60": 17.295,
|
152 |
-
"p70": 17.305,
|
153 |
-
"p80": 17.328,
|
154 |
-
"p90": 17.385,
|
155 |
-
"p95": 17.394,
|
156 |
-
"p99": 17.447,
|
157 |
-
"avg": 17.279
|
158 |
-
},
|
159 |
-
"failed_requests": 0,
|
160 |
-
"successful_requests": 69,
|
161 |
-
"request_rate": 0.5809495188072705,
|
162 |
-
"total_tokens_sent": 13800,
|
163 |
-
"e2e_latency_ms": {
|
164 |
-
"p50": 58021.804,
|
165 |
-
"p60": 69751.13,
|
166 |
-
"p70": 80116.293,
|
167 |
-
"p80": 90587.03,
|
168 |
-
"p90": 100535.513,
|
169 |
-
"p95": 105903.68,
|
170 |
-
"p99": 110535.65,
|
171 |
-
"avg": 57868.946
|
172 |
-
}
|
173 |
-
},
|
174 |
-
{
|
175 |
-
"id": "[email protected]/s",
|
176 |
-
"executor_type": "ConstantArrivalRate",
|
177 |
-
"config": {
|
178 |
-
"max_vus": 800,
|
179 |
-
"duration_secs": 120,
|
180 |
-
"rate": 30.0
|
181 |
-
},
|
182 |
-
"total_requests": 70,
|
183 |
-
"total_tokens": 13581,
|
184 |
-
"token_throughput_secs": 113.61611267427078,
|
185 |
-
"duration_ms": 119534,
|
186 |
-
"time_to_first_token_ms": {
|
187 |
-
"p50": 56313.526,
|
188 |
-
"p60": 68465.8,
|
189 |
-
"p70": 78580.113,
|
190 |
-
"p80": 90639.114,
|
191 |
-
"p90": 102040.301,
|
192 |
-
"p95": 108031.928,
|
193 |
-
"p99": 112499.04,
|
194 |
-
"avg": 56639.341
|
195 |
-
},
|
196 |
-
"inter_token_latency_ms": {
|
197 |
-
"p50": 17.172,
|
198 |
-
"p60": 17.182,
|
199 |
-
"p70": 17.217,
|
200 |
-
"p80": 17.235,
|
201 |
-
"p90": 17.256,
|
202 |
-
"p95": 17.31,
|
203 |
-
"p99": 17.346,
|
204 |
-
"avg": 17.18
|
205 |
-
},
|
206 |
-
"failed_requests": 0,
|
207 |
-
"successful_requests": 70,
|
208 |
-
"request_rate": 0.5856069425814708,
|
209 |
-
"total_tokens_sent": 14000,
|
210 |
-
"e2e_latency_ms": {
|
211 |
-
"p50": 59683.651,
|
212 |
-
"p60": 71746.875,
|
213 |
-
"p70": 81953.181,
|
214 |
-
"p80": 94277.653,
|
215 |
-
"p90": 105378.271,
|
216 |
-
"p95": 111453.36,
|
217 |
-
"p99": 115949.496,
|
218 |
-
"avg": 59958.385
|
219 |
-
}
|
220 |
-
},
|
221 |
-
{
|
222 |
-
"id": "[email protected]/s",
|
223 |
-
"executor_type": "ConstantArrivalRate",
|
224 |
-
"config": {
|
225 |
-
"max_vus": 800,
|
226 |
-
"duration_secs": 120,
|
227 |
-
"rate": 100.0
|
228 |
-
},
|
229 |
-
"total_requests": 70,
|
230 |
-
"total_tokens": 13359,
|
231 |
-
"token_throughput_secs": 114.42379660997986,
|
232 |
-
"duration_ms": 116750,
|
233 |
-
"time_to_first_token_ms": {
|
234 |
-
"p50": 57218.949,
|
235 |
-
"p60": 67960.841,
|
236 |
-
"p70": 79764.715,
|
237 |
-
"p80": 91579.471,
|
238 |
-
"p90": 102620.956,
|
239 |
-
"p95": 107961.016,
|
240 |
-
"p99": 112866.279,
|
241 |
-
"avg": 56772.876
|
242 |
-
},
|
243 |
-
"inter_token_latency_ms": {
|
244 |
-
"p50": 17.171,
|
245 |
-
"p60": 17.189,
|
246 |
-
"p70": 17.201,
|
247 |
-
"p80": 17.215,
|
248 |
-
"p90": 17.245,
|
249 |
-
"p95": 17.299,
|
250 |
-
"p99": 17.353,
|
251 |
-
"avg": 17.179
|
252 |
-
},
|
253 |
-
"failed_requests": 0,
|
254 |
-
"successful_requests": 70,
|
255 |
-
"request_rate": 0.5995707584922966,
|
256 |
-
"total_tokens_sent": 14000,
|
257 |
-
"e2e_latency_ms": {
|
258 |
-
"p50": 60551.916,
|
259 |
-
"p60": 71380.408,
|
260 |
-
"p70": 83198.203,
|
261 |
-
"p80": 93909.886,
|
262 |
-
"p90": 105788.774,
|
263 |
-
"p95": 111364.807,
|
264 |
-
"p99": 115968.729,
|
265 |
-
"avg": 60037.39
|
266 |
-
}
|
267 |
-
}
|
268 |
-
],
|
269 |
-
"start_time": "2025-05-21T12:32:04.299141299+00:00",
|
270 |
-
"end_time": "2025-05-21T12:47:52.695866821+00:00",
|
271 |
-
"system": {
|
272 |
-
"cpu": [
|
273 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
|
274 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
|
275 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
|
276 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
|
277 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
|
278 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
|
279 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
|
280 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
|
281 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
|
282 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
|
283 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
|
284 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
|
285 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
|
286 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
|
287 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
|
288 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
|
289 |
-
],
|
290 |
-
"memory": "83.47 GB",
|
291 |
-
"os_name": "Debian GNU/Linux",
|
292 |
-
"os_version": "11",
|
293 |
-
"kernel": "5.15.167.4-microsoft-standard-WSL2",
|
294 |
-
"hostname": "computer"
|
295 |
-
}
|
296 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
results/microsoft_phi-4_2025-05-21-13-17-26.json
DELETED
@@ -1,296 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"config": {
|
3 |
-
"max_vus": 800,
|
4 |
-
"duration_secs": 120,
|
5 |
-
"benchmark_kind": "Rate",
|
6 |
-
"warmup_duration_secs": 30,
|
7 |
-
"rates": [
|
8 |
-
1.0,
|
9 |
-
10.0,
|
10 |
-
30.0,
|
11 |
-
100.0
|
12 |
-
],
|
13 |
-
"num_rates": 10,
|
14 |
-
"prompt_options": {
|
15 |
-
"num_tokens": 200,
|
16 |
-
"min_tokens": 180,
|
17 |
-
"max_tokens": 220,
|
18 |
-
"variance": 10
|
19 |
-
},
|
20 |
-
"decode_options": {
|
21 |
-
"num_tokens": 200,
|
22 |
-
"min_tokens": 180,
|
23 |
-
"max_tokens": 220,
|
24 |
-
"variance": 10
|
25 |
-
},
|
26 |
-
"tokenizer": "microsoft/phi-4",
|
27 |
-
"model_name": "phi-4",
|
28 |
-
"profile": null,
|
29 |
-
"meta": null,
|
30 |
-
"run_id": "LM Studio: lmstudio-community/phi-4-GGUF:Q8_0 (200 tokens)"
|
31 |
-
},
|
32 |
-
"results": [
|
33 |
-
{
|
34 |
-
"id": "warmup",
|
35 |
-
"executor_type": "ConstantVUs",
|
36 |
-
"config": {
|
37 |
-
"max_vus": 1,
|
38 |
-
"duration_secs": 30,
|
39 |
-
"rate": null
|
40 |
-
},
|
41 |
-
"total_requests": 13,
|
42 |
-
"total_tokens": 2610,
|
43 |
-
"token_throughput_secs": 83.60700961692694,
|
44 |
-
"duration_ms": 31217,
|
45 |
-
"time_to_first_token_ms": {
|
46 |
-
"p50": 90.517,
|
47 |
-
"p60": 93.25,
|
48 |
-
"p70": 102.443,
|
49 |
-
"p80": 109.227,
|
50 |
-
"p90": 130.959,
|
51 |
-
"p95": 207.294,
|
52 |
-
"p99": 293.629,
|
53 |
-
"avg": 108.58
|
54 |
-
},
|
55 |
-
"inter_token_latency_ms": {
|
56 |
-
"p50": 11.513,
|
57 |
-
"p60": 11.519,
|
58 |
-
"p70": 11.534,
|
59 |
-
"p80": 11.548,
|
60 |
-
"p90": 11.559,
|
61 |
-
"p95": 11.574,
|
62 |
-
"p99": 11.589,
|
63 |
-
"avg": 11.472
|
64 |
-
},
|
65 |
-
"failed_requests": 0,
|
66 |
-
"successful_requests": 13,
|
67 |
-
"request_rate": 0.41643338123373574,
|
68 |
-
"total_tokens_sent": 2600,
|
69 |
-
"e2e_latency_ms": {
|
70 |
-
"p50": 2419.372,
|
71 |
-
"p60": 2423.796,
|
72 |
-
"p70": 2432.426,
|
73 |
-
"p80": 2458.236,
|
74 |
-
"p90": 2525.006,
|
75 |
-
"p95": 2596.86,
|
76 |
-
"p99": 2667.757,
|
77 |
-
"avg": 2401.195
|
78 |
-
}
|
79 |
-
},
|
80 |
-
{
|
81 |
-
"id": "[email protected]/s",
|
82 |
-
"executor_type": "ConstantArrivalRate",
|
83 |
-
"config": {
|
84 |
-
"max_vus": 800,
|
85 |
-
"duration_secs": 120,
|
86 |
-
"rate": 1.0
|
87 |
-
},
|
88 |
-
"total_requests": 52,
|
89 |
-
"total_tokens": 9915,
|
90 |
-
"token_throughput_secs": 84.1224984364473,
|
91 |
-
"duration_ms": 117863,
|
92 |
-
"time_to_first_token_ms": {
|
93 |
-
"p50": 31149.018,
|
94 |
-
"p60": 38159.307,
|
95 |
-
"p70": 44798.95,
|
96 |
-
"p80": 51599.01,
|
97 |
-
"p90": 58334.517,
|
98 |
-
"p95": 61414.588,
|
99 |
-
"p99": 63915.289,
|
100 |
-
"avg": 32379.62
|
101 |
-
},
|
102 |
-
"inter_token_latency_ms": {
|
103 |
-
"p50": 11.473,
|
104 |
-
"p60": 11.501,
|
105 |
-
"p70": 11.517,
|
106 |
-
"p80": 11.529,
|
107 |
-
"p90": 11.563,
|
108 |
-
"p95": 11.598,
|
109 |
-
"p99": 11.775,
|
110 |
-
"avg": 11.254
|
111 |
-
},
|
112 |
-
"failed_requests": 0,
|
113 |
-
"successful_requests": 52,
|
114 |
-
"request_rate": 0.4411870820670963,
|
115 |
-
"total_tokens_sent": 10400,
|
116 |
-
"e2e_latency_ms": {
|
117 |
-
"p50": 33388.263,
|
118 |
-
"p60": 40395.415,
|
119 |
-
"p70": 47230.795,
|
120 |
-
"p80": 53979.194,
|
121 |
-
"p90": 60382.07,
|
122 |
-
"p95": 63519.032,
|
123 |
-
"p99": 66184.234,
|
124 |
-
"avg": 34556.301
|
125 |
-
}
|
126 |
-
},
|
127 |
-
{
|
128 |
-
"id": "[email protected]/s",
|
129 |
-
"executor_type": "ConstantArrivalRate",
|
130 |
-
"config": {
|
131 |
-
"max_vus": 800,
|
132 |
-
"duration_secs": 120,
|
133 |
-
"rate": 10.0
|
134 |
-
},
|
135 |
-
"total_requests": 51,
|
136 |
-
"total_tokens": 10041,
|
137 |
-
"token_throughput_secs": 84.04049965954646,
|
138 |
-
"duration_ms": 119478,
|
139 |
-
"time_to_first_token_ms": {
|
140 |
-
"p50": 55889.645,
|
141 |
-
"p60": 67098.347,
|
142 |
-
"p70": 78905.359,
|
143 |
-
"p80": 90289.182,
|
144 |
-
"p90": 101201.112,
|
145 |
-
"p95": 106805.272,
|
146 |
-
"p99": 111193.127,
|
147 |
-
"avg": 56139.066
|
148 |
-
},
|
149 |
-
"inter_token_latency_ms": {
|
150 |
-
"p50": 11.487,
|
151 |
-
"p60": 11.498,
|
152 |
-
"p70": 11.51,
|
153 |
-
"p80": 11.536,
|
154 |
-
"p90": 11.584,
|
155 |
-
"p95": 11.638,
|
156 |
-
"p99": 11.883,
|
157 |
-
"avg": 11.474
|
158 |
-
},
|
159 |
-
"failed_requests": 0,
|
160 |
-
"successful_requests": 51,
|
161 |
-
"request_rate": 0.4268564368725096,
|
162 |
-
"total_tokens_sent": 10200,
|
163 |
-
"e2e_latency_ms": {
|
164 |
-
"p50": 58084.912,
|
165 |
-
"p60": 69432.711,
|
166 |
-
"p70": 81080.254,
|
167 |
-
"p80": 92442.614,
|
168 |
-
"p90": 103527.041,
|
169 |
-
"p95": 108999.672,
|
170 |
-
"p99": 113397.637,
|
171 |
-
"avg": 58387.662
|
172 |
-
}
|
173 |
-
},
|
174 |
-
{
|
175 |
-
"id": "[email protected]/s",
|
176 |
-
"executor_type": "ConstantArrivalRate",
|
177 |
-
"config": {
|
178 |
-
"max_vus": 800,
|
179 |
-
"duration_secs": 120,
|
180 |
-
"rate": 30.0
|
181 |
-
},
|
182 |
-
"total_requests": 51,
|
183 |
-
"total_tokens": 9889,
|
184 |
-
"token_throughput_secs": 84.08188681268076,
|
185 |
-
"duration_ms": 117611,
|
186 |
-
"time_to_first_token_ms": {
|
187 |
-
"p50": 55982.506,
|
188 |
-
"p60": 68000.692,
|
189 |
-
"p70": 79600.152,
|
190 |
-
"p80": 91108.706,
|
191 |
-
"p90": 101995.453,
|
192 |
-
"p95": 107929.312,
|
193 |
-
"p99": 112340.212,
|
194 |
-
"avg": 56754.648
|
195 |
-
},
|
196 |
-
"inter_token_latency_ms": {
|
197 |
-
"p50": 11.503,
|
198 |
-
"p60": 11.515,
|
199 |
-
"p70": 11.531,
|
200 |
-
"p80": 11.564,
|
201 |
-
"p90": 11.589,
|
202 |
-
"p95": 11.633,
|
203 |
-
"p99": 11.795,
|
204 |
-
"avg": 11.477
|
205 |
-
},
|
206 |
-
"failed_requests": 0,
|
207 |
-
"successful_requests": 51,
|
208 |
-
"request_rate": 0.43363092602353315,
|
209 |
-
"total_tokens_sent": 10200,
|
210 |
-
"e2e_latency_ms": {
|
211 |
-
"p50": 58352.067,
|
212 |
-
"p60": 70321.743,
|
213 |
-
"p70": 81960.377,
|
214 |
-
"p80": 93288.338,
|
215 |
-
"p90": 104277.554,
|
216 |
-
"p95": 110084.734,
|
217 |
-
"p99": 114675.842,
|
218 |
-
"avg": 58969.412
|
219 |
-
}
|
220 |
-
},
|
221 |
-
{
|
222 |
-
"id": "[email protected]/s",
|
223 |
-
"executor_type": "ConstantArrivalRate",
|
224 |
-
"config": {
|
225 |
-
"max_vus": 800,
|
226 |
-
"duration_secs": 120,
|
227 |
-
"rate": 100.0
|
228 |
-
},
|
229 |
-
"total_requests": 57,
|
230 |
-
"total_tokens": 9983,
|
231 |
-
"token_throughput_secs": 83.83914212119033,
|
232 |
-
"duration_ms": 119073,
|
233 |
-
"time_to_first_token_ms": {
|
234 |
-
"p50": 60425.652,
|
235 |
-
"p60": 73426.16,
|
236 |
-
"p70": 83375.468,
|
237 |
-
"p80": 96034.495,
|
238 |
-
"p90": 104082.959,
|
239 |
-
"p95": 110616.366,
|
240 |
-
"p99": 114826.821,
|
241 |
-
"avg": 59050.64
|
242 |
-
},
|
243 |
-
"inter_token_latency_ms": {
|
244 |
-
"p50": 11.528,
|
245 |
-
"p60": 11.552,
|
246 |
-
"p70": 11.577,
|
247 |
-
"p80": 11.595,
|
248 |
-
"p90": 11.625,
|
249 |
-
"p95": 11.656,
|
250 |
-
"p99": 11.7,
|
251 |
-
"avg": 11.281
|
252 |
-
},
|
253 |
-
"failed_requests": 0,
|
254 |
-
"successful_requests": 57,
|
255 |
-
"request_rate": 0.4786968948119652,
|
256 |
-
"total_tokens_sent": 11400,
|
257 |
-
"e2e_latency_ms": {
|
258 |
-
"p50": 62519.008,
|
259 |
-
"p60": 74991.853,
|
260 |
-
"p70": 85562.76,
|
261 |
-
"p80": 96625.366,
|
262 |
-
"p90": 106351.421,
|
263 |
-
"p95": 112531.399,
|
264 |
-
"p99": 117196.304,
|
265 |
-
"avg": 61050.657
|
266 |
-
}
|
267 |
-
}
|
268 |
-
],
|
269 |
-
"start_time": "2025-05-21T13:01:17.074891817+00:00",
|
270 |
-
"end_time": "2025-05-21T13:17:26.396424745+00:00",
|
271 |
-
"system": {
|
272 |
-
"cpu": [
|
273 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
|
274 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
|
275 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
|
276 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
|
277 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
|
278 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
|
279 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
|
280 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
|
281 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
|
282 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
|
283 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
|
284 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
|
285 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
|
286 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
|
287 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
|
288 |
-
"AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
|
289 |
-
],
|
290 |
-
"memory": "83.47 GB",
|
291 |
-
"os_name": "Debian GNU/Linux",
|
292 |
-
"os_version": "11",
|
293 |
-
"kernel": "5.15.167.4-microsoft-standard-WSL2",
|
294 |
-
"hostname": "computer"
|
295 |
-
}
|
296 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|