loghugging25 committed on
Commit
3ccce1a
·
1 Parent(s): c8d26ac
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Inference Fp8 Results
3
  emoji: 📊
4
  colorFrom: gray
5
  colorTo: green
@@ -9,8 +9,6 @@ app_file: dashboard/app.py
9
  pinned: false
10
  license: mit
11
 
12
- env:
13
- DASHBOARD_FROM_RESULTS_DIR: results
14
  ---
15
 
16
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Inference Benchmarking Results Phi-4 (3000 Tokens)
3
  emoji: 📊
4
  colorFrom: gray
5
  colorTo: green
 
9
  pinned: false
10
  license: mit
11
 
 
 
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
dashboard/app.py CHANGED
@@ -3,14 +3,9 @@ from dataclasses import dataclass
3
  from typing import List
4
 
5
  import click
6
- import os
7
  import gradio as gr
8
  import pandas as pd
9
 
10
- import traceback
11
- import glob
12
- import json
13
-
14
  from parse_results import build_results
15
 
16
 
@@ -21,22 +16,8 @@ class PlotConfig:
21
  title: str
22
  percentiles: List[float] = None
23
 
24
- def check_file_exists(path, label=""):
25
- if os.path.exists(path):
26
- print(f"✅ {label} file exists: {path}")
27
- print(f" File size: {os.path.getsize(path)} bytes")
28
- print(f" Absolute path: {os.path.abspath(path)}")
29
- else:
30
- print(f"❌ {label} file NOT found: {path}")
31
- print(f" Current working directory: {os.getcwd()}")
32
- print(f" Directory contents: {os.listdir(os.path.dirname(path) if os.path.dirname(path) else '.')}")
33
-
34
 
35
  def run(from_results_dir, datasource, port):
36
- print(f"💡 Debug - from_results_dir: {from_results_dir}")
37
- print(f"💡 Debug - datasource: {datasource}")
38
- print(f"💡 Debug - current directory: {os.getcwd()}")
39
-
40
  css = '''
41
  .summary span {
42
  font-size: 10px;
@@ -48,17 +29,17 @@ def run(from_results_dir, datasource, port):
48
  summary_desc = '''
49
  ## Summary
50
  This table shows the average of the metrics for each model and QPS rate.
51
-
52
  The metrics are:
53
- * Inter token latency: Time to generate a new output token for each user querying the system.
54
  It translates as the “speed” perceived by the end-user. We aim for at least 300 words per minute (average reading speed), so ITL<150ms
55
- * Time to First Token: Time the user has to wait before seeing the first token of its answer.
56
  Lower waiting time are essential for real-time interactions, less so for offline workloads.
57
  * End-to-end latency: The overall time the system took to generate the full response to the user.
58
  * Throughput: The number of tokens per second the system can generate across all requests
59
  * Successful requests: The number of requests the system was able to honor in the benchmark timeframe
60
- * Error rate: The percentage of requests that ended up in error, as the system could not process them in time or failed to process them.
61
-
62
  '''
63
 
64
  df_bench = pd.DataFrame()
@@ -129,76 +110,17 @@ def run(from_results_dir, datasource, port):
129
  return res
130
 
131
  def load_datasource(datasource, fn):
132
- print(f"💡 Debug - load_datasource called with: {datasource}")
133
  if datasource.startswith('file://'):
134
- local_path = datasource[len('file://'):]
135
- print(f"💡 Debug - Extracted local path: {local_path}")
136
- check_file_exists(local_path, "Local")
137
- return fn(local_path)
138
  elif datasource.startswith('s3://'):
139
  return fn(datasource)
140
  else:
141
- # If no scheme is provided, assume it's a local path.
142
- print(f"💡 Debug - Using path as-is: {datasource}")
143
- check_file_exists(datasource, "Direct")
144
- return fn(datasource)
145
-
146
- parquet_file_to_load = None
147
 
148
  if from_results_dir is not None:
149
- # If from_results_dir is specified, results are built into 'benchmarks.parquet'
150
- # within that directory.
151
- output_filename = 'benchmarks.parquet'
152
- print(f"💡 Debug - Building results from directory: {from_results_dir}")
153
-
154
- # Check if results directory exists
155
- check_file_exists(from_results_dir, "Results directory")
156
-
157
- # Create absolute path for results directory
158
- abs_results_dir = os.path.abspath(from_results_dir)
159
- print(f"💡 Debug - Absolute results directory: {abs_results_dir}")
160
-
161
- # Create the results directory if it doesn't exist
162
- if not os.path.exists(abs_results_dir):
163
- print(f"💡 Debug - Creating results directory: {abs_results_dir}")
164
- os.makedirs(abs_results_dir, exist_ok=True)
165
-
166
- # Call build_results with absolute paths
167
- full_output_path = os.path.join(abs_results_dir, output_filename)
168
- print(f"💡 Debug - Expected output path: {full_output_path}")
169
-
170
- build_results(abs_results_dir, output_filename, None)
171
-
172
- # Check if the file was created
173
- check_file_exists(full_output_path, "Generated parquet")
174
-
175
- # The file to load is now in from_results_dir/output_filename
176
- parquet_file_to_load = full_output_path
177
- else:
178
- # If not building from results_dir, use the provided datasource directly.
179
- parquet_file_to_load = datasource
180
-
181
- print(f"💡 Debug - Final parquet_file_to_load: {parquet_file_to_load}")
182
-
183
  # Load data
184
- try:
185
- df_bench = load_datasource(parquet_file_to_load, load_bench_results)
186
- print(f"✅ Successfully loaded data with {len(df_bench)} rows")
187
- except Exception as e:
188
- print(f"❌ Error loading data: {str(e)}")
189
- print(f"Stack trace: {traceback.format_exc()}")
190
- # Create a minimal DataFrame to prevent further errors
191
- df_bench = pd.DataFrame({
192
- "model": ["error"],
193
- "run_id": ["error"],
194
- "rate": [0],
195
- "inter_token_latency_ms_p90": [0],
196
- "time_to_first_token_ms_p90": [0],
197
- "e2e_latency_ms_p90": [0],
198
- "token_throughput_secs": [0],
199
- "successful_requests": [0],
200
- "error_rate": [0]
201
- })
202
 
203
  # Define metrics
204
  metrics = {
@@ -276,112 +198,11 @@ def run(from_results_dir, datasource, port):
276
 
277
 
278
  @click.command()
279
- @click.option('--from-results-dir', 'cli_from_results_dir', default=None, help='Load inference-benchmarker results from this directory. Overrides DASHBOARD_FROM_RESULTS_DIR.')
280
- @click.option('--datasource', 'cli_datasource', default='file://benchmarks.parquet', help='Load this Parquet file directly if not building from a results directory.')
281
  @click.option('--port', default=7860, help='Port to run the dashboard')
282
- def main(cli_from_results_dir, cli_datasource, port):
283
- print("===== Starting Application =====")
284
- # print(f"Environment variables: {os.environ}") # Already in user's code or logs
285
-
286
- # Determine the directory from which to process JSON results
287
- # Priority: 1. CLI option, 2. Env Var, 3. Default to 'results' dir
288
- processing_dir = cli_from_results_dir
289
-
290
- if processing_dir is None:
291
- env_var_value = os.environ.get('DASHBOARD_FROM_RESULTS_DIR')
292
- if env_var_value:
293
- print(f"Using environment variable DASHBOARD_FROM_RESULTS_DIR='{env_var_value}' for processing.")
294
- processing_dir = env_var_value
295
- elif os.path.exists('results') and os.path.isdir('results'):
296
- print(f"No --from-results-dir option or DASHBOARD_FROM_RESULTS_DIR env var. Defaulting to 'results' directory for processing as it exists.")
297
- processing_dir = 'results'
298
- else:
299
- print(f"No directory specified for processing (no --from-results-dir, no DASHBOARD_FROM_RESULTS_DIR env var, and 'results' dir not found).")
300
- # processing_dir remains None
301
-
302
- path_to_load_by_run_function = None # This will be the path to the .parquet file
303
-
304
- if processing_dir:
305
- # A directory for processing JSONs has been determined.
306
- # Use the existing logic to build/fallback and generate benchmarks.parquet.
307
- output_filename = 'benchmarks.parquet'
308
- abs_processing_dir = os.path.abspath(processing_dir)
309
-
310
- print(f"💡 Debug - Will process JSONs from directory: {abs_processing_dir}")
311
- check_file_exists(abs_processing_dir, "Source directory for JSONs")
312
-
313
- # Ensure the directory exists (it might be 'results' or user-provided)
314
- # build_results might expect the output directory to exist.
315
- if not os.path.exists(abs_processing_dir):
316
- print(f"💡 Debug - Creating directory for processing/output: {abs_processing_dir}")
317
- os.makedirs(abs_processing_dir, exist_ok=True)
318
-
319
- # The generated parquet file will be placed inside the abs_processing_dir
320
- generated_parquet_filepath = os.path.join(abs_processing_dir, output_filename)
321
- print(f"💡 Debug - Expected path for generated parquet file: {generated_parquet_filepath}")
322
-
323
- try:
324
- build_results(abs_processing_dir, output_filename, None) # output_filename is relative to abs_processing_dir
325
- print("✅ Build results completed using build_results.")
326
- except Exception as e_build:
327
- print(f"❌ Error in build_results: {str(e_build)}")
328
- print(f"Stack trace: {traceback.format_exc()}")
329
- print("⚠️ Attempting fallback method: direct JSON processing")
330
- try:
331
- json_files = glob.glob(os.path.join(abs_processing_dir, "*.json"))
332
- print(f"Found {len(json_files)} JSON files for fallback: {json_files}")
333
- if not json_files:
334
- raise FileNotFoundError("Fallback: No JSON files found in results directory")
335
-
336
- combined_data = []
337
- for json_file in json_files:
338
- try:
339
- with open(json_file, 'r') as f:
340
- data = json.load(f)
341
- filename = os.path.basename(json_file)
342
- model_name_parts = filename.split('_')
343
- model_name = f"{model_name_parts[0]}_{model_name_parts[1]}" if len(model_name_parts) > 1 else model_name_parts[0]
344
-
345
- if 'benchmarks' in data:
346
- for benchmark in data['benchmarks']:
347
- benchmark['model'] = model_name
348
- benchmark['run_id'] = os.path.splitext(filename)[0]
349
- combined_data.append(benchmark)
350
- else:
351
- print(f"⚠️ Fallback: No 'benchmarks' key in {json_file}")
352
- except Exception as json_err:
353
- print(f"❌ Fallback: Error processing {json_file}: {str(json_err)}")
354
-
355
- if combined_data:
356
- df_direct = pd.DataFrame(combined_data)
357
- df_direct.to_parquet(generated_parquet_filepath)
358
- print(f"✅ Created parquet file via fallback method: {generated_parquet_filepath}")
359
- else:
360
- raise ValueError("Fallback: No data could be extracted from JSON files")
361
- except Exception as e_fallback:
362
- print(f"❌ Fallback method failed: {str(e_fallback)}")
363
- print(f"Stack trace: {traceback.format_exc()}")
364
-
365
- # After attempting to build/generate, check if the file exists
366
- check_file_exists(generated_parquet_filepath, "Parquet file after build/fallback attempts")
367
- if os.path.exists(generated_parquet_filepath):
368
- path_to_load_by_run_function = generated_parquet_filepath
369
- else:
370
- print(f"❌ CRITICAL: Failed to generate or find parquet file at '{generated_parquet_filepath}' after all attempts.")
371
- # path_to_load_by_run_function remains None here, will be handled below.
372
-
373
- # If path_to_load_by_run_function is still None at this point
374
- # (either because processing_dir was not set, or all generation attempts failed),
375
- # default to the original cli_datasource.
376
- if path_to_load_by_run_function is None:
377
- print(f"⚠️ Defaulting to cli_datasource '{cli_datasource}' as parquet generation failed or was skipped.")
378
- path_to_load_by_run_function = cli_datasource
379
-
380
- print(f"💡 Final path to be loaded by run() function: '{path_to_load_by_run_function}'")
381
-
382
- # Call run(). The first argument (from_results_dir for run()) is None because main handles processing.
383
- # The second argument (datasource for run()) is the actual file path to load.
384
- run(None, path_to_load_by_run_function, port)
385
 
386
 
387
  if __name__ == '__main__':
 
3
  from typing import List
4
 
5
  import click
 
6
  import gradio as gr
7
  import pandas as pd
8
 
 
 
 
 
9
  from parse_results import build_results
10
 
11
 
 
16
  title: str
17
  percentiles: List[float] = None
18
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  def run(from_results_dir, datasource, port):
 
 
 
 
21
  css = '''
22
  .summary span {
23
  font-size: 10px;
 
29
  summary_desc = '''
30
  ## Summary
31
  This table shows the average of the metrics for each model and QPS rate.
32
+
33
  The metrics are:
34
+ * Inter token latency: Time to generate a new output token for each user querying the system.
35
  It translates as the “speed” perceived by the end-user. We aim for at least 300 words per minute (average reading speed), so ITL<150ms
36
+ * Time to First Token: Time the user has to wait before seeing the first token of its answer.
37
  Lower waiting time are essential for real-time interactions, less so for offline workloads.
38
  * End-to-end latency: The overall time the system took to generate the full response to the user.
39
  * Throughput: The number of tokens per second the system can generate across all requests
40
  * Successful requests: The number of requests the system was able to honor in the benchmark timeframe
41
+ * Error rate: The percentage of requests that ended up in error, as the system could not process them in time or failed to process them.
42
+
43
  '''
44
 
45
  df_bench = pd.DataFrame()
 
110
  return res
111
 
112
  def load_datasource(datasource, fn):
 
113
  if datasource.startswith('file://'):
114
+ return fn(datasource)
 
 
 
115
  elif datasource.startswith('s3://'):
116
  return fn(datasource)
117
  else:
118
+ raise ValueError(f"Unknown datasource: {datasource}")
 
 
 
 
 
119
 
120
  if from_results_dir is not None:
121
+ build_results(from_results_dir, 'benchmarks.parquet', None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  # Load data
123
+ df_bench = load_datasource(datasource, load_bench_results)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
  # Define metrics
126
  metrics = {
 
198
 
199
 
200
  @click.command()
201
+ @click.option('--from-results-dir', default=None, help='Load inference-benchmarker results from a directory')
202
+ @click.option('--datasource', default='file://benchmarks.parquet', help='Load a Parquet file already generated')
203
  @click.option('--port', default=7860, help='Port to run the dashboard')
204
+ def main(from_results_dir, datasource, port):
205
+ run(from_results_dir, datasource, port)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
 
208
  if __name__ == '__main__':
results/{RedHatAI_phi-4-FP8-dynamic_2025-05-21-09-15-05.json → RedHatAI_phi-4-FP8-dynamic_2025-05-21-13-56-47.json} RENAMED
@@ -12,22 +12,22 @@
12
  ],
13
  "num_rates": 10,
14
  "prompt_options": {
15
- "num_tokens": 200,
16
- "min_tokens": 180,
17
- "max_tokens": 220,
18
  "variance": 10
19
  },
20
  "decode_options": {
21
- "num_tokens": 200,
22
- "min_tokens": 180,
23
- "max_tokens": 220,
24
  "variance": 10
25
  },
26
  "tokenizer": "RedHatAI/phi-4-FP8-dynamic",
27
  "model_name": "phi-4",
28
  "profile": null,
29
  "meta": null,
30
- "run_id": "vLLM: RedHatAI/phi-4-FP8-dynamic (200 tokens)"
31
  },
32
  "results": [
33
  {
@@ -38,43 +38,43 @@
38
  "duration_secs": 30,
39
  "rate": null
40
  },
41
- "total_requests": 7,
42
- "total_tokens": 1401,
43
- "token_throughput_secs": 41.207311909734074,
44
- "duration_ms": 33998,
45
  "time_to_first_token_ms": {
46
- "p50": 30.74,
47
- "p60": 30.848,
48
- "p70": 31.032,
49
- "p80": 31.367,
50
- "p90": 600.369,
51
- "p95": 1027.036,
52
- "p99": 1368.37,
53
- "avg": 233.964
54
  },
55
  "inter_token_latency_ms": {
56
- "p50": 23.217,
57
- "p60": 23.222,
58
- "p70": 23.228,
59
- "p80": 23.236,
60
- "p90": 23.248,
61
- "p95": 23.254,
62
- "p99": 23.26,
63
- "avg": 23.213
64
  },
65
  "failed_requests": 0,
66
- "successful_requests": 7,
67
- "request_rate": 0.2058894956232252,
68
- "total_tokens_sent": 1400,
69
  "e2e_latency_ms": {
70
- "p50": 4743.409,
71
- "p60": 4751.971,
72
- "p70": 4775.205,
73
- "p80": 4827.785,
74
- "p90": 5318.839,
75
- "p95": 5673.985,
76
- "p99": 5958.102,
77
- "avg": 4856.823
78
  }
79
  },
80
  {
@@ -85,43 +85,43 @@
85
  "duration_secs": 120,
86
  "rate": 1.0
87
  },
88
- "total_requests": 115,
89
- "total_tokens": 22163,
90
- "token_throughput_secs": 186.64991064360598,
91
- "duration_ms": 118741,
92
  "time_to_first_token_ms": {
93
- "p50": 43.445,
94
- "p60": 45.341,
95
- "p70": 47.407,
96
- "p80": 50.324,
97
- "p90": 53.509,
98
- "p95": 54.94,
99
- "p99": 57.022,
100
- "avg": 43.314
101
  },
102
  "inter_token_latency_ms": {
103
- "p50": 24.082,
104
- "p60": 24.1,
105
- "p70": 24.124,
106
- "p80": 24.146,
107
- "p90": 24.21,
108
- "p95": 24.288,
109
- "p99": 24.376,
110
- "avg": 24.09
111
  },
112
  "failed_requests": 0,
113
- "successful_requests": 115,
114
- "request_rate": 0.9684943249566704,
115
- "total_tokens_sent": 23000,
116
  "e2e_latency_ms": {
117
- "p50": 4814.201,
118
- "p60": 4873.26,
119
- "p70": 4947.365,
120
- "p80": 5011.934,
121
- "p90": 5104.903,
122
- "p95": 5182.844,
123
- "p99": 5309.301,
124
- "avg": 4665.197
125
  }
126
  },
127
  {
@@ -132,43 +132,43 @@
132
  "duration_secs": 120,
133
  "rate": 10.0
134
  },
135
- "total_requests": 1149,
136
- "total_tokens": 217686,
137
- "token_throughput_secs": 1837.4411468828155,
138
- "duration_ms": 118472,
139
  "time_to_first_token_ms": {
140
- "p50": 55.249,
141
- "p60": 57.796,
142
- "p70": 60.296,
143
- "p80": 63.162,
144
- "p90": 66.14,
145
- "p95": 67.799,
146
- "p99": 70.85,
147
- "avg": 55.52
148
  },
149
  "inter_token_latency_ms": {
150
- "p50": 28.914,
151
- "p60": 28.973,
152
- "p70": 29.029,
153
- "p80": 29.089,
154
- "p90": 29.168,
155
- "p95": 29.211,
156
- "p99": 29.331,
157
- "avg": 28.737
158
  },
159
  "failed_requests": 0,
160
- "successful_requests": 1149,
161
- "request_rate": 9.698464199665366,
162
- "total_tokens_sent": 229800,
163
  "e2e_latency_ms": {
164
- "p50": 5707.118,
165
- "p60": 5793.95,
166
- "p70": 5885.254,
167
- "p80": 5983.201,
168
- "p90": 6126.889,
169
- "p95": 6219.476,
170
- "p99": 6386.803,
171
- "avg": 5477.946
172
  }
173
  },
174
  {
@@ -179,43 +179,43 @@
179
  "duration_secs": 120,
180
  "rate": 30.0
181
  },
182
- "total_requests": 1889,
183
- "total_tokens": 348708,
184
- "token_throughput_secs": 2911.7479692043544,
185
- "duration_ms": 119758,
186
  "time_to_first_token_ms": {
187
- "p50": 22192.744,
188
- "p60": 26837.194,
189
- "p70": 29205.612,
190
- "p80": 33069.312,
191
- "p90": 35968.562,
192
- "p95": 36825.858,
193
- "p99": 37298.867,
194
- "avg": 19829.052
195
  },
196
  "inter_token_latency_ms": {
197
- "p50": 64.987,
198
- "p60": 66.093,
199
- "p70": 67.344,
200
- "p80": 72.108,
201
- "p90": 90.713,
202
- "p95": 98.38,
203
- "p99": 177.348,
204
- "avg": 69.926
205
  },
206
  "failed_requests": 0,
207
- "successful_requests": 1889,
208
- "request_rate": 15.77334593363796,
209
- "total_tokens_sent": 377800,
210
  "e2e_latency_ms": {
211
- "p50": 33837.749,
212
- "p60": 38364.805,
213
- "p70": 42612.972,
214
- "p80": 45779.935,
215
- "p90": 48249.655,
216
- "p95": 49268.594,
217
- "p99": 50884.661,
218
- "avg": 32263.266
219
  }
220
  },
221
  {
@@ -226,48 +226,48 @@
226
  "duration_secs": 120,
227
  "rate": 100.0
228
  },
229
- "total_requests": 1923,
230
- "total_tokens": 355495,
231
- "token_throughput_secs": 2963.3510051149824,
232
- "duration_ms": 119963,
233
  "time_to_first_token_ms": {
234
- "p50": 30849.07,
235
- "p60": 32647.17,
236
- "p70": 35695.762,
237
- "p80": 36657.309,
238
- "p90": 37063.893,
239
- "p95": 37265.804,
240
- "p99": 37693.244,
241
- "avg": 25983.203
242
  },
243
  "inter_token_latency_ms": {
244
- "p50": 64.756,
245
- "p60": 66.434,
246
- "p70": 68.803,
247
- "p80": 83.204,
248
- "p90": 96.295,
249
- "p95": 103.874,
250
- "p99": 163.895,
251
- "avg": 73.033
252
  },
253
  "failed_requests": 0,
254
- "successful_requests": 1923,
255
- "request_rate": 16.02982878194099,
256
- "total_tokens_sent": 384600,
257
  "e2e_latency_ms": {
258
- "p50": 44432.763,
259
- "p60": 46273.082,
260
- "p70": 47729.904,
261
- "p80": 48714.768,
262
- "p90": 49917.33,
263
- "p95": 50686.527,
264
- "p99": 51992.951,
265
- "avg": 38685.294
266
  }
267
  }
268
  ],
269
- "start_time": "2025-05-21T09:04:59.479961191+00:00",
270
- "end_time": "2025-05-21T09:15:05.115323148+00:00",
271
  "system": {
272
  "cpu": [
273
  "AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
 
12
  ],
13
  "num_rates": 10,
14
  "prompt_options": {
15
+ "num_tokens": 8000,
16
+ "min_tokens": 7980,
17
+ "max_tokens": 8020,
18
  "variance": 10
19
  },
20
  "decode_options": {
21
+ "num_tokens": 8000,
22
+ "min_tokens": 7980,
23
+ "max_tokens": 8020,
24
  "variance": 10
25
  },
26
  "tokenizer": "RedHatAI/phi-4-FP8-dynamic",
27
  "model_name": "phi-4",
28
  "profile": null,
29
  "meta": null,
30
+ "run_id": "vLLM: RedHatAI/phi-4-FP8-dynamic (8000 tokens)"
31
  },
32
  "results": [
33
  {
 
38
  "duration_secs": 30,
39
  "rate": null
40
  },
41
+ "total_requests": 2,
42
+ "total_tokens": 1643,
43
+ "token_throughput_secs": 38.490013255851395,
44
+ "duration_ms": 42686,
45
  "time_to_first_token_ms": {
46
+ "p50": 1276.801,
47
+ "p60": 1388.913,
48
+ "p70": 1501.026,
49
+ "p80": 1613.139,
50
+ "p90": 1725.252,
51
+ "p95": 1781.309,
52
+ "p99": 1826.154,
53
+ "avg": 1276.801
54
  },
55
  "inter_token_latency_ms": {
56
+ "p50": 24.424,
57
+ "p60": 24.432,
58
+ "p70": 24.44,
59
+ "p80": 24.448,
60
+ "p90": 24.456,
61
+ "p95": 24.46,
62
+ "p99": 24.463,
63
+ "avg": 24.424
64
  },
65
  "failed_requests": 0,
66
+ "successful_requests": 2,
67
+ "request_rate": 0.0468533332390157,
68
+ "total_tokens_sent": 16000,
69
  "e2e_latency_ms": {
70
+ "p50": 21343.075,
71
+ "p60": 21391.438,
72
+ "p70": 21439.801,
73
+ "p80": 21488.164,
74
+ "p90": 21536.527,
75
+ "p95": 21560.709,
76
+ "p99": 21580.054,
77
+ "avg": 21343.075
78
  }
79
  },
80
  {
 
85
  "duration_secs": 120,
86
  "rate": 1.0
87
  },
88
+ "total_requests": 90,
89
+ "total_tokens": 55892,
90
+ "token_throughput_secs": 478.696852515677,
91
+ "duration_ms": 116758,
92
  "time_to_first_token_ms": {
93
+ "p50": 118.856,
94
+ "p60": 124.707,
95
+ "p70": 131.654,
96
+ "p80": 135.562,
97
+ "p90": 145.529,
98
+ "p95": 150.366,
99
+ "p99": 715.649,
100
+ "avg": 128.611
101
  },
102
  "inter_token_latency_ms": {
103
+ "p50": 45.758,
104
+ "p60": 46.229,
105
+ "p70": 46.314,
106
+ "p80": 46.373,
107
+ "p90": 46.483,
108
+ "p95": 46.581,
109
+ "p99": 46.871,
110
+ "avg": 43.271
111
  },
112
  "failed_requests": 0,
113
+ "successful_requests": 90,
114
+ "request_rate": 0.7708208102485317,
115
+ "total_tokens_sent": 720000,
116
  "e2e_latency_ms": {
117
+ "p50": 27887.256,
118
+ "p60": 30188.411,
119
+ "p70": 31661.903,
120
+ "p80": 35685.812,
121
+ "p90": 45661.636,
122
+ "p95": 50093.628,
123
+ "p99": 59727.184,
124
+ "avg": 27093.895
125
  }
126
  },
127
  {
 
132
  "duration_secs": 120,
133
  "rate": 10.0
134
  },
135
+ "total_requests": 97,
136
+ "total_tokens": 45779,
137
+ "token_throughput_secs": 385.8671945353039,
138
+ "duration_ms": 118639,
139
  "time_to_first_token_ms": {
140
+ "p50": 264.625,
141
+ "p60": 314.639,
142
+ "p70": 341.786,
143
+ "p80": 416.021,
144
+ "p90": 502.604,
145
+ "p95": 608.336,
146
+ "p99": 712.908,
147
+ "avg": 278.878
148
  },
149
  "inter_token_latency_ms": {
150
+ "p50": 152.068,
151
+ "p60": 183.639,
152
+ "p70": 208.294,
153
+ "p80": 210.057,
154
+ "p90": 211.894,
155
+ "p95": 421.244,
156
+ "p99": 436.578,
157
+ "avg": 190.502
158
  },
159
  "failed_requests": 0,
160
+ "successful_requests": 97,
161
+ "request_rate": 0.8176045319890011,
162
+ "total_tokens_sent": 776000,
163
  "e2e_latency_ms": {
164
+ "p50": 89809.719,
165
+ "p60": 90599.198,
166
+ "p70": 97086.861,
167
+ "p80": 97763.592,
168
+ "p90": 102705.608,
169
+ "p95": 105891.319,
170
+ "p99": 109209.372,
171
+ "avg": 80168.287
172
  }
173
  },
174
  {
 
179
  "duration_secs": 120,
180
  "rate": 30.0
181
  },
182
+ "total_requests": 108,
183
+ "total_tokens": 48755,
184
+ "token_throughput_secs": 408.5182278415837,
185
+ "duration_ms": 119345,
186
  "time_to_first_token_ms": {
187
+ "p50": 315.639,
188
+ "p60": 364.113,
189
+ "p70": 440.936,
190
+ "p80": 517.15,
191
+ "p90": 635.496,
192
+ "p95": 743.467,
193
+ "p99": 886.077,
194
+ "avg": 348.945
195
  },
196
  "inter_token_latency_ms": {
197
+ "p50": 172.827,
198
+ "p60": 189.057,
199
+ "p70": 196.538,
200
+ "p80": 201.266,
201
+ "p90": 442.975,
202
+ "p95": 465.991,
203
+ "p99": 473.842,
204
+ "avg": 207.845
205
  },
206
  "failed_requests": 0,
207
+ "successful_requests": 108,
208
+ "request_rate": 0.9049321835071489,
209
+ "total_tokens_sent": 864000,
210
  "e2e_latency_ms": {
211
+ "p50": 89868.756,
212
+ "p60": 96902.23,
213
+ "p70": 98937.333,
214
+ "p80": 102789.849,
215
+ "p90": 109541.9,
216
+ "p95": 111388.456,
217
+ "p99": 114281.927,
218
+ "avg": 82072.638
219
  }
220
  },
221
  {
 
226
  "duration_secs": 120,
227
  "rate": 100.0
228
  },
229
+ "total_requests": 125,
230
+ "total_tokens": 57918,
231
+ "token_throughput_secs": 485.359321343381,
232
+ "duration_ms": 119330,
233
  "time_to_first_token_ms": {
234
+ "p50": 1154.434,
235
+ "p60": 1276.393,
236
+ "p70": 1440.368,
237
+ "p80": 1604.069,
238
+ "p90": 1768.54,
239
+ "p95": 1850.13,
240
+ "p99": 1919.678,
241
+ "avg": 1208.132
242
  },
243
  "inter_token_latency_ms": {
244
+ "p50": 166.875,
245
+ "p60": 166.884,
246
+ "p70": 167.245,
247
+ "p80": 188.28,
248
+ "p90": 350.172,
249
+ "p95": 417.485,
250
+ "p99": 437.566,
251
+ "avg": 186.06
252
  },
253
  "failed_requests": 0,
254
+ "successful_requests": 125,
255
+ "request_rate": 1.047513988188864,
256
+ "total_tokens_sent": 1000000,
257
  "e2e_latency_ms": {
258
+ "p50": 82803.004,
259
+ "p60": 89976.229,
260
+ "p70": 90374.914,
261
+ "p80": 99727.225,
262
+ "p90": 108866.194,
263
+ "p95": 113444.528,
264
+ "p99": 116545.189,
265
+ "avg": 77917.015
266
  }
267
  }
268
  ],
269
+ "start_time": "2025-05-21T13:41:44.260015742+00:00",
270
+ "end_time": "2025-05-21T13:56:47.150683889+00:00",
271
  "system": {
272
  "cpu": [
273
  "AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
results/microsoft_phi-4_2025-05-21-12-47-52.json DELETED
@@ -1,296 +0,0 @@
1
- {
2
- "config": {
3
- "max_vus": 800,
4
- "duration_secs": 120,
5
- "benchmark_kind": "Rate",
6
- "warmup_duration_secs": 30,
7
- "rates": [
8
- 1.0,
9
- 10.0,
10
- 30.0,
11
- 100.0
12
- ],
13
- "num_rates": 10,
14
- "prompt_options": {
15
- "num_tokens": 200,
16
- "min_tokens": 180,
17
- "max_tokens": 220,
18
- "variance": 10
19
- },
20
- "decode_options": {
21
- "num_tokens": 200,
22
- "min_tokens": 180,
23
- "max_tokens": 220,
24
- "variance": 10
25
- },
26
- "tokenizer": "microsoft/phi-4",
27
- "model_name": "phi-4",
28
- "profile": null,
29
- "meta": null,
30
- "run_id": "Ollama: unsloth/phi-4-GGUF:Q8_0 (200 tokens)"
31
- },
32
- "results": [
33
- {
34
- "id": "warmup",
35
- "executor_type": "ConstantVUs",
36
- "config": {
37
- "max_vus": 1,
38
- "duration_secs": 30,
39
- "rate": null
40
- },
41
- "total_requests": 17,
42
- "total_tokens": 2560,
43
- "token_throughput_secs": 81.92346820970964,
44
- "duration_ms": 31248,
45
- "time_to_first_token_ms": {
46
- "p50": 48.023,
47
- "p60": 48.316,
48
- "p70": 48.704,
49
- "p80": 49.172,
50
- "p90": 50.133,
51
- "p95": 79.141,
52
- "p99": 171.884,
53
- "avg": 56.904
54
- },
55
- "inter_token_latency_ms": {
56
- "p50": 11.835,
57
- "p60": 11.849,
58
- "p70": 11.866,
59
- "p80": 11.888,
60
- "p90": 11.999,
61
- "p95": 12.031,
62
- "p99": 12.057,
63
- "avg": 11.863
64
- },
65
- "failed_requests": 0,
66
- "successful_requests": 17,
67
- "request_rate": 0.5440230310801031,
68
- "total_tokens_sent": 3400,
69
- "e2e_latency_ms": {
70
- "p50": 2193.161,
71
- "p60": 2256.189,
72
- "p70": 2409.636,
73
- "p80": 2503.287,
74
- "p90": 2558.373,
75
- "p95": 2565.267,
76
- "p99": 2582.093,
77
- "avg": 1837.986
78
- }
79
- },
80
- {
81
- "id": "[email protected]/s",
82
- "executor_type": "ConstantArrivalRate",
83
- "config": {
84
- "max_vus": 800,
85
- "duration_secs": 120,
86
- "rate": 1.0
87
- },
88
- "total_requests": 68,
89
- "total_tokens": 13393,
90
- "token_throughput_secs": 113.50678834081126,
91
- "duration_ms": 117992,
92
- "time_to_first_token_ms": {
93
- "p50": 23628.355,
94
- "p60": 28364.866,
95
- "p70": 33468.314,
96
- "p80": 37116.28,
97
- "p90": 42197.075,
98
- "p95": 44792.584,
99
- "p99": 46808.871,
100
- "avg": 23527.531
101
- },
102
- "inter_token_latency_ms": {
103
- "p50": 17.148,
104
- "p60": 17.164,
105
- "p70": 17.183,
106
- "p80": 17.199,
107
- "p90": 17.22,
108
- "p95": 17.235,
109
- "p99": 17.256,
110
- "avg": 17.123
111
- },
112
- "failed_requests": 0,
113
- "successful_requests": 68,
114
- "request_rate": 0.5763056527421164,
115
- "total_tokens_sent": 13600,
116
- "e2e_latency_ms": {
117
- "p50": 26918.292,
118
- "p60": 31837.746,
119
- "p70": 36426.629,
120
- "p80": 40565.391,
121
- "p90": 45507.834,
122
- "p95": 48259.487,
123
- "p99": 50280.92,
124
- "avg": 26884.974
125
- }
126
- },
127
- {
128
- "id": "[email protected]/s",
129
- "executor_type": "ConstantArrivalRate",
130
- "config": {
131
- "max_vus": 800,
132
- "duration_secs": 120,
133
- "rate": 10.0
134
- },
135
- "total_requests": 69,
136
- "total_tokens": 13411,
137
- "token_throughput_secs": 112.91469560470007,
138
- "duration_ms": 118771,
139
- "time_to_first_token_ms": {
140
- "p50": 54889.419,
141
- "p60": 66226.724,
142
- "p70": 77657.43,
143
- "p80": 87194.269,
144
- "p90": 97361.153,
145
- "p95": 102660.303,
146
- "p99": 106894.626,
147
- "avg": 54527.075
148
- },
149
- "inter_token_latency_ms": {
150
- "p50": 17.284,
151
- "p60": 17.295,
152
- "p70": 17.305,
153
- "p80": 17.328,
154
- "p90": 17.385,
155
- "p95": 17.394,
156
- "p99": 17.447,
157
- "avg": 17.279
158
- },
159
- "failed_requests": 0,
160
- "successful_requests": 69,
161
- "request_rate": 0.5809495188072705,
162
- "total_tokens_sent": 13800,
163
- "e2e_latency_ms": {
164
- "p50": 58021.804,
165
- "p60": 69751.13,
166
- "p70": 80116.293,
167
- "p80": 90587.03,
168
- "p90": 100535.513,
169
- "p95": 105903.68,
170
- "p99": 110535.65,
171
- "avg": 57868.946
172
- }
173
- },
174
- {
175
- "id": "[email protected]/s",
176
- "executor_type": "ConstantArrivalRate",
177
- "config": {
178
- "max_vus": 800,
179
- "duration_secs": 120,
180
- "rate": 30.0
181
- },
182
- "total_requests": 70,
183
- "total_tokens": 13581,
184
- "token_throughput_secs": 113.61611267427078,
185
- "duration_ms": 119534,
186
- "time_to_first_token_ms": {
187
- "p50": 56313.526,
188
- "p60": 68465.8,
189
- "p70": 78580.113,
190
- "p80": 90639.114,
191
- "p90": 102040.301,
192
- "p95": 108031.928,
193
- "p99": 112499.04,
194
- "avg": 56639.341
195
- },
196
- "inter_token_latency_ms": {
197
- "p50": 17.172,
198
- "p60": 17.182,
199
- "p70": 17.217,
200
- "p80": 17.235,
201
- "p90": 17.256,
202
- "p95": 17.31,
203
- "p99": 17.346,
204
- "avg": 17.18
205
- },
206
- "failed_requests": 0,
207
- "successful_requests": 70,
208
- "request_rate": 0.5856069425814708,
209
- "total_tokens_sent": 14000,
210
- "e2e_latency_ms": {
211
- "p50": 59683.651,
212
- "p60": 71746.875,
213
- "p70": 81953.181,
214
- "p80": 94277.653,
215
- "p90": 105378.271,
216
- "p95": 111453.36,
217
- "p99": 115949.496,
218
- "avg": 59958.385
219
- }
220
- },
221
- {
222
- "id": "[email protected]/s",
223
- "executor_type": "ConstantArrivalRate",
224
- "config": {
225
- "max_vus": 800,
226
- "duration_secs": 120,
227
- "rate": 100.0
228
- },
229
- "total_requests": 70,
230
- "total_tokens": 13359,
231
- "token_throughput_secs": 114.42379660997986,
232
- "duration_ms": 116750,
233
- "time_to_first_token_ms": {
234
- "p50": 57218.949,
235
- "p60": 67960.841,
236
- "p70": 79764.715,
237
- "p80": 91579.471,
238
- "p90": 102620.956,
239
- "p95": 107961.016,
240
- "p99": 112866.279,
241
- "avg": 56772.876
242
- },
243
- "inter_token_latency_ms": {
244
- "p50": 17.171,
245
- "p60": 17.189,
246
- "p70": 17.201,
247
- "p80": 17.215,
248
- "p90": 17.245,
249
- "p95": 17.299,
250
- "p99": 17.353,
251
- "avg": 17.179
252
- },
253
- "failed_requests": 0,
254
- "successful_requests": 70,
255
- "request_rate": 0.5995707584922966,
256
- "total_tokens_sent": 14000,
257
- "e2e_latency_ms": {
258
- "p50": 60551.916,
259
- "p60": 71380.408,
260
- "p70": 83198.203,
261
- "p80": 93909.886,
262
- "p90": 105788.774,
263
- "p95": 111364.807,
264
- "p99": 115968.729,
265
- "avg": 60037.39
266
- }
267
- }
268
- ],
269
- "start_time": "2025-05-21T12:32:04.299141299+00:00",
270
- "end_time": "2025-05-21T12:47:52.695866821+00:00",
271
- "system": {
272
- "cpu": [
273
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
274
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
275
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
276
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
277
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
278
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
279
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
280
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
281
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
282
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
283
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
284
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
285
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
286
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
287
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
288
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
289
- ],
290
- "memory": "83.47 GB",
291
- "os_name": "Debian GNU/Linux",
292
- "os_version": "11",
293
- "kernel": "5.15.167.4-microsoft-standard-WSL2",
294
- "hostname": "computer"
295
- }
296
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
results/microsoft_phi-4_2025-05-21-13-17-26.json DELETED
@@ -1,296 +0,0 @@
1
- {
2
- "config": {
3
- "max_vus": 800,
4
- "duration_secs": 120,
5
- "benchmark_kind": "Rate",
6
- "warmup_duration_secs": 30,
7
- "rates": [
8
- 1.0,
9
- 10.0,
10
- 30.0,
11
- 100.0
12
- ],
13
- "num_rates": 10,
14
- "prompt_options": {
15
- "num_tokens": 200,
16
- "min_tokens": 180,
17
- "max_tokens": 220,
18
- "variance": 10
19
- },
20
- "decode_options": {
21
- "num_tokens": 200,
22
- "min_tokens": 180,
23
- "max_tokens": 220,
24
- "variance": 10
25
- },
26
- "tokenizer": "microsoft/phi-4",
27
- "model_name": "phi-4",
28
- "profile": null,
29
- "meta": null,
30
- "run_id": "LM Studio: lmstudio-community/phi-4-GGUF:Q8_0 (200 tokens)"
31
- },
32
- "results": [
33
- {
34
- "id": "warmup",
35
- "executor_type": "ConstantVUs",
36
- "config": {
37
- "max_vus": 1,
38
- "duration_secs": 30,
39
- "rate": null
40
- },
41
- "total_requests": 13,
42
- "total_tokens": 2610,
43
- "token_throughput_secs": 83.60700961692694,
44
- "duration_ms": 31217,
45
- "time_to_first_token_ms": {
46
- "p50": 90.517,
47
- "p60": 93.25,
48
- "p70": 102.443,
49
- "p80": 109.227,
50
- "p90": 130.959,
51
- "p95": 207.294,
52
- "p99": 293.629,
53
- "avg": 108.58
54
- },
55
- "inter_token_latency_ms": {
56
- "p50": 11.513,
57
- "p60": 11.519,
58
- "p70": 11.534,
59
- "p80": 11.548,
60
- "p90": 11.559,
61
- "p95": 11.574,
62
- "p99": 11.589,
63
- "avg": 11.472
64
- },
65
- "failed_requests": 0,
66
- "successful_requests": 13,
67
- "request_rate": 0.41643338123373574,
68
- "total_tokens_sent": 2600,
69
- "e2e_latency_ms": {
70
- "p50": 2419.372,
71
- "p60": 2423.796,
72
- "p70": 2432.426,
73
- "p80": 2458.236,
74
- "p90": 2525.006,
75
- "p95": 2596.86,
76
- "p99": 2667.757,
77
- "avg": 2401.195
78
- }
79
- },
80
- {
81
- "id": "[email protected]/s",
82
- "executor_type": "ConstantArrivalRate",
83
- "config": {
84
- "max_vus": 800,
85
- "duration_secs": 120,
86
- "rate": 1.0
87
- },
88
- "total_requests": 52,
89
- "total_tokens": 9915,
90
- "token_throughput_secs": 84.1224984364473,
91
- "duration_ms": 117863,
92
- "time_to_first_token_ms": {
93
- "p50": 31149.018,
94
- "p60": 38159.307,
95
- "p70": 44798.95,
96
- "p80": 51599.01,
97
- "p90": 58334.517,
98
- "p95": 61414.588,
99
- "p99": 63915.289,
100
- "avg": 32379.62
101
- },
102
- "inter_token_latency_ms": {
103
- "p50": 11.473,
104
- "p60": 11.501,
105
- "p70": 11.517,
106
- "p80": 11.529,
107
- "p90": 11.563,
108
- "p95": 11.598,
109
- "p99": 11.775,
110
- "avg": 11.254
111
- },
112
- "failed_requests": 0,
113
- "successful_requests": 52,
114
- "request_rate": 0.4411870820670963,
115
- "total_tokens_sent": 10400,
116
- "e2e_latency_ms": {
117
- "p50": 33388.263,
118
- "p60": 40395.415,
119
- "p70": 47230.795,
120
- "p80": 53979.194,
121
- "p90": 60382.07,
122
- "p95": 63519.032,
123
- "p99": 66184.234,
124
- "avg": 34556.301
125
- }
126
- },
127
- {
128
- "id": "[email protected]/s",
129
- "executor_type": "ConstantArrivalRate",
130
- "config": {
131
- "max_vus": 800,
132
- "duration_secs": 120,
133
- "rate": 10.0
134
- },
135
- "total_requests": 51,
136
- "total_tokens": 10041,
137
- "token_throughput_secs": 84.04049965954646,
138
- "duration_ms": 119478,
139
- "time_to_first_token_ms": {
140
- "p50": 55889.645,
141
- "p60": 67098.347,
142
- "p70": 78905.359,
143
- "p80": 90289.182,
144
- "p90": 101201.112,
145
- "p95": 106805.272,
146
- "p99": 111193.127,
147
- "avg": 56139.066
148
- },
149
- "inter_token_latency_ms": {
150
- "p50": 11.487,
151
- "p60": 11.498,
152
- "p70": 11.51,
153
- "p80": 11.536,
154
- "p90": 11.584,
155
- "p95": 11.638,
156
- "p99": 11.883,
157
- "avg": 11.474
158
- },
159
- "failed_requests": 0,
160
- "successful_requests": 51,
161
- "request_rate": 0.4268564368725096,
162
- "total_tokens_sent": 10200,
163
- "e2e_latency_ms": {
164
- "p50": 58084.912,
165
- "p60": 69432.711,
166
- "p70": 81080.254,
167
- "p80": 92442.614,
168
- "p90": 103527.041,
169
- "p95": 108999.672,
170
- "p99": 113397.637,
171
- "avg": 58387.662
172
- }
173
- },
174
- {
175
- "id": "[email protected]/s",
176
- "executor_type": "ConstantArrivalRate",
177
- "config": {
178
- "max_vus": 800,
179
- "duration_secs": 120,
180
- "rate": 30.0
181
- },
182
- "total_requests": 51,
183
- "total_tokens": 9889,
184
- "token_throughput_secs": 84.08188681268076,
185
- "duration_ms": 117611,
186
- "time_to_first_token_ms": {
187
- "p50": 55982.506,
188
- "p60": 68000.692,
189
- "p70": 79600.152,
190
- "p80": 91108.706,
191
- "p90": 101995.453,
192
- "p95": 107929.312,
193
- "p99": 112340.212,
194
- "avg": 56754.648
195
- },
196
- "inter_token_latency_ms": {
197
- "p50": 11.503,
198
- "p60": 11.515,
199
- "p70": 11.531,
200
- "p80": 11.564,
201
- "p90": 11.589,
202
- "p95": 11.633,
203
- "p99": 11.795,
204
- "avg": 11.477
205
- },
206
- "failed_requests": 0,
207
- "successful_requests": 51,
208
- "request_rate": 0.43363092602353315,
209
- "total_tokens_sent": 10200,
210
- "e2e_latency_ms": {
211
- "p50": 58352.067,
212
- "p60": 70321.743,
213
- "p70": 81960.377,
214
- "p80": 93288.338,
215
- "p90": 104277.554,
216
- "p95": 110084.734,
217
- "p99": 114675.842,
218
- "avg": 58969.412
219
- }
220
- },
221
- {
222
- "id": "[email protected]/s",
223
- "executor_type": "ConstantArrivalRate",
224
- "config": {
225
- "max_vus": 800,
226
- "duration_secs": 120,
227
- "rate": 100.0
228
- },
229
- "total_requests": 57,
230
- "total_tokens": 9983,
231
- "token_throughput_secs": 83.83914212119033,
232
- "duration_ms": 119073,
233
- "time_to_first_token_ms": {
234
- "p50": 60425.652,
235
- "p60": 73426.16,
236
- "p70": 83375.468,
237
- "p80": 96034.495,
238
- "p90": 104082.959,
239
- "p95": 110616.366,
240
- "p99": 114826.821,
241
- "avg": 59050.64
242
- },
243
- "inter_token_latency_ms": {
244
- "p50": 11.528,
245
- "p60": 11.552,
246
- "p70": 11.577,
247
- "p80": 11.595,
248
- "p90": 11.625,
249
- "p95": 11.656,
250
- "p99": 11.7,
251
- "avg": 11.281
252
- },
253
- "failed_requests": 0,
254
- "successful_requests": 57,
255
- "request_rate": 0.4786968948119652,
256
- "total_tokens_sent": 11400,
257
- "e2e_latency_ms": {
258
- "p50": 62519.008,
259
- "p60": 74991.853,
260
- "p70": 85562.76,
261
- "p80": 96625.366,
262
- "p90": 106351.421,
263
- "p95": 112531.399,
264
- "p99": 117196.304,
265
- "avg": 61050.657
266
- }
267
- }
268
- ],
269
- "start_time": "2025-05-21T13:01:17.074891817+00:00",
270
- "end_time": "2025-05-21T13:17:26.396424745+00:00",
271
- "system": {
272
- "cpu": [
273
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
274
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
275
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
276
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
277
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
278
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
279
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
280
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
281
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
282
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
283
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
284
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
285
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
286
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
287
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
288
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
289
- ],
290
- "memory": "83.47 GB",
291
- "os_name": "Debian GNU/Linux",
292
- "os_version": "11",
293
- "kernel": "5.15.167.4-microsoft-standard-WSL2",
294
- "hostname": "computer"
295
- }
296
- }