Commit 9ff97e7
1 Parent(s): c8d26ac

update

Changed files:
- README.md  +1 -4
- dashboard/app.py  +13 -192
README.md CHANGED

@@ -1,5 +1,5 @@
 ---
-title: Inference
+title: Inference Benchmarking Results Phi-4 (200 Tokens)
 emoji: π
 colorFrom: gray
 colorTo: green
@@ -8,9 +8,6 @@ sdk_version: 5.30.0
 app_file: dashboard/app.py
 pinned: false
 license: mit
-
-env:
-  DASHBOARD_FROM_RESULTS_DIR: results
 ---

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
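The `env:` block removed above had exposed `DASHBOARD_FROM_RESULTS_DIR: results` to the app, and the pre-commit `main()` in dashboard/app.py (see the diff below) resolved the results directory with exactly that precedence: CLI option, then the environment variable, then a local `results` directory. A condensed sketch of that removed fallback, with the helper name `resolve_results_dir` invented here for illustration:

```python
import os

def resolve_results_dir(cli_value):
    # Precedence used by the code this commit removes:
    # 1. the --from-results-dir CLI option,
    # 2. the DASHBOARD_FROM_RESULTS_DIR env var (set via the README's env: block),
    # 3. a local 'results' directory if one exists.
    if cli_value is not None:
        return cli_value
    env_value = os.environ.get('DASHBOARD_FROM_RESULTS_DIR')
    if env_value:
        return env_value
    if os.path.isdir('results'):
        return 'results'
    return None
```

After this commit only the `--from-results-dir` CLI option (default `None`) remains, so the Space config no longer needs the environment variable.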
dashboard/app.py CHANGED

@@ -3,14 +3,9 @@ from dataclasses import dataclass
 from typing import List

 import click
-import os
 import gradio as gr
 import pandas as pd

-import traceback
-import glob
-import json
-
 from parse_results import build_results


@@ -21,22 +16,8 @@ class PlotConfig:
     title: str
     percentiles: List[float] = None

-def check_file_exists(path, label=""):
-    if os.path.exists(path):
-        print(f"✅ {label} file exists: {path}")
-        print(f" File size: {os.path.getsize(path)} bytes")
-        print(f" Absolute path: {os.path.abspath(path)}")
-    else:
-        print(f"❌ {label} file NOT found: {path}")
-        print(f" Current working directory: {os.getcwd()}")
-        print(f" Directory contents: {os.listdir(os.path.dirname(path) if os.path.dirname(path) else '.')}")
-

 def run(from_results_dir, datasource, port):
-    print(f"💡 Debug - from_results_dir: {from_results_dir}")
-    print(f"💡 Debug - datasource: {datasource}")
-    print(f"💡 Debug - current directory: {os.getcwd()}")
-
     css = '''
     .summary span {
         font-size: 10px;
@@ -48,17 +29,17 @@ def run(from_results_dir, datasource, port):
     summary_desc = '''
     ## Summary
     This table shows the average of the metrics for each model and QPS rate.
-
+
     The metrics are:
-    * Inter token latency: Time to generate a new output token for each user querying the system.
+    * Inter token latency: Time to generate a new output token for each user querying the system.
     It translates as the “speed” perceived by the end-user. We aim for at least 300 words per minute (average reading speed), so ITL<150ms
-    * Time to First Token: Time the user has to wait before seeing the first token of its answer.
+    * Time to First Token: Time the user has to wait before seeing the first token of its answer.
     Lower waiting time are essential for real-time interactions, less so for offline workloads.
     * End-to-end latency: The overall time the system took to generate the full response to the user.
     * Throughput: The number of tokens per second the system can generate across all requests
     * Successful requests: The number of requests the system was able to honor in the benchmark timeframe
-    * Error rate: The percentage of requests that ended up in error, as the system could not process them in time or failed to process them.
-
+    * Error rate: The percentage of requests that ended up in error, as the system could not process them in time or failed to process them.
+
     '''

     df_bench = pd.DataFrame()
@@ -129,76 +110,17 @@ def run(from_results_dir, datasource, port):
         return res

     def load_datasource(datasource, fn):
-        print(f"💡 Debug - load_datasource called with: {datasource}")
         if datasource.startswith('file://'):
-
-            print(f"💡 Debug - Extracted local path: {local_path}")
-            check_file_exists(local_path, "Local")
-            return fn(local_path)
+            return fn(datasource)
         elif datasource.startswith('s3://'):
             return fn(datasource)
         else:
-
-            print(f"💡 Debug - Using path as-is: {datasource}")
-            check_file_exists(datasource, "Direct")
-            return fn(datasource)
-
-    parquet_file_to_load = None
+            raise ValueError(f"Unknown datasource: {datasource}")

     if from_results_dir is not None:
-
-        # within that directory.
-        output_filename = 'benchmarks.parquet'
-        print(f"💡 Debug - Building results from directory: {from_results_dir}")
-
-        # Check if results directory exists
-        check_file_exists(from_results_dir, "Results directory")
-
-        # Create absolute path for results directory
-        abs_results_dir = os.path.abspath(from_results_dir)
-        print(f"💡 Debug - Absolute results directory: {abs_results_dir}")
-
-        # Create the results directory if it doesn't exist
-        if not os.path.exists(abs_results_dir):
-            print(f"💡 Debug - Creating results directory: {abs_results_dir}")
-            os.makedirs(abs_results_dir, exist_ok=True)
-
-        # Call build_results with absolute paths
-        full_output_path = os.path.join(abs_results_dir, output_filename)
-        print(f"💡 Debug - Expected output path: {full_output_path}")
-
-        build_results(abs_results_dir, output_filename, None)
-
-        # Check if the file was created
-        check_file_exists(full_output_path, "Generated parquet")
-
-        # The file to load is now in from_results_dir/output_filename
-        parquet_file_to_load = full_output_path
-    else:
-        # If not building from results_dir, use the provided datasource directly.
-        parquet_file_to_load = datasource
-
-    print(f"💡 Debug - Final parquet_file_to_load: {parquet_file_to_load}")
-
+        build_results(from_results_dir, 'benchmarks.parquet', None)
     # Load data
-    try:
-        df_bench = load_datasource(parquet_file_to_load, load_bench_results)
-        print(f"✅ Successfully loaded data with {len(df_bench)} rows")
-    except Exception as e:
-        print(f"❌ Error loading data: {str(e)}")
-        print(f"Stack trace: {traceback.format_exc()}")
-        # Create a minimal DataFrame to prevent further errors
-        df_bench = pd.DataFrame({
-            "model": ["error"],
-            "run_id": ["error"],
-            "rate": [0],
-            "inter_token_latency_ms_p90": [0],
-            "time_to_first_token_ms_p90": [0],
-            "e2e_latency_ms_p90": [0],
-            "token_throughput_secs": [0],
-            "successful_requests": [0],
-            "error_rate": [0]
-        })
+    df_bench = load_datasource(datasource, load_bench_results)

     # Define metrics
     metrics = {
@@ -276,112 +198,11 @@ def run(from_results_dir, datasource, port):


 @click.command()
-@click.option('--from-results-dir',
-@click.option('--datasource',
+@click.option('--from-results-dir', default=None, help='Load inference-benchmarker results from a directory')
+@click.option('--datasource', default='file://benchmarks.parquet', help='Load a Parquet file already generated')
 @click.option('--port', default=7860, help='Port to run the dashboard')
-def main(
-
-    # print(f"Environment variables: {os.environ}") # Already in user's code or logs
-
-    # Determine the directory from which to process JSON results
-    # Priority: 1. CLI option, 2. Env Var, 3. Default to 'results' dir
-    processing_dir = cli_from_results_dir
-
-    if processing_dir is None:
-        env_var_value = os.environ.get('DASHBOARD_FROM_RESULTS_DIR')
-        if env_var_value:
-            print(f"Using environment variable DASHBOARD_FROM_RESULTS_DIR='{env_var_value}' for processing.")
-            processing_dir = env_var_value
-        elif os.path.exists('results') and os.path.isdir('results'):
-            print(f"No --from-results-dir option or DASHBOARD_FROM_RESULTS_DIR env var. Defaulting to 'results' directory for processing as it exists.")
-            processing_dir = 'results'
-        else:
-            print(f"No directory specified for processing (no --from-results-dir, no DASHBOARD_FROM_RESULTS_DIR env var, and 'results' dir not found).")
-            # processing_dir remains None
-
-    path_to_load_by_run_function = None # This will be the path to the .parquet file
-
-    if processing_dir:
-        # A directory for processing JSONs has been determined.
-        # Use the existing logic to build/fallback and generate benchmarks.parquet.
-        output_filename = 'benchmarks.parquet'
-        abs_processing_dir = os.path.abspath(processing_dir)
-
-        print(f"💡 Debug - Will process JSONs from directory: {abs_processing_dir}")
-        check_file_exists(abs_processing_dir, "Source directory for JSONs")
-
-        # Ensure the directory exists (it might be 'results' or user-provided)
-        # build_results might expect the output directory to exist.
-        if not os.path.exists(abs_processing_dir):
-            print(f"💡 Debug - Creating directory for processing/output: {abs_processing_dir}")
-            os.makedirs(abs_processing_dir, exist_ok=True)
-
-        # The generated parquet file will be placed inside the abs_processing_dir
-        generated_parquet_filepath = os.path.join(abs_processing_dir, output_filename)
-        print(f"💡 Debug - Expected path for generated parquet file: {generated_parquet_filepath}")
-
-        try:
-            build_results(abs_processing_dir, output_filename, None) # output_filename is relative to abs_processing_dir
-            print("✅ Build results completed using build_results.")
-        except Exception as e_build:
-            print(f"❌ Error in build_results: {str(e_build)}")
-            print(f"Stack trace: {traceback.format_exc()}")
-            print("⚠️ Attempting fallback method: direct JSON processing")
-            try:
-                json_files = glob.glob(os.path.join(abs_processing_dir, "*.json"))
-                print(f"Found {len(json_files)} JSON files for fallback: {json_files}")
-                if not json_files:
-                    raise FileNotFoundError("Fallback: No JSON files found in results directory")
-
-                combined_data = []
-                for json_file in json_files:
-                    try:
-                        with open(json_file, 'r') as f:
-                            data = json.load(f)
-                        filename = os.path.basename(json_file)
-                        model_name_parts = filename.split('_')
-                        model_name = f"{model_name_parts[0]}_{model_name_parts[1]}" if len(model_name_parts) > 1 else model_name_parts[0]
-
-                        if 'benchmarks' in data:
-                            for benchmark in data['benchmarks']:
-                                benchmark['model'] = model_name
-                                benchmark['run_id'] = os.path.splitext(filename)[0]
-                                combined_data.append(benchmark)
-                        else:
-                            print(f"⚠️ Fallback: No 'benchmarks' key in {json_file}")
-                    except Exception as json_err:
-                        print(f"❌ Fallback: Error processing {json_file}: {str(json_err)}")
-
-                if combined_data:
-                    df_direct = pd.DataFrame(combined_data)
-                    df_direct.to_parquet(generated_parquet_filepath)
-                    print(f"✅ Created parquet file via fallback method: {generated_parquet_filepath}")
-                else:
-                    raise ValueError("Fallback: No data could be extracted from JSON files")
-            except Exception as e_fallback:
-                print(f"❌ Fallback method failed: {str(e_fallback)}")
-                print(f"Stack trace: {traceback.format_exc()}")
-
-        # After attempting to build/generate, check if the file exists
-        check_file_exists(generated_parquet_filepath, "Parquet file after build/fallback attempts")
-        if os.path.exists(generated_parquet_filepath):
-            path_to_load_by_run_function = generated_parquet_filepath
-        else:
-            print(f"❌ CRITICAL: Failed to generate or find parquet file at '{generated_parquet_filepath}' after all attempts.")
-            # path_to_load_by_run_function remains None here, will be handled below.
-
-    # If path_to_load_by_run_function is still None at this point
-    # (either because processing_dir was not set, or all generation attempts failed),
-    # default to the original cli_datasource.
-    if path_to_load_by_run_function is None:
-        print(f"⚠️ Defaulting to cli_datasource '{cli_datasource}' as parquet generation failed or was skipped.")
-        path_to_load_by_run_function = cli_datasource
-
-    print(f"💡 Final path to be loaded by run() function: '{path_to_load_by_run_function}'")
-
-    # Call run(). The first argument (from_results_dir for run()) is None because main handles processing.
-    # The second argument (datasource for run()) is the actual file path to load.
-    run(None, path_to_load_by_run_function, port)
+def main(from_results_dir, datasource, port):
+    run(from_results_dir, datasource, port)


 if __name__ == '__main__':
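The summary text in the diff above ties the ITL < 150 ms target to a 300 words-per-minute average reading speed. A rough back-of-the-envelope check of that threshold, assuming about 1.3 tokens per English word (an assumption, not a figure from this repo):

```python
# Sanity check for the ITL < 150 ms target mentioned in summary_desc.
words_per_minute = 300          # average reading speed cited in the dashboard text
tokens_per_word = 1.3           # assumed; typical for English subword tokenizers
tokens_per_second = words_per_minute * tokens_per_word / 60
max_itl_ms = 1000 / tokens_per_second
print(f"{tokens_per_second:.1f} tokens/s -> ITL <= {max_itl_ms:.0f} ms")  # ~6.5 tokens/s -> ~154 ms
```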
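With the debug scaffolding gone, the data path after this commit is short: `main()` forwards the CLI options to `run()`, `run()` calls `build_results(from_results_dir, 'benchmarks.parquet', None)` only when a results directory is given, and `load_datasource()` hands `file://` or `s3://` URIs straight to the loader and rejects anything else. A minimal usage sketch of that dispatch; `load_bench_results` is not part of this diff, so `pandas.read_parquet` stands in for it here:

```python
import pandas as pd

def load_datasource(datasource, fn):
    # Same dispatch as the post-commit dashboard/app.py: pass known URI schemes through, reject the rest.
    if datasource.startswith('file://'):
        return fn(datasource)
    elif datasource.startswith('s3://'):
        return fn(datasource)
    else:
        raise ValueError(f"Unknown datasource: {datasource}")

# Stand-in loader: strip the file:// prefix and read the Parquet file with pandas.
df = load_datasource('file://benchmarks.parquet',
                     lambda uri: pd.read_parquet(uri.removeprefix('file://')))
print(df.head())
```

Raising on unknown schemes replaces the old behaviour of treating any other string as a local path.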