Commit 9ff97e7
Parent(s): c8d26ac

update

Files changed:
- README.md +1 -4
- dashboard/app.py +13 -192
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: Inference
+title: Inference Benchmarking Results Phi-4 (200 Tokens)
 emoji: π
 colorFrom: gray
 colorTo: green
@@ -8,9 +8,6 @@ sdk_version: 5.30.0
 app_file: dashboard/app.py
 pinned: false
 license: mit
-
-env:
-  DASHBOARD_FROM_RESULTS_DIR: results
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
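With the `env:` block gone from the frontmatter (and the matching `os.environ.get('DASHBOARD_FROM_RESULTS_DIR')` fallback removed from dashboard/app.py below), the results directory must now be passed explicitly. A minimal local-launch sketch, assuming the Space's file layout; the flag values here are illustrative, not part of the commit:

# Minimal launch sketch (assumed paths/flags, not from the commit): after this
# change DASHBOARD_FROM_RESULTS_DIR is ignored, and the results directory
# travels through the --from-results-dir click option instead.
import subprocess

subprocess.run(
    ["python", "dashboard/app.py", "--from-results-dir", "results", "--port", "7860"],
    check=True,
)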
dashboard/app.py CHANGED
@@ -3,14 +3,9 @@ from dataclasses import dataclass
 from typing import List
 
 import click
-import os
 import gradio as gr
 import pandas as pd
 
-import traceback
-import glob
-import json
-
 from parse_results import build_results
 
 
@@ -21,22 +16,8 @@ class PlotConfig:
     title: str
     percentiles: List[float] = None
 
-def check_file_exists(path, label=""):
-    if os.path.exists(path):
-        print(f"✅ {label} file exists: {path}")
-        print(f"   File size: {os.path.getsize(path)} bytes")
-        print(f"   Absolute path: {os.path.abspath(path)}")
-    else:
-        print(f"❌ {label} file NOT found: {path}")
-        print(f"   Current working directory: {os.getcwd()}")
-        print(f"   Directory contents: {os.listdir(os.path.dirname(path) if os.path.dirname(path) else '.')}")
-
 
 def run(from_results_dir, datasource, port):
-    print(f"💡 Debug - from_results_dir: {from_results_dir}")
-    print(f"💡 Debug - datasource: {datasource}")
-    print(f"💡 Debug - current directory: {os.getcwd()}")
-
     css = '''
     .summary span {
         font-size: 10px;
@@ -48,17 +29,17 @@ def run(from_results_dir, datasource, port):
     summary_desc = '''
    ## Summary
    This table shows the average of the metrics for each model and QPS rate.
-
+
    The metrics are:
-    * Inter token latency: Time to generate a new output token for each user querying the system.
+    * Inter token latency: Time to generate a new output token for each user querying the system.
      It translates as the “speed” perceived by the end-user. We aim for at least 300 words per minute (average reading speed), so ITL<150ms
-    * Time to First Token: Time the user has to wait before seeing the first token of its answer.
+    * Time to First Token: Time the user has to wait before seeing the first token of its answer.
      Lower waiting time are essential for real-time interactions, less so for offline workloads.
    * End-to-end latency: The overall time the system took to generate the full response to the user.
    * Throughput: The number of tokens per second the system can generate across all requests
    * Successful requests: The number of requests the system was able to honor in the benchmark timeframe
-    * Error rate: The percentage of requests that ended up in error, as the system could not process them in time or failed to process them.
-
+    * Error rate: The percentage of requests that ended up in error, as the system could not process them in time or failed to process them.
+
    '''
 
    df_bench = pd.DataFrame()
@@ -129,76 +110,17 @@ def run(from_results_dir, datasource, port):
        return res
 
    def load_datasource(datasource, fn):
-        print(f"💡 Debug - load_datasource called with: {datasource}")
        if datasource.startswith('file://'):
-
-            print(f"💡 Debug - Extracted local path: {local_path}")
-            check_file_exists(local_path, "Local")
-            return fn(local_path)
+            return fn(datasource)
        elif datasource.startswith('s3://'):
            return fn(datasource)
        else:
-
-            print(f"💡 Debug - Using path as-is: {datasource}")
-            check_file_exists(datasource, "Direct")
-            return fn(datasource)
-
-    parquet_file_to_load = None
+            raise ValueError(f"Unknown datasource: {datasource}")
 
    if from_results_dir is not None:
-
-        # within that directory.
-        output_filename = 'benchmarks.parquet'
-        print(f"💡 Debug - Building results from directory: {from_results_dir}")
-
-        # Check if results directory exists
-        check_file_exists(from_results_dir, "Results directory")
-
-        # Create absolute path for results directory
-        abs_results_dir = os.path.abspath(from_results_dir)
-        print(f"💡 Debug - Absolute results directory: {abs_results_dir}")
-
-        # Create the results directory if it doesn't exist
-        if not os.path.exists(abs_results_dir):
-            print(f"💡 Debug - Creating results directory: {abs_results_dir}")
-            os.makedirs(abs_results_dir, exist_ok=True)
-
-        # Call build_results with absolute paths
-        full_output_path = os.path.join(abs_results_dir, output_filename)
-        print(f"💡 Debug - Expected output path: {full_output_path}")
-
-        build_results(abs_results_dir, output_filename, None)
-
-        # Check if the file was created
-        check_file_exists(full_output_path, "Generated parquet")
-
-        # The file to load is now in from_results_dir/output_filename
-        parquet_file_to_load = full_output_path
-    else:
-        # If not building from results_dir, use the provided datasource directly.
-        parquet_file_to_load = datasource
-
-    print(f"💡 Debug - Final parquet_file_to_load: {parquet_file_to_load}")
-
+        build_results(from_results_dir, 'benchmarks.parquet', None)
    # Load data
-    try:
-        df_bench = load_datasource(parquet_file_to_load, load_bench_results)
-        print(f"✅ Successfully loaded data with {len(df_bench)} rows")
-    except Exception as e:
-        print(f"❌ Error loading data: {str(e)}")
-        print(f"Stack trace: {traceback.format_exc()}")
-        # Create a minimal DataFrame to prevent further errors
-        df_bench = pd.DataFrame({
-            "model": ["error"],
-            "run_id": ["error"],
-            "rate": [0],
-            "inter_token_latency_ms_p90": [0],
-            "time_to_first_token_ms_p90": [0],
-            "e2e_latency_ms_p90": [0],
-            "token_throughput_secs": [0],
-            "successful_requests": [0],
-            "error_rate": [0]
-        })
+    df_bench = load_datasource(datasource, load_bench_results)
 
    # Define metrics
    metrics = {
@@ -276,112 +198,11 @@ def run(from_results_dir, datasource, port):
 
 
 @click.command()
-@click.option('--from-results-dir',
-@click.option('--datasource',
+@click.option('--from-results-dir', default=None, help='Load inference-benchmarker results from a directory')
+@click.option('--datasource', default='file://benchmarks.parquet', help='Load a Parquet file already generated')
 @click.option('--port', default=7860, help='Port to run the dashboard')
-def main(cli_from_results_dir, cli_datasource, port):
-
-    # print(f"Environment variables: {os.environ}") # Already in user's code or logs
-
-    # Determine the directory from which to process JSON results
-    # Priority: 1. CLI option, 2. Env Var, 3. Default to 'results' dir
-    processing_dir = cli_from_results_dir
-
-    if processing_dir is None:
-        env_var_value = os.environ.get('DASHBOARD_FROM_RESULTS_DIR')
-        if env_var_value:
-            print(f"Using environment variable DASHBOARD_FROM_RESULTS_DIR='{env_var_value}' for processing.")
-            processing_dir = env_var_value
-        elif os.path.exists('results') and os.path.isdir('results'):
-            print(f"No --from-results-dir option or DASHBOARD_FROM_RESULTS_DIR env var. Defaulting to 'results' directory for processing as it exists.")
-            processing_dir = 'results'
-        else:
-            print(f"No directory specified for processing (no --from-results-dir, no DASHBOARD_FROM_RESULTS_DIR env var, and 'results' dir not found).")
-            # processing_dir remains None
-
-    path_to_load_by_run_function = None  # This will be the path to the .parquet file
-
-    if processing_dir:
-        # A directory for processing JSONs has been determined.
-        # Use the existing logic to build/fallback and generate benchmarks.parquet.
-        output_filename = 'benchmarks.parquet'
-        abs_processing_dir = os.path.abspath(processing_dir)
-
-        print(f"💡 Debug - Will process JSONs from directory: {abs_processing_dir}")
-        check_file_exists(abs_processing_dir, "Source directory for JSONs")
-
-        # Ensure the directory exists (it might be 'results' or user-provided)
-        # build_results might expect the output directory to exist.
-        if not os.path.exists(abs_processing_dir):
-            print(f"💡 Debug - Creating directory for processing/output: {abs_processing_dir}")
-            os.makedirs(abs_processing_dir, exist_ok=True)
-
-        # The generated parquet file will be placed inside the abs_processing_dir
-        generated_parquet_filepath = os.path.join(abs_processing_dir, output_filename)
-        print(f"💡 Debug - Expected path for generated parquet file: {generated_parquet_filepath}")
-
-        try:
-            build_results(abs_processing_dir, output_filename, None)  # output_filename is relative to abs_processing_dir
-            print("✅ Build results completed using build_results.")
-        except Exception as e_build:
-            print(f"❌ Error in build_results: {str(e_build)}")
-            print(f"Stack trace: {traceback.format_exc()}")
-            print("⚠️ Attempting fallback method: direct JSON processing")
-            try:
-                json_files = glob.glob(os.path.join(abs_processing_dir, "*.json"))
-                print(f"Found {len(json_files)} JSON files for fallback: {json_files}")
-                if not json_files:
-                    raise FileNotFoundError("Fallback: No JSON files found in results directory")
-
-                combined_data = []
-                for json_file in json_files:
-                    try:
-                        with open(json_file, 'r') as f:
-                            data = json.load(f)
-                        filename = os.path.basename(json_file)
-                        model_name_parts = filename.split('_')
-                        model_name = f"{model_name_parts[0]}_{model_name_parts[1]}" if len(model_name_parts) > 1 else model_name_parts[0]
-
-                        if 'benchmarks' in data:
-                            for benchmark in data['benchmarks']:
-                                benchmark['model'] = model_name
-                                benchmark['run_id'] = os.path.splitext(filename)[0]
-                                combined_data.append(benchmark)
-                        else:
-                            print(f"⚠️ Fallback: No 'benchmarks' key in {json_file}")
-                    except Exception as json_err:
-                        print(f"❌ Fallback: Error processing {json_file}: {str(json_err)}")
-
-                if combined_data:
-                    df_direct = pd.DataFrame(combined_data)
-                    df_direct.to_parquet(generated_parquet_filepath)
-                    print(f"✅ Created parquet file via fallback method: {generated_parquet_filepath}")
-                else:
-                    raise ValueError("Fallback: No data could be extracted from JSON files")
-            except Exception as e_fallback:
-                print(f"❌ Fallback method failed: {str(e_fallback)}")
-                print(f"Stack trace: {traceback.format_exc()}")
-
-        # After attempting to build/generate, check if the file exists
-        check_file_exists(generated_parquet_filepath, "Parquet file after build/fallback attempts")
-        if os.path.exists(generated_parquet_filepath):
-            path_to_load_by_run_function = generated_parquet_filepath
-        else:
-            print(f"❌ CRITICAL: Failed to generate or find parquet file at '{generated_parquet_filepath}' after all attempts.")
-            # path_to_load_by_run_function remains None here, will be handled below.
-
-    # If path_to_load_by_run_function is still None at this point
-    # (either because processing_dir was not set, or all generation attempts failed),
-    # default to the original cli_datasource.
-    if path_to_load_by_run_function is None:
-        print(f"⚠️ Defaulting to cli_datasource '{cli_datasource}' as parquet generation failed or was skipped.")
-        path_to_load_by_run_function = cli_datasource
-
-    print(f"💡 Final path to be loaded by run() function: '{path_to_load_by_run_function}'")
-
-    # Call run(). The first argument (from_results_dir for run()) is None because main handles processing.
-    # The second argument (datasource for run()) is the actual file path to load.
-    run(None, path_to_load_by_run_function, port)
+def main(from_results_dir, datasource, port):
+    run(from_results_dir, datasource, port)
 
 
 if __name__ == '__main__':
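Net effect on the loading path: run() now builds benchmarks.parquet in a single build_results call and loads it through a load_datasource that accepts only file:// and s3:// URIs, raising on anything else instead of silently falling back to debug-laden recovery code. A condensed sketch of the post-commit flow; load_bench_results here is a hypothetical stand-in for the loader app.py defines elsewhere:

# Condensed from the post-commit dashboard/app.py: the debug prints, the JSON
# fallback path, and the error-placeholder DataFrame are gone; unknown
# datasource schemes now raise instead of being loaded as-is.
import pandas as pd

def load_bench_results(path):
    # Hypothetical stand-in loader: pandas (via fsspec) generally resolves
    # file:// and s3:// URIs passed straight to read_parquet.
    return pd.read_parquet(path)

def load_datasource(datasource, fn):
    if datasource.startswith('file://'):
        return fn(datasource)
    elif datasource.startswith('s3://'):
        return fn(datasource)
    else:
        raise ValueError(f"Unknown datasource: {datasource}")

# Usage: df_bench = load_datasource('file://benchmarks.parquet', load_bench_results)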