add gpu layers
Browse files
- src/app.py +1 -1
- src/components/filters.py +21 -10
- src/components/visualizations.py +56 -72
src/app.py
CHANGED
|
@@ -34,7 +34,7 @@ def get_filter_values(
|
|
| 34 |
cache_type_v = sorted(df["cache_type_v"].unique().tolist())
|
| 35 |
cache_type_k = sorted(df["cache_type_k"].unique().tolist())
|
| 36 |
n_threads = (df["n_threads"].min(), df["n_threads"].max())
|
| 37 |
-
max_n_gpu_layers = max(df["n_gpu_layers"].unique().tolist())
|
| 38 |
pp_range = (df["PP Config"].min(), df["PP Config"].max())
|
| 39 |
tg_range = (df["TG Config"].min(), df["TG Config"].max())
|
| 40 |
versions = sorted(df["Version"].unique().tolist())
|
|
|
|
| 34 |
cache_type_v = sorted(df["cache_type_v"].unique().tolist())
|
| 35 |
cache_type_k = sorted(df["cache_type_k"].unique().tolist())
|
| 36 |
n_threads = (df["n_threads"].min(), df["n_threads"].max())
|
| 37 |
+
max_n_gpu_layers = (0, max(df["n_gpu_layers"].unique().tolist()))
|
| 38 |
pp_range = (df["PP Config"].min(), df["PP Config"].max())
|
| 39 |
tg_range = (df["TG Config"].min(), df["TG Config"].max())
|
| 40 |
versions = sorted(df["Version"].unique().tolist())
|
src/components/filters.py
CHANGED
|
@@ -14,10 +14,11 @@ def render_grouping_options(key_prefix: str = "") -> List[str]:
|
|
| 14 |
"cache_type_v",
|
| 15 |
"PP Config",
|
| 16 |
"TG Config",
|
| 17 |
-
"n_context",
|
| 18 |
-
"n_batch",
|
| 19 |
-
"n_ubatch",
|
| 20 |
"Version",
|
|
|
|
| 21 |
]
|
| 22 |
|
| 23 |
default_groups = [
|
|
@@ -53,7 +54,7 @@ def render_column_visibility() -> Set[str]:
|
|
| 53 |
"CPU Cores",
|
| 54 |
"Total Memory (GB)",
|
| 55 |
"Peak Memory (GB)",
|
| 56 |
-
"Memory Usage (%)",
|
| 57 |
],
|
| 58 |
"Benchmark Info": [
|
| 59 |
"PP Config",
|
|
@@ -70,13 +71,13 @@ def render_column_visibility() -> Set[str]:
|
|
| 70 |
],
|
| 71 |
"Advanced": [
|
| 72 |
"n_threads",
|
| 73 |
-
"n_gpu_layers",
|
| 74 |
"flash_attn",
|
| 75 |
"cache_type_k",
|
| 76 |
"cache_type_v",
|
| 77 |
-
"n_context",
|
| 78 |
-
"n_batch",
|
| 79 |
-
"n_ubatch",
|
| 80 |
],
|
| 81 |
"App": [
|
| 82 |
"Version",
|
|
@@ -148,7 +149,7 @@ def render_filters(
|
|
| 148 |
)
|
| 149 |
|
| 150 |
# Row 2 continued
|
| 151 |
-
col2, col3, col4, col5, col6, col7, col8, col9, col10 = st.columns(
|
| 152 |
|
| 153 |
with col2:
|
| 154 |
filters["platform"] = st.selectbox(
|
|
@@ -162,7 +163,7 @@ def render_filters(
|
|
| 162 |
|
| 163 |
with col4:
|
| 164 |
filters["flash_attn"] = st.selectbox(
|
| 165 |
-
"
|
| 166 |
options=["All", True, False],
|
| 167 |
key=f"{key_prefix}_filter_flash_attn",
|
| 168 |
)
|
|
@@ -211,9 +212,19 @@ def render_filters(
|
|
| 211 |
)
|
| 212 |
|
| 213 |
with col10:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
filters["Version"] = st.selectbox(
|
| 215 |
"Version", options=["All"] + versions, key=f"{key_prefix}_filter_version"
|
| 216 |
)
|
|
|
|
| 217 |
|
| 218 |
# Column visibility control as a small button/dropdown
|
| 219 |
filters["visible_columns"] = render_column_visibility()
|
|
|
|
| 14 |
"cache_type_v",
|
| 15 |
"PP Config",
|
| 16 |
"TG Config",
|
| 17 |
+
#"n_context",
|
| 18 |
+
#"n_batch",
|
| 19 |
+
#"n_ubatch",
|
| 20 |
"Version",
|
| 21 |
+
"n_gpu_layers",
|
| 22 |
]
|
| 23 |
|
| 24 |
default_groups = [
|
|
|
|
| 54 |
"CPU Cores",
|
| 55 |
"Total Memory (GB)",
|
| 56 |
"Peak Memory (GB)",
|
| 57 |
+
#"Memory Usage (%)",
|
| 58 |
],
|
| 59 |
"Benchmark Info": [
|
| 60 |
"PP Config",
|
|
|
|
| 71 |
],
|
| 72 |
"Advanced": [
|
| 73 |
"n_threads",
|
| 74 |
+
#"n_gpu_layers",
|
| 75 |
"flash_attn",
|
| 76 |
"cache_type_k",
|
| 77 |
"cache_type_v",
|
| 78 |
+
# "n_context",
|
| 79 |
+
# "n_batch",
|
| 80 |
+
# "n_ubatch",
|
| 81 |
],
|
| 82 |
"App": [
|
| 83 |
"Version",
|
|
|
|
| 149 |
)
|
| 150 |
|
| 151 |
# Row 2 continued
|
| 152 |
+
col2, col3, col4, col5, col6, col7, col8, col9, col10, col11 = st.columns(10)
|
| 153 |
|
| 154 |
with col2:
|
| 155 |
filters["platform"] = st.selectbox(
|
|
|
|
| 163 |
|
| 164 |
with col4:
|
| 165 |
filters["flash_attn"] = st.selectbox(
|
| 166 |
+
"Attn",
|
| 167 |
options=["All", True, False],
|
| 168 |
key=f"{key_prefix}_filter_flash_attn",
|
| 169 |
)
|
|
|
|
| 212 |
)
|
| 213 |
|
| 214 |
with col10:
|
| 215 |
+
filters["n_gpu_layers"] = st.slider(
|
| 216 |
+
"GPU Layers",
|
| 217 |
+
min_value=0,
|
| 218 |
+
max_value=max_n_gpu_layers[1],
|
| 219 |
+
value=max_n_gpu_layers,
|
| 220 |
+
key=f"{key_prefix}_filter_n_gpu_layers",
|
| 221 |
+
)
|
| 222 |
+
|
| 223 |
+
with col11:
|
| 224 |
filters["Version"] = st.selectbox(
|
| 225 |
"Version", options=["All"] + versions, key=f"{key_prefix}_filter_version"
|
| 226 |
)
|
| 227 |
+
|
| 228 |
|
| 229 |
# Column visibility control as a small button/dropdown
|
| 230 |
filters["visible_columns"] = render_column_visibility()
|
src/components/visualizations.py
CHANGED
|
@@ -83,6 +83,13 @@ def filter_dataframe(df: pd.DataFrame, filters: Dict) -> pd.DataFrame:
|
|
| 83 |
(n_threads >= n_threads_min) & (n_threads <= n_threads_max)
|
| 84 |
]
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
# Version filter - handle multiple selections
|
| 87 |
if filters.get("Version") != "All" and filters.get("Version"):
|
| 88 |
filtered_df = filtered_df[filtered_df["Version"] == filters["Version"]]
|
|
@@ -216,48 +223,56 @@ def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
|
|
| 216 |
st.warning("No data matches the selected filters.")
|
| 217 |
return
|
| 218 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
# Group by selected columns
|
| 220 |
grouping_cols = filters["grouping"]
|
| 221 |
if not grouping_cols:
|
| 222 |
grouping_cols = ["Model ID", "Device", "Platform"] # Default grouping
|
| 223 |
|
| 224 |
-
#
|
| 225 |
agg_dict = {
|
| 226 |
col: agg
|
| 227 |
for col, agg in {
|
| 228 |
"Prompt Processing": ["mean", "std"],
|
| 229 |
"Token Generation": ["mean", "std"],
|
| 230 |
-
|
| 231 |
-
"
|
| 232 |
-
"
|
| 233 |
-
"
|
| 234 |
-
"
|
| 235 |
-
"
|
| 236 |
-
sorted(set(x))
|
| 237 |
-
), # Concatenate unique versions
|
| 238 |
}.items()
|
| 239 |
if col not in grouping_cols
|
| 240 |
}
|
| 241 |
|
| 242 |
-
# # Extract initSettings if needed
|
| 243 |
-
# init_settings_cols = {
|
| 244 |
-
# "n_threads": "n_threads",
|
| 245 |
-
# "flash_attn": "flash_attn",
|
| 246 |
-
# "cache_type_k": "cache_type_k",
|
| 247 |
-
# "cache_type_v": "cache_type_v",
|
| 248 |
-
# "n_context": "n_context",
|
| 249 |
-
# "n_batch": "n_batch",
|
| 250 |
-
# "n_ubatch": "n_ubatch",
|
| 251 |
-
# }
|
| 252 |
-
|
| 253 |
-
# for col, setting in init_settings_cols.items():
|
| 254 |
-
# if col not in filtered_df.columns:
|
| 255 |
-
# filtered_df[col] = filtered_df["initSettings"].apply(
|
| 256 |
-
# lambda x: x.get(setting)
|
| 257 |
-
# )
|
| 258 |
-
# if col not in grouping_cols:
|
| 259 |
-
# agg_dict[col] = "first"
|
| 260 |
-
|
| 261 |
# Group and aggregate
|
| 262 |
grouped_df = filtered_df.groupby(grouping_cols).agg(agg_dict).reset_index()
|
| 263 |
|
|
@@ -266,33 +281,10 @@ def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
|
|
| 266 |
col[0] if col[1] == "" else f"{col[0]} ({col[1]})" for col in grouped_df.columns
|
| 267 |
]
|
| 268 |
|
| 269 |
-
# Round numeric columns
|
| 270 |
-
numeric_cols = [
|
| 271 |
-
col
|
| 272 |
-
for col in grouped_df.columns
|
| 273 |
-
if any(x in col for x in ["mean", "std", "Memory", "Size"])
|
| 274 |
-
]
|
| 275 |
-
grouped_df[numeric_cols] = grouped_df[numeric_cols].round(2)
|
| 276 |
-
|
| 277 |
-
# Sort using the actual column names we have
|
| 278 |
-
sort_cols = []
|
| 279 |
-
if "Model Size (first)" in grouped_df.columns:
|
| 280 |
-
sort_cols.append("Model Size (first)")
|
| 281 |
-
if "PP Config (first)" in grouped_df.columns:
|
| 282 |
-
sort_cols.append("PP Config (first)")
|
| 283 |
-
if "Token Generation (mean)" in grouped_df.columns:
|
| 284 |
-
sort_cols.append("Token Generation (mean)")
|
| 285 |
-
|
| 286 |
-
if sort_cols: # Only sort if we have columns to sort by
|
| 287 |
-
grouped_df = grouped_df.sort_values(
|
| 288 |
-
by=sort_cols, ascending=[False] + [True] * (len(sort_cols) - 1)
|
| 289 |
-
)
|
| 290 |
-
|
| 291 |
# Rename columns for display
|
| 292 |
column_mapping = {
|
| 293 |
"Prompt Processing (mean)": "PP Avg (t/s)",
|
| 294 |
"Prompt Processing (std)": "PP Std (t/s)",
|
| 295 |
-
"Prompt Processing (count)": "Runs",
|
| 296 |
"Token Generation (mean)": "TG Avg (t/s)",
|
| 297 |
"Token Generation (std)": "TG Std (t/s)",
|
| 298 |
"Memory Usage (%) (mean)": "Memory Usage (%)",
|
|
@@ -352,34 +344,26 @@ def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
|
|
| 352 |
# Combine both sets to get unique columns
|
| 353 |
all_cols = mapped_visible | mapped_grouping
|
| 354 |
|
| 355 |
-
# Create final display columns list
|
| 356 |
display_cols = []
|
| 357 |
|
| 358 |
-
#
|
| 359 |
-
|
| 360 |
-
mapped_col = column_name_mapping.get(col, col)
|
| 361 |
-
if mapped_col in all_cols:
|
| 362 |
-
display_cols.append(mapped_col)
|
| 363 |
-
all_cols.remove(mapped_col)
|
| 364 |
|
| 365 |
-
#
|
| 366 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
else:
|
| 368 |
# Default columns if none selected
|
| 369 |
-
display_cols = [
|
| 370 |
-
"Device",
|
| 371 |
-
"Platform",
|
| 372 |
-
"Model ID",
|
| 373 |
-
"Model Size",
|
| 374 |
-
"PP Avg (ms)",
|
| 375 |
-
"TG Avg (ms)",
|
| 376 |
-
"Memory Usage (%)",
|
| 377 |
-
]
|
| 378 |
-
|
| 379 |
-
# Ensure all display columns exist in the DataFrame
|
| 380 |
-
display_cols = [col for col in display_cols if col in grouped_df.columns]
|
| 381 |
|
| 382 |
# Display the filtered and grouped table
|
|
|
|
| 383 |
st.dataframe(
|
| 384 |
grouped_df[display_cols],
|
| 385 |
use_container_width=True,
|
|
|
|
| 83 |
(n_threads >= n_threads_min) & (n_threads <= n_threads_max)
|
| 84 |
]
|
| 85 |
|
| 86 |
+
n_gpu_layers_min, n_gpu_layers_max = filters["n_gpu_layers"]
|
| 87 |
+
if n_gpu_layers_min is not None and n_gpu_layers_max is not None:
|
| 88 |
+
n_gpu_layers = filtered_df["n_gpu_layers"]
|
| 89 |
+
filtered_df = filtered_df[
|
| 90 |
+
(n_gpu_layers >= n_gpu_layers_min) & (n_gpu_layers <= n_gpu_layers_max)
|
| 91 |
+
]
|
| 92 |
+
|
| 93 |
# Version filter - handle multiple selections
|
| 94 |
if filters.get("Version") != "All" and filters.get("Version"):
|
| 95 |
filtered_df = filtered_df[filtered_df["Version"] == filters["Version"]]
|
|
|
|
| 223 |
st.warning("No data matches the selected filters.")
|
| 224 |
return
|
| 225 |
|
| 226 |
+
# Define the preferred column order (grouped logically)
|
| 227 |
+
column_order = [
|
| 228 |
+
# Device Info
|
| 229 |
+
"Device",
|
| 230 |
+
"Platform",
|
| 231 |
+
"CPU Cores",
|
| 232 |
+
"Total Memory (GB)",
|
| 233 |
+
"Peak Memory (GB)",
|
| 234 |
+
"Memory Usage (%)"
|
| 235 |
+
# Benchmark Results
|
| 236 |
+
"PP Config",
|
| 237 |
+
"PP Avg (t/s)",
|
| 238 |
+
"PP Std (t/s)",
|
| 239 |
+
"TG Config",
|
| 240 |
+
"TG Avg (t/s)",
|
| 241 |
+
"TG Std (t/s)",
|
| 242 |
+
# Model Config
|
| 243 |
+
"Model ID",
|
| 244 |
+
"Model Size",
|
| 245 |
+
"n_threads",
|
| 246 |
+
"flash_attn",
|
| 247 |
+
"cache_type_k",
|
| 248 |
+
"cache_type_v",
|
| 249 |
+
"n_context",
|
| 250 |
+
"n_batch",
|
| 251 |
+
"n_ubatch",
|
| 252 |
+
"Version",
|
| 253 |
+
]
|
| 254 |
+
|
| 255 |
# Group by selected columns
|
| 256 |
grouping_cols = filters["grouping"]
|
| 257 |
if not grouping_cols:
|
| 258 |
grouping_cols = ["Model ID", "Device", "Platform"] # Default grouping
|
| 259 |
|
| 260 |
+
# Create aggregations (excluding grouping columns)
|
| 261 |
agg_dict = {
|
| 262 |
col: agg
|
| 263 |
for col, agg in {
|
| 264 |
"Prompt Processing": ["mean", "std"],
|
| 265 |
"Token Generation": ["mean", "std"],
|
| 266 |
+
"Peak Memory (GB)": "mean",
|
| 267 |
+
"Total Memory (GB)": "first",
|
| 268 |
+
"CPU Cores": "first",
|
| 269 |
+
"Model Size": "first",
|
| 270 |
+
"Version": lambda x: ", ".join(sorted(set(x))),
|
| 271 |
+
"n_gpu_layers": lambda x: ", ".join(map(str, sorted(set(x)))),
|
|
|
|
|
|
|
| 272 |
}.items()
|
| 273 |
if col not in grouping_cols
|
| 274 |
}
|
| 275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
# Group and aggregate
|
| 277 |
grouped_df = filtered_df.groupby(grouping_cols).agg(agg_dict).reset_index()
|
| 278 |
|
|
|
|
| 281 |
col[0] if col[1] == "" else f"{col[0]} ({col[1]})" for col in grouped_df.columns
|
| 282 |
]
|
| 283 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
# Rename columns for display
|
| 285 |
column_mapping = {
|
| 286 |
"Prompt Processing (mean)": "PP Avg (t/s)",
|
| 287 |
"Prompt Processing (std)": "PP Std (t/s)",
|
|
|
|
| 288 |
"Token Generation (mean)": "TG Avg (t/s)",
|
| 289 |
"Token Generation (std)": "TG Std (t/s)",
|
| 290 |
"Memory Usage (%) (mean)": "Memory Usage (%)",
|
|
|
|
| 344 |
# Combine both sets to get unique columns
|
| 345 |
all_cols = mapped_visible | mapped_grouping
|
| 346 |
|
| 347 |
+
# Create final display columns list
|
| 348 |
display_cols = []
|
| 349 |
|
| 350 |
+
# Get all available columns we want to display
|
| 351 |
+
available_cols = set(all_cols)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
|
| 353 |
+
# Add columns in the predefined order
|
| 354 |
+
for col in column_order:
|
| 355 |
+
if col in available_cols:
|
| 356 |
+
display_cols.append(col)
|
| 357 |
+
|
| 358 |
+
# Add any remaining columns that weren't in our predefined order
|
| 359 |
+
remaining_cols = sorted(list(available_cols - set(display_cols)))
|
| 360 |
+
display_cols.extend(remaining_cols)
|
| 361 |
else:
|
| 362 |
# Default columns if none selected
|
| 363 |
+
display_cols = column_order[:8] # First 8 columns from the predefined order
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 364 |
|
| 365 |
# Display the filtered and grouped table
|
| 366 |
+
st.markdown("#### 📊 Benchmark Results")
|
| 367 |
st.dataframe(
|
| 368 |
grouped_df[display_cols],
|
| 369 |
use_container_width=True,
|