Spaces:
Running
Running
Mandark-droid
committed on
Commit
·
dafc8f1
1
Parent(s):
7f90c34
Fix HTMLPlus data attributes generation to match reference implementation
Browse files
- Convert entire DataFrame row to dictionary
- Dynamically generate all data-* attributes
- Convert underscores to hyphens in attribute names (data-run-id, etc.)
- Properly handle None/NaN values by converting them to the string 'None'
- Add debug logging to event handler to diagnose issues
- Matches working pattern from reference implementation
- README.md +1 -1
- app.py +6 -0
- components/leaderboard_table.py +19 -34
README.md
CHANGED
|
@@ -7,7 +7,7 @@ sdk: gradio
|
|
| 7 |
sdk_version: 5.49.1
|
| 8 |
app_file: app.py
|
| 9 |
short_description: AI agent evaluation with MCP-powered intelligence
|
| 10 |
-
pinned:
|
| 11 |
tags:
|
| 12 |
- mcp-in-action-track-enterprise
|
| 13 |
- agent-evaluation
|
|
|
|
| 7 |
sdk_version: 5.49.1
|
| 8 |
app_file: app.py
|
| 9 |
short_description: AI agent evaluation with MCP-powered intelligence
|
| 10 |
+
pinned: true
|
| 11 |
tags:
|
| 12 |
- mcp-in-action-track-enterprise
|
| 13 |
- agent-evaluation
|
app.py
CHANGED
|
@@ -1142,6 +1142,12 @@ def on_html_leaderboard_select(evt: gr.SelectData):
|
|
| 1142 |
# evt.index = CSS selector that was matched (e.g., "tr")
|
| 1143 |
# evt.value = dictionary of data-* attributes from the HTML element
|
| 1144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1145 |
if evt.index != "tr":
|
| 1146 |
gr.Warning("Invalid selection")
|
| 1147 |
return {
|
|
|
|
| 1142 |
# evt.index = CSS selector that was matched (e.g., "tr")
|
| 1143 |
# evt.value = dictionary of data-* attributes from the HTML element
|
| 1144 |
|
| 1145 |
+
print(f"[DEBUG] HTMLPlus event triggered")
|
| 1146 |
+
print(f"[DEBUG] evt.index: {evt.index}")
|
| 1147 |
+
print(f"[DEBUG] evt.value type: {type(evt.value)}")
|
| 1148 |
+
print(f"[DEBUG] evt.value keys: {list(evt.value.keys()) if isinstance(evt.value, dict) else 'Not a dict'}")
|
| 1149 |
+
print(f"[DEBUG] evt.value: {evt.value}")
|
| 1150 |
+
|
| 1151 |
if evt.index != "tr":
|
| 1152 |
gr.Warning("Invalid selection")
|
| 1153 |
return {
|
components/leaderboard_table.py
CHANGED
|
@@ -272,7 +272,24 @@ def generate_leaderboard_html(
|
|
| 272 |
for idx, row in df_sorted.iterrows():
|
| 273 |
rank = idx + 1
|
| 274 |
|
| 275 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
model = row.get('model', 'Unknown')
|
| 277 |
agent_type = row.get('agent_type', 'unknown')
|
| 278 |
provider = row.get('provider', 'unknown')
|
|
@@ -346,40 +363,8 @@ def generate_leaderboard_html(
|
|
| 346 |
run_id = row.get('run_id', 'N/A')
|
| 347 |
run_id_short = run_id[:8] + '...' if len(run_id) > 8 else run_id
|
| 348 |
|
| 349 |
-
# Get dataset references
|
| 350 |
-
results_dataset = row.get('results_dataset', '')
|
| 351 |
-
traces_dataset = row.get('traces_dataset', '')
|
| 352 |
-
metrics_dataset = row.get('metrics_dataset', '')
|
| 353 |
-
|
| 354 |
html += f"""
|
| 355 |
-
<tr
|
| 356 |
-
data-run-id="{run_id}"
|
| 357 |
-
data-rank="{rank}"
|
| 358 |
-
data-model="{model}"
|
| 359 |
-
data-agent-type="{agent_type}"
|
| 360 |
-
data-provider="{provider}"
|
| 361 |
-
data-success-rate="{success_rate}"
|
| 362 |
-
data-total-tests="{total_tests}"
|
| 363 |
-
data-successful-tests="{successful_tests}"
|
| 364 |
-
data-failed-tests="{failed_tests}"
|
| 365 |
-
data-avg-steps="{avg_steps}"
|
| 366 |
-
data-avg-duration-ms="{avg_duration_ms}"
|
| 367 |
-
data-total-tokens="{total_tokens}"
|
| 368 |
-
data-total-cost-usd="{total_cost_usd}"
|
| 369 |
-
data-co2-emissions-g="{co2_emissions_g}"
|
| 370 |
-
data-gpu-utilization-avg="{gpu_utilization_avg if pd.notna(gpu_utilization_avg) else 'None'}"
|
| 371 |
-
data-gpu-memory-avg-mib="{gpu_memory_avg_mib if pd.notna(gpu_memory_avg_mib) else 'None'}"
|
| 372 |
-
data-gpu-memory-max-mib="{gpu_memory_max_mib if pd.notna(gpu_memory_max_mib) else 'None'}"
|
| 373 |
-
data-gpu-temperature-avg="{gpu_temperature_avg if pd.notna(gpu_temperature_avg) else 'None'}"
|
| 374 |
-
data-gpu-temperature-max="{gpu_temperature_max if pd.notna(gpu_temperature_max) else 'None'}"
|
| 375 |
-
data-gpu-power-avg-w="{gpu_power_avg_w if pd.notna(gpu_power_avg_w) else 'None'}"
|
| 376 |
-
data-timestamp="{timestamp}"
|
| 377 |
-
data-submitted-by="{submitted_by}"
|
| 378 |
-
data-results-dataset="{results_dataset}"
|
| 379 |
-
data-traces-dataset="{traces_dataset}"
|
| 380 |
-
data-metrics-dataset="{metrics_dataset}"
|
| 381 |
-
class="tm-clickable-row"
|
| 382 |
-
>
|
| 383 |
<td>{get_rank_badge(rank)}</td>
|
| 384 |
<td class="tm-run-id" title="{run_id}">{run_id_short}</td>
|
| 385 |
<td class="tm-model-name">{model}</td>
|
|
|
|
| 272 |
for idx, row in df_sorted.iterrows():
|
| 273 |
rank = idx + 1
|
| 274 |
|
| 275 |
+
# Convert row to dictionary for data attributes (like reference implementation)
|
| 276 |
+
row_dict = row.to_dict()
|
| 277 |
+
|
| 278 |
+
# Generate data attributes dynamically from all row data
|
| 279 |
+
data_attrs_dict = {}
|
| 280 |
+
for key, value in row_dict.items():
|
| 281 |
+
# Convert underscores to hyphens for HTML data attributes
|
| 282 |
+
attr_name = f"data-{key.replace('_', '-')}"
|
| 283 |
+
# Handle None/NaN values
|
| 284 |
+
if pd.isna(value):
|
| 285 |
+
data_attrs_dict[attr_name] = "None"
|
| 286 |
+
else:
|
| 287 |
+
data_attrs_dict[attr_name] = str(value)
|
| 288 |
+
|
| 289 |
+
# Create the data attributes string
|
| 290 |
+
data_attrs = " ".join([f'{key}="{value}"' for key, value in data_attrs_dict.items()])
|
| 291 |
+
|
| 292 |
+
# Get values with safe defaults for display
|
| 293 |
model = row.get('model', 'Unknown')
|
| 294 |
agent_type = row.get('agent_type', 'unknown')
|
| 295 |
provider = row.get('provider', 'unknown')
|
|
|
|
| 363 |
run_id = row.get('run_id', 'N/A')
|
| 364 |
run_id_short = run_id[:8] + '...' if len(run_id) > 8 else run_id
|
| 365 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
html += f"""
|
| 367 |
+
<tr {data_attrs} class="tm-clickable-row">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
<td>{get_rank_badge(rank)}</td>
|
| 369 |
<td class="tm-run-id" title="{run_id}">{run_id_short}</td>
|
| 370 |
<td class="tm-model-name">{model}</td>
|