Mandark-droid committed on
Commit
dafc8f1
·
1 Parent(s): 7f90c34

Fix HTMLPlus data attributes generation to match reference implementation

Browse files

- Convert entire DataFrame row to dictionary
- Dynamically generate all data-* attributes
- Convert underscores to hyphens in attribute names (data-run-id, etc.)
- Properly handle None/NaN values by converting them to the string 'None'
- Add debug logging to event handler to diagnose issues
- Match the working pattern from the reference implementation

Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +6 -0
  3. components/leaderboard_table.py +19 -34
README.md CHANGED
@@ -7,7 +7,7 @@ sdk: gradio
7
  sdk_version: 5.49.1
8
  app_file: app.py
9
  short_description: AI agent evaluation with MCP-powered intelligence
10
- pinned: false
11
  tags:
12
  - mcp-in-action-track-enterprise
13
  - agent-evaluation
 
7
  sdk_version: 5.49.1
8
  app_file: app.py
9
  short_description: AI agent evaluation with MCP-powered intelligence
10
+ pinned: true
11
  tags:
12
  - mcp-in-action-track-enterprise
13
  - agent-evaluation
app.py CHANGED
@@ -1142,6 +1142,12 @@ def on_html_leaderboard_select(evt: gr.SelectData):
1142
  # evt.index = CSS selector that was matched (e.g., "tr")
1143
  # evt.value = dictionary of data-* attributes from the HTML element
1144
 
 
 
 
 
 
 
1145
  if evt.index != "tr":
1146
  gr.Warning("Invalid selection")
1147
  return {
 
1142
  # evt.index = CSS selector that was matched (e.g., "tr")
1143
  # evt.value = dictionary of data-* attributes from the HTML element
1144
 
1145
+ print(f"[DEBUG] HTMLPlus event triggered")
1146
+ print(f"[DEBUG] evt.index: {evt.index}")
1147
+ print(f"[DEBUG] evt.value type: {type(evt.value)}")
1148
+ print(f"[DEBUG] evt.value keys: {list(evt.value.keys()) if isinstance(evt.value, dict) else 'Not a dict'}")
1149
+ print(f"[DEBUG] evt.value: {evt.value}")
1150
+
1151
  if evt.index != "tr":
1152
  gr.Warning("Invalid selection")
1153
  return {
components/leaderboard_table.py CHANGED
@@ -272,7 +272,24 @@ def generate_leaderboard_html(
272
  for idx, row in df_sorted.iterrows():
273
  rank = idx + 1
274
 
275
- # Get values with safe defaults
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  model = row.get('model', 'Unknown')
277
  agent_type = row.get('agent_type', 'unknown')
278
  provider = row.get('provider', 'unknown')
@@ -346,40 +363,8 @@ def generate_leaderboard_html(
346
  run_id = row.get('run_id', 'N/A')
347
  run_id_short = run_id[:8] + '...' if len(run_id) > 8 else run_id
348
 
349
- # Get dataset references
350
- results_dataset = row.get('results_dataset', '')
351
- traces_dataset = row.get('traces_dataset', '')
352
- metrics_dataset = row.get('metrics_dataset', '')
353
-
354
  html += f"""
355
- <tr
356
- data-run-id="{run_id}"
357
- data-rank="{rank}"
358
- data-model="{model}"
359
- data-agent-type="{agent_type}"
360
- data-provider="{provider}"
361
- data-success-rate="{success_rate}"
362
- data-total-tests="{total_tests}"
363
- data-successful-tests="{successful_tests}"
364
- data-failed-tests="{failed_tests}"
365
- data-avg-steps="{avg_steps}"
366
- data-avg-duration-ms="{avg_duration_ms}"
367
- data-total-tokens="{total_tokens}"
368
- data-total-cost-usd="{total_cost_usd}"
369
- data-co2-emissions-g="{co2_emissions_g}"
370
- data-gpu-utilization-avg="{gpu_utilization_avg if pd.notna(gpu_utilization_avg) else 'None'}"
371
- data-gpu-memory-avg-mib="{gpu_memory_avg_mib if pd.notna(gpu_memory_avg_mib) else 'None'}"
372
- data-gpu-memory-max-mib="{gpu_memory_max_mib if pd.notna(gpu_memory_max_mib) else 'None'}"
373
- data-gpu-temperature-avg="{gpu_temperature_avg if pd.notna(gpu_temperature_avg) else 'None'}"
374
- data-gpu-temperature-max="{gpu_temperature_max if pd.notna(gpu_temperature_max) else 'None'}"
375
- data-gpu-power-avg-w="{gpu_power_avg_w if pd.notna(gpu_power_avg_w) else 'None'}"
376
- data-timestamp="{timestamp}"
377
- data-submitted-by="{submitted_by}"
378
- data-results-dataset="{results_dataset}"
379
- data-traces-dataset="{traces_dataset}"
380
- data-metrics-dataset="{metrics_dataset}"
381
- class="tm-clickable-row"
382
- >
383
  <td>{get_rank_badge(rank)}</td>
384
  <td class="tm-run-id" title="{run_id}">{run_id_short}</td>
385
  <td class="tm-model-name">{model}</td>
 
272
  for idx, row in df_sorted.iterrows():
273
  rank = idx + 1
274
 
275
+ # Convert row to dictionary for data attributes (like reference implementation)
276
+ row_dict = row.to_dict()
277
+
278
+ # Generate data attributes dynamically from all row data
279
+ data_attrs_dict = {}
280
+ for key, value in row_dict.items():
281
+ # Convert underscores to hyphens for HTML data attributes
282
+ attr_name = f"data-{key.replace('_', '-')}"
283
+ # Handle None/NaN values
284
+ if pd.isna(value):
285
+ data_attrs_dict[attr_name] = "None"
286
+ else:
287
+ data_attrs_dict[attr_name] = str(value)
288
+
289
+ # Create the data attributes string
290
+ data_attrs = " ".join([f'{key}="{value}"' for key, value in data_attrs_dict.items()])
291
+
292
+ # Get values with safe defaults for display
293
  model = row.get('model', 'Unknown')
294
  agent_type = row.get('agent_type', 'unknown')
295
  provider = row.get('provider', 'unknown')
 
363
  run_id = row.get('run_id', 'N/A')
364
  run_id_short = run_id[:8] + '...' if len(run_id) > 8 else run_id
365
 
 
 
 
 
 
366
  html += f"""
367
+ <tr {data_attrs} class="tm-clickable-row">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
  <td>{get_rank_badge(rank)}</td>
369
  <td class="tm-run-id" title="{run_id}">{run_id_short}</td>
370
  <td class="tm-model-name">{model}</td>