Mandark-droid committed
Commit 3138502 · Parent: baaa457

Add dashboard screen with aggregate statistics and recent runs


- Implement dashboard screen module with stats cards display
- Add 6 metric cards: Total Runs, Avg Accuracy, Avg Latency, Total Tokens, Total Cost, Total CO2
- Include recent evaluations table showing latest 5 runs
- Integrate dashboard navigation with sidebar buttons
- Set dashboard as default landing screen
- Fix console encoding issues for Windows compatibility (see the reviewer note after the file list)

Files changed (2)
  1. app.py +80 -10
  2. screens/dashboard.py +291 -0
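Reviewer note: the Windows compatibility fix in this commit works by stripping emoji from console prints (see the app.py hunks below). An alternative sketch that keeps the emoji by forcing UTF-8 on the console streams — assuming Python 3.7+, and not part of this commit:

```python
# Hypothetical alternative to deleting emoji from print() calls:
# reconfigure stdout/stderr to UTF-8 so Windows consoles can encode them.
# io.TextIOWrapper.reconfigure() is available on Python >= 3.7.
import sys

if sys.stdout.encoding and sys.stdout.encoding.lower() != "utf-8":
    sys.stdout.reconfigure(encoding="utf-8")
    sys.stderr.reconfigure(encoding="utf-8")

print("🚀 Starting TraceMind-AI...")  # no UnicodeEncodeError on cp1252 consoles
```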
app.py CHANGED
@@ -27,6 +27,10 @@ from screens.trace_detail import (
     create_gpu_metrics_dashboard,
     create_gpu_summary_cards
 )
+from screens.dashboard import (
+    create_dashboard_ui,
+    update_dashboard_data
+)
 from utils.navigation import Navigator, Screen
 
 
@@ -388,9 +392,9 @@ data_loader = create_data_loader_from_env()
 navigator = Navigator()
 
 # Pre-load and cache the leaderboard data before building UI
-print("📥 Pre-loading leaderboard data from HuggingFace...")
+print("Pre-loading leaderboard data from HuggingFace...")
 leaderboard_df_cache = data_loader.load_leaderboard()
-print(f"✅ Loaded {len(leaderboard_df_cache)} evaluation runs")
+print(f"Loaded {len(leaderboard_df_cache)} evaluation runs")
 
 # Global state (already populated)
 # leaderboard_df_cache is now set
@@ -895,7 +899,7 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
             Agent Evaluation Platform
         </p>
         <p style="color: rgba(255,255,255,0.8); margin: 10px 0 0 0; font-size: 0.9em;">
-            Powered by Gradio 6 🚀 | HuggingFace Jobs | TraceVerde | SmolTrace | MCP | Gemini | Modal
+            Powered by Gradio 🚀 | HuggingFace Jobs | TraceVerde | SmolTrace | MCP | Gemini | Modal
         </p>
     </div>
     """)
@@ -913,9 +917,10 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
 
         # Navigation section
         gr.Markdown("### 🧭 Navigation")
-
+
         # Navigation buttons
-        leaderboard_nav_btn = gr.Button("🏆 Leaderboard", variant="primary", size="lg")
+        dashboard_nav_btn = gr.Button("📊 Dashboard", variant="primary", size="lg")
+        leaderboard_nav_btn = gr.Button("🏆 Leaderboard", variant="secondary", size="lg")
         compare_nav_btn = gr.Button("⚖️ Compare", variant="secondary", size="lg")
         docs_nav_btn = gr.Button("📚 Documentation", variant="secondary", size="lg")
 
@@ -944,10 +949,13 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
                 label="Agent Type",
                 info="Tool: Function calling | Code: Code execution | Both: Hybrid"
             )
-
+
     # Main content area
+    # Screen 0: Dashboard
+    dashboard_screen, dashboard_components = create_dashboard_ui()
+
     # Screen 1: Main Leaderboard
-    with gr.Column(visible=True) as leaderboard_screen:
+    with gr.Column(visible=False) as leaderboard_screen:
         gr.Markdown("## 🏆 Agent Evaluation Leaderboard")
         with gr.Tabs():
             with gr.TabItem("🏆 Leaderboard"):
@@ -1106,7 +1114,53 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
             trace_ask_btn = gr.Button("Ask", variant="primary")
             trace_answer = gr.Markdown("*Ask a question to get AI-powered insights*")
 
+    # Navigation handlers (define before use)
+    def navigate_to_dashboard():
+        """Navigate to dashboard screen and load dashboard data"""
+        try:
+            leaderboard_df = data_loader.load_leaderboard()
+            dashboard_updates = update_dashboard_data(leaderboard_df, dashboard_components)
+        except Exception as e:
+            print(f"[ERROR] Loading dashboard data: {e}")
+            dashboard_updates = {}
+
+        # Combine navigation updates with dashboard data updates
+        result = {
+            dashboard_screen: gr.update(visible=True),
+            leaderboard_screen: gr.update(visible=False),
+            run_detail_screen: gr.update(visible=False),
+            trace_detail_screen: gr.update(visible=False),
+            dashboard_nav_btn: gr.update(variant="primary"),
+            leaderboard_nav_btn: gr.update(variant="secondary"),
+            compare_nav_btn: gr.update(variant="secondary"),
+            docs_nav_btn: gr.update(variant="secondary"),
+        }
+        result.update(dashboard_updates)
+        return result
+
+    def navigate_to_leaderboard():
+        """Navigate to leaderboard screen"""
+        return {
+            dashboard_screen: gr.update(visible=False),
+            leaderboard_screen: gr.update(visible=True),
+            run_detail_screen: gr.update(visible=False),
+            trace_detail_screen: gr.update(visible=False),
+            dashboard_nav_btn: gr.update(variant="secondary"),
+            leaderboard_nav_btn: gr.update(variant="primary"),
+            compare_nav_btn: gr.update(variant="secondary"),
+            docs_nav_btn: gr.update(variant="secondary"),
+        }
+
     # Event handlers
+    # Load dashboard on app start
+    app.load(
+        fn=navigate_to_dashboard,
+        outputs=[
+            dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen,
+            dashboard_nav_btn, leaderboard_nav_btn, compare_nav_btn, docs_nav_btn
+        ] + list(dashboard_components.values())
+    )
+
     app.load(
         fn=load_leaderboard,
         outputs=[leaderboard_by_model, model_filter, sidebar_model_filter]
@@ -1191,6 +1245,22 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
         outputs=[mcp_insights]
     )
 
+    # Wire up navigation buttons
+    dashboard_nav_btn.click(
+        fn=navigate_to_dashboard,
+        outputs=[
+            dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen,
+            dashboard_nav_btn, leaderboard_nav_btn, compare_nav_btn, docs_nav_btn
+        ] + list(dashboard_components.values())
+    )
+
+    leaderboard_nav_btn.click(
+        fn=navigate_to_leaderboard,
+        outputs=[
+            dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen,
+            dashboard_nav_btn, leaderboard_nav_btn, compare_nav_btn, docs_nav_btn
+        ]
+    )
 
     leaderboard_table.select(
         fn=on_drilldown_select,
@@ -1238,9 +1308,9 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
 
 
 if __name__ == "__main__":
-    print("🚀 Starting TraceMind-AI...")
-    print(f"📊 Data Source: {os.getenv('DATA_SOURCE', 'both')}")
-    print(f"📁 JSON Path: {os.getenv('JSON_DATA_PATH', './sample_data')}")
+    print("Starting TraceMind-AI...")
+    print(f"Data Source: {os.getenv('DATA_SOURCE', 'both')}")
+    print(f"JSON Path: {os.getenv('JSON_DATA_PATH', './sample_data')}")
 
     app.launch(
         server_name="0.0.0.0",
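Reviewer note: navigate_to_dashboard() and navigate_to_leaderboard() return dicts keyed by components rather than positional tuples. Gradio applies each gr.update() to the named component, but every key must still be declared in the event's outputs list — which is why both app.load and the click handlers enumerate all screens and buttons. A self-contained sketch of the same pattern (hypothetical two-screen app, not from this repo):

```python
import gradio as gr

with gr.Blocks() as demo:
    home_btn = gr.Button("Home", variant="primary")
    about_btn = gr.Button("About", variant="secondary")
    with gr.Column(visible=True) as home_screen:
        gr.Markdown("## Home")
    with gr.Column(visible=False) as about_screen:
        gr.Markdown("## About")

    def show_about():
        # A dict return updates only the components it names;
        # each key must still appear in the event's `outputs`.
        return {
            home_screen: gr.update(visible=False),
            about_screen: gr.update(visible=True),
            home_btn: gr.update(variant="secondary"),
            about_btn: gr.update(variant="primary"),
        }

    about_btn.click(fn=show_about,
                    outputs=[home_screen, about_screen, home_btn, about_btn])

demo.launch()
```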
screens/dashboard.py ADDED
@@ -0,0 +1,291 @@
+"""
+Dashboard Screen for TraceMind-AI
+Displays aggregate statistics and recent evaluation runs
+"""
+
+import gradio as gr
+import pandas as pd
+
+
+def calculate_aggregate_stats(leaderboard_df):
+    """Calculate aggregate statistics for dashboard"""
+    if leaderboard_df.empty:
+        return {
+            'total_runs': 0,
+            'avg_accuracy': 0.0,
+            'avg_latency': 0.0,
+            'total_tokens': 0,
+            'total_cost': 0.0,
+            'avg_cost': 0.0,
+            'total_co2': 0.0
+        }
+
+    return {
+        'total_runs': len(leaderboard_df),
+        'avg_accuracy': leaderboard_df['success_rate'].mean() if 'success_rate' in leaderboard_df.columns else 0.0,
+        'avg_latency': leaderboard_df['avg_duration_ms'].mean() / 1000 if 'avg_duration_ms' in leaderboard_df.columns else 0.0,
+        'total_tokens': int(leaderboard_df['total_tokens'].sum()) if 'total_tokens' in leaderboard_df.columns else 0,
+        'total_cost': leaderboard_df['total_cost_usd'].sum() if 'total_cost_usd' in leaderboard_df.columns else 0.0,
+        'avg_cost': leaderboard_df['total_cost_usd'].mean() if 'total_cost_usd' in leaderboard_df.columns else 0.0,
+        'total_co2': leaderboard_df['co2_emissions_g'].sum() if 'co2_emissions_g' in leaderboard_df.columns else 0.0
+    }
+
+
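A quick sanity check of calculate_aggregate_stats() on a toy frame — the column names follow the leaderboard schema used above; the numbers are invented:

```python
import pandas as pd

df = pd.DataFrame({
    "success_rate":    [90.0, 80.0],    # percent
    "avg_duration_ms": [1500.0, 500.0],
    "total_tokens":    [1200, 800],
    "total_cost_usd":  [0.012, 0.008],
    "co2_emissions_g": [0.5, 0.3],
})
stats = calculate_aggregate_stats(df)
# -> {'total_runs': 2, 'avg_accuracy': 85.0, 'avg_latency': 1.0,
#     'total_tokens': 2000, 'total_cost': 0.02, 'avg_cost': 0.01,
#     'total_co2': 0.8}   (float sums approximate)
```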
+def generate_stats_card(title, value, emoji, gradient_colors, description):
+    """
+    Generate HTML for a single statistics card
+
+    Args:
+        title: Card title
+        value: Main value to display
+        emoji: Emoji icon
+        gradient_colors: Tuple of (start_color, end_color) for gradient
+        description: Description text
+    """
+    return f"""
+    <div style="background: linear-gradient(135deg, {gradient_colors[0]} 0%, {gradient_colors[1]} 100%);
+                padding: 25px;
+                border-radius: 12px;
+                box-shadow: 0 4px 12px rgba(102, 126, 234, 0.3);
+                color: white;
+                min-height: 150px;">
+        <div style="display: flex; align-items: center; justify-content: space-between;">
+            <div>
+                <div style="font-size: 3em; font-weight: bold; margin: 10px 0;">{value}</div>
+                <div style="font-size: 1.1em; opacity: 0.9;">{emoji} {title}</div>
+            </div>
+        </div>
+        <div style="margin-top: 15px; font-size: 0.9em; opacity: 0.8;">
+            <span style="background: rgba(255,255,255,0.2); padding: 4px 8px; border-radius: 4px;">
+                {description}
+            </span>
+        </div>
+    </div>
+    """
+
+
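For reference, one card rendered on its own — note the arguments are interpolated into the HTML unescaped, which is fine for the hard-coded card texts used in this module:

```python
html = generate_stats_card(
    title="Total Runs",
    value=42,
    emoji="🚀",
    gradient_colors=("#667eea", "#764ba2"),
    description="All evaluations",
)
# `html` is a gradient <div> string, ready to pass to gr.HTML(html)
```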
+def create_dashboard_cards(stats):
+    """
+    Create all dashboard stat cards from stats dictionary
+
+    Args:
+        stats: Dictionary with aggregate statistics
+
+    Returns:
+        Dictionary of card HTML strings
+    """
+    cards = {}
+
+    # Card 1: Total Runs
+    cards['total_runs'] = generate_stats_card(
+        title="Total Runs",
+        value=stats['total_runs'],
+        emoji="🚀",
+        gradient_colors=("#667eea", "#764ba2"),
+        description="All evaluations"
+    )
+
+    # Card 2: Avg Accuracy
+    cards['avg_accuracy'] = generate_stats_card(
+        title="Avg Accuracy",
+        value=f"{stats['avg_accuracy']:.1f}%",
+        emoji="🎯",
+        gradient_colors=("#f093fb", "#f5576c"),
+        description="Success rate"
+    )
+
+    # Card 3: Avg Latency
+    cards['avg_latency'] = generate_stats_card(
+        title="Avg Latency",
+        value=f"{stats['avg_latency']:.2f}s",
+        emoji="⚡",
+        gradient_colors=("#4facfe", "#00f2fe"),
+        description="Response time"
+    )
+
+    # Card 4: Total Tokens
+    cards['total_tokens'] = generate_stats_card(
+        title="Total Tokens",
+        value=f"{stats['total_tokens']:,}",
+        emoji="💬",
+        gradient_colors=("#43e97b", "#38f9d7"),
+        description="Across all runs"
+    )
+
+    # Card 5: Total Cost
+    cards['total_cost'] = generate_stats_card(
+        title="Total Cost",
+        value=f"${stats['total_cost']:.4f}",
+        emoji="💰",
+        gradient_colors=("#fa709a", "#fee140"),
+        description="All evaluations"
+    )
+
+    # Card 6: Total CO2
+    cards['total_co2'] = generate_stats_card(
+        title="Total CO2",
+        value=f"{stats['total_co2']:.2f}g",
+        emoji="🌱",
+        gradient_colors=("#30cfd0", "#330867"),
+        description="Carbon emissions"
+    )
+
+    return cards
+
+
+def prepare_recent_runs_data(leaderboard_df, n=5):
+    """
+    Prepare data for recent runs table
+
+    Args:
+        leaderboard_df: Leaderboard dataframe
+        n: Number of recent runs to show
+
+    Returns:
+        List of lists for Gradio DataFrame
+    """
+    recent_runs_data = []
+
+    if not leaderboard_df.empty:
+        # Convert timestamp to datetime to avoid type errors during sorting
+        if 'timestamp' in leaderboard_df.columns:
+            leaderboard_df['timestamp'] = pd.to_datetime(leaderboard_df['timestamp'], errors='coerce')
+            recent_df = leaderboard_df.sort_values('timestamp', ascending=False).head(n)
+        else:
+            recent_df = leaderboard_df.head(n)
+
+        for _, row in recent_df.iterrows():
+            # Format duration
+            duration_ms = row.get('avg_duration_ms', 0)
+            if duration_ms >= 1000:
+                duration_str = f"{duration_ms/1000:.2f}s"
+            else:
+                duration_str = f"{duration_ms:.0f}ms"
+
+            recent_runs_data.append([
+                row.get('model', 'N/A'),
+                f"{row.get('success_rate', 0):.1f}%",
+                f"${row.get('total_cost_usd', 0):.4f}",
+                duration_str,
+                row.get('timestamp', 'N/A')
+            ])
+
+    return recent_runs_data
+
+
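Worth noting: prepare_recent_runs_data() writes the coerced timestamp column back into the caller's DataFrame, so the shared leaderboard cache ends up with a datetime64 column after the first call (harmless here, since the conversion is idempotent). A toy invocation with invented rows:

```python
df = pd.DataFrame({
    "model": ["model-a", "model-b"],    # hypothetical model names
    "success_rate": [90.0, 80.0],
    "total_cost_usd": [0.012, 0.008],
    "avg_duration_ms": [1500.0, 500.0],
    "timestamp": ["2025-01-02T12:00", "2025-01-01T12:00"],
})
rows = prepare_recent_runs_data(df, n=5)
# newest run first:
# [['model-a', '90.0%', '$0.0120', '1.50s', Timestamp('2025-01-02 12:00:00')],
#  ['model-b', '80.0%', '$0.0080', '500ms', Timestamp('2025-01-01 12:00:00')]]
```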
+def create_dashboard_ui():
+    """
+    Create the dashboard screen UI components
+
+    Returns:
+        Tuple of (screen_column, component_dict)
+    """
+    components = {}
+
+    with gr.Column(visible=True) as dashboard_screen:
+        gr.Markdown("## 📊 Dashboard")
+        gr.Markdown("*Overview of agent evaluation metrics*")
+
+        # Stats cards in draggable grid layout
+        with gr.Row():
+            # Card 1: Total Runs
+            with gr.Draggable():
+                components['total_runs_card'] = gr.HTML(
+                    generate_stats_card(
+                        "Total Runs", "0", "🚀",
+                        ("#667eea", "#764ba2"),
+                        "All evaluations"
+                    )
+                )
+
+            # Card 2: Avg Accuracy
+            with gr.Draggable():
+                components['avg_accuracy_card'] = gr.HTML(
+                    generate_stats_card(
+                        "Avg Accuracy", "0%", "🎯",
+                        ("#f093fb", "#f5576c"),
+                        "Success rate"
+                    )
+                )
+
+        with gr.Row():
+            # Card 3: Avg Latency
+            with gr.Draggable():
+                components['avg_latency_card'] = gr.HTML(
+                    generate_stats_card(
+                        "Avg Latency", "0.0s", "⚡",
+                        ("#4facfe", "#00f2fe"),
+                        "Response time"
+                    )
+                )
+
+            # Card 4: Total Tokens
+            with gr.Draggable():
+                components['total_tokens_card'] = gr.HTML(
+                    generate_stats_card(
+                        "Total Tokens", "0", "💬",
+                        ("#43e97b", "#38f9d7"),
+                        "Across all runs"
+                    )
+                )
+
+        with gr.Row():
+            # Card 5: Total Cost
+            with gr.Draggable():
+                components['total_cost_card'] = gr.HTML(
+                    generate_stats_card(
+                        "Total Cost", "$0.00", "💰",
+                        ("#fa709a", "#fee140"),
+                        "All evaluations"
+                    )
+                )
+
+            # Card 6: Total CO2
+            with gr.Draggable():
+                components['total_co2_card'] = gr.HTML(
+                    generate_stats_card(
+                        "Total CO2", "0g", "🌱",
+                        ("#30cfd0", "#330867"),
+                        "Carbon emissions"
+                    )
+                )
+
+        gr.Markdown("---")
+
+        # Recent Runs Preview
+        gr.Markdown("### 📋 Recent Evaluations")
+        components['recent_runs_table'] = gr.Dataframe(
+            headers=["Model", "Success Rate", "Cost", "Duration", "Timestamp"],
+            interactive=False,
+            wrap=True,
+            row_count=5,
+            label="Latest 5 runs"
+        )
+
+    return dashboard_screen, components
+
+
+def update_dashboard_data(leaderboard_df, components):
+    """
+    Update dashboard stats cards and recent runs table
+
+    Args:
+        leaderboard_df: Leaderboard dataframe
+        components: Dictionary of Gradio components
+
+    Returns:
+        Dictionary of component updates
+    """
+    stats = calculate_aggregate_stats(leaderboard_df)
+    cards = create_dashboard_cards(stats)
+    recent_runs_data = prepare_recent_runs_data(leaderboard_df)
+
+    return {
+        components['total_runs_card']: gr.update(value=cards['total_runs']),
+        components['avg_accuracy_card']: gr.update(value=cards['avg_accuracy']),
+        components['avg_latency_card']: gr.update(value=cards['avg_latency']),
+        components['total_tokens_card']: gr.update(value=cards['total_tokens']),
+        components['total_cost_card']: gr.update(value=cards['total_cost']),
+        components['total_co2_card']: gr.update(value=cards['total_co2']),
+        components['recent_runs_table']: gr.update(value=recent_runs_data)
+    }
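The module has no entry point of its own; a minimal harness to preview the screen in isolation — assuming the toy schema above, not part of the commit:

```python
if __name__ == "__main__":
    demo_df = pd.DataFrame({
        "model": ["demo-model"],
        "success_rate": [90.0],
        "avg_duration_ms": [1500.0],
        "total_tokens": [1200],
        "total_cost_usd": [0.012],
        "co2_emissions_g": [0.5],
        "timestamp": ["2025-01-01T12:00"],
    })
    with gr.Blocks() as demo:
        screen, components = create_dashboard_ui()
        # update_dashboard_data returns {component: gr.update(...)},
        # so every component must be listed in `outputs`.
        demo.load(
            fn=lambda: update_dashboard_data(demo_df, components),
            outputs=list(components.values()),
        )
    demo.launch()
```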