Tonic committed on
Commit
fc2b4e3
·
verified ·
1 Parent(s): 2d87eb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +268 -35
app.py CHANGED
@@ -302,7 +302,7 @@ class TrackioSpace:
302
  'use_chat_template': True,
303
  'chat_template_kwargs': {'add_generation_prompt': True, 'no_think_system_message': True},
304
  'enable_tracking': True,
305
- 'trackio_url': 'https://huggingface.co/spaces/Tonic/track-tonic',
306
  'trackio_token': None,
307
  'log_artifacts': True,
308
  'log_metrics': True,
@@ -402,7 +402,7 @@ class TrackioSpace:
402
  'use_chat_template': True,
403
  'chat_template_kwargs': {'add_generation_prompt': True, 'no_think_system_message': True},
404
  'enable_tracking': True,
405
- 'trackio_url': 'https://huggingface.co/spaces/Tonic/track-tonic',
406
  'trackio_token': None,
407
  'log_artifacts': True,
408
  'log_metrics': True,
@@ -1143,33 +1143,63 @@ def create_metrics_plot(experiment_id: str, metric_name: str = "loss") -> go.Fig
1143
  )
1144
  return fig
1145
 
 
 
 
 
 
 
 
 
 
 
1146
  # Ensure steps are numeric and monotonically increasing to avoid zig-zag lines
1147
  try:
1148
  df = df.copy()
1149
- # If step looks constant or missing, try to derive it from a common field
1150
- if 'step' not in df or df['step'].nunique() <= 1:
1151
- for alt in ['train/global_step', 'global_step', 'train/step']:
1152
- if alt in df.columns and df[alt].notna().any():
1153
- df['step'] = pd.to_numeric(df[alt], errors='coerce')
1154
- break
1155
- # If still missing or constant, fallback to an inferred counter by order of arrival
1156
- if 'step' not in df.columns or df['step'].isna().all() or df['step'].nunique() <= 1:
1157
- df['step'] = range(1, len(df) + 1)
 
 
1158
  else:
1159
- df['step'] = pd.to_numeric(df.get('step', -1), errors='coerce').fillna(-1)
1160
- df.sort_values('step', inplace=True)
 
 
 
 
 
 
 
 
 
 
 
1161
  except Exception:
1162
- pass
1163
- fig = px.line(df, x='step', y=metric_name, title=f'{metric_name} over time')
 
 
 
 
 
1164
  fig.update_layout(
1165
- xaxis_title="Training Step",
1166
  yaxis_title=metric_name.title(),
1167
  hovermode='x unified'
1168
  )
1169
- # Avoid interpolating across missing steps which can create odd visuals
1170
  try:
1171
  for trace in fig.data:
1172
- trace.connectgaps = False
 
 
1173
  except Exception:
1174
  pass
1175
  return fig
@@ -1547,6 +1577,16 @@ def create_combined_metrics_plot(experiment_id: str) -> go.Figure:
1547
  # Define colors for different metrics
1548
  colors = ['blue', 'red', 'green', 'orange', 'purple', 'brown', 'pink', 'gray', 'cyan', 'magenta']
1549
 
 
 
 
 
 
 
 
 
 
 
1550
  for i, metric in enumerate(numeric_cols):
1551
  if metric in df.columns and not df[metric].isna().all():
1552
  row = (i // n_cols) + 1
@@ -1556,31 +1596,54 @@ def create_combined_metrics_plot(experiment_id: str) -> go.Figure:
1556
  # Clean steps for each subplot too
1557
  try:
1558
  df_sub = df.copy()
1559
- if 'step' not in df_sub or df_sub['step'].nunique() <= 1:
1560
- for alt in ['train/global_step', 'global_step', 'train/step']:
1561
- if alt in df_sub.columns and df_sub[alt].notna().any():
1562
- df_sub['step'] = pd.to_numeric(df_sub[alt], errors='coerce')
1563
- break
1564
- if 'step' not in df_sub.columns or df_sub['step'].isna().all() or df_sub['step'].nunique() <= 1:
1565
- df_sub['step'] = range(1, len(df_sub) + 1)
 
 
 
1566
  else:
1567
- df_sub['step'] = pd.to_numeric(df_sub.get('step', -1), errors='coerce').fillna(-1)
1568
- df_sub.sort_values('step', inplace=True)
 
 
 
 
 
 
 
 
 
 
 
 
1569
  except Exception:
1570
  df_sub = df
 
 
 
1571
  fig.add_trace(
1572
  go.Scatter(
1573
- x=df_sub['step'].tolist(),
1574
- y=df_sub[metric].tolist(),
1575
  mode='lines+markers',
1576
  name=metric,
1577
  line=dict(width=2, color=color),
1578
  marker=dict(size=4, color=color),
1579
  showlegend=False,
1580
- connectgaps=False
1581
  ),
1582
  row=row, col=col
1583
  )
 
 
 
 
 
1584
 
1585
  fig.update_layout(
1586
  title=f"All Metrics for Experiment {experiment_id}",
@@ -1677,7 +1740,7 @@ def create_experiment_comparison_from_selection(selected_experiments: list, sele
1677
  plot_bgcolor='white', paper_bgcolor='white'
1678
  )
1679
  return fig
1680
-
1681
  if not selected_metrics:
1682
  fig = go.Figure()
1683
  fig.add_annotation(
@@ -1691,10 +1754,180 @@ def create_experiment_comparison_from_selection(selected_experiments: list, sele
1691
  plot_bgcolor='white', paper_bgcolor='white'
1692
  )
1693
  return fig
1694
-
1695
- # Use the existing comparison function with comma-separated IDs
1696
- experiment_ids_str = ",".join(selected_experiments)
1697
- return create_experiment_comparison(experiment_ids_str)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1698
 
1699
  except Exception as e:
1700
  logger.error(f"Error creating comparison from selection: {str(e)}")
 
302
  'use_chat_template': True,
303
  'chat_template_kwargs': {'add_generation_prompt': True, 'no_think_system_message': True},
304
  'enable_tracking': True,
305
+ 'trackio_url': 'https://tonic-test-trackio-test.hf.space',
306
  'trackio_token': None,
307
  'log_artifacts': True,
308
  'log_metrics': True,
 
402
  'use_chat_template': True,
403
  'chat_template_kwargs': {'add_generation_prompt': True, 'no_think_system_message': True},
404
  'enable_tracking': True,
405
+ 'trackio_url': 'https://tonic-test-trackio-test.hf.space',
406
  'trackio_token': None,
407
  'log_artifacts': True,
408
  'log_metrics': True,
 
1143
  )
1144
  return fig
1145
 
1146
+ # Helper predicates
1147
+ def _is_eval_metric(name: str) -> bool:
1148
+ return name.startswith('eval_') or name.startswith('eval/')
1149
+
1150
+ def _is_system_metric(name: str) -> bool:
1151
+ import re
1152
+ if name in ("cpu_percent", "memory_percent"):
1153
+ return True
1154
+ return re.match(r"^gpu_\d+_(memory_allocated|memory_reserved|utilization)$", name) is not None
1155
+
1156
  # Ensure steps are numeric and monotonically increasing to avoid zig-zag lines
1157
  try:
1158
  df = df.copy()
1159
+ # Choose x-axis: time for system metrics, step otherwise
1160
+ use_time_axis = _is_system_metric(metric_name)
1161
+
1162
+ if use_time_axis:
1163
+ # Convert timestamp to datetime for nicer axis rendering
1164
+ df['time'] = pd.to_datetime(df.get('timestamp', ''), errors='coerce')
1165
+ # Fallback order if timestamps are missing
1166
+ if df['time'].isna().all():
1167
+ df['time'] = range(1, len(df) + 1)
1168
+ df.sort_values('time', inplace=True)
1169
+ x_field = 'time'
1170
  else:
1171
+ # If step looks constant or missing, try to derive it from a common field
1172
+ if 'step' not in df or df['step'].nunique() <= 1:
1173
+ for alt in ['train/global_step', 'global_step', 'train/step']:
1174
+ if alt in df.columns and df[alt].notna().any():
1175
+ df['step'] = pd.to_numeric(df[alt], errors='coerce')
1176
+ break
1177
+ # If still missing or constant, fallback to an inferred counter by order of arrival
1178
+ if 'step' not in df.columns or df['step'].isna().all() or df['step'].nunique() <= 1:
1179
+ df['step'] = range(1, len(df) + 1)
1180
+ else:
1181
+ df['step'] = pd.to_numeric(df.get('step', -1), errors='coerce').fillna(-1)
1182
+ df.sort_values('step', inplace=True)
1183
+ x_field = 'step'
1184
  except Exception:
1185
+ x_field = 'step'
1186
+ # Filter rows where the metric is present to ensure connected lines
1187
+ try:
1188
+ plot_df = df[[x_field, metric_name]].dropna(subset=[metric_name]).copy()
1189
+ except Exception:
1190
+ plot_df = df
1191
+ fig = px.line(plot_df, x=x_field, y=metric_name, title=f'{metric_name} over time')
1192
  fig.update_layout(
1193
+ xaxis_title="Time" if (metric_name in ("cpu_percent", "memory_percent") or metric_name.startswith('gpu_')) else "Training Step",
1194
  yaxis_title=metric_name.title(),
1195
  hovermode='x unified'
1196
  )
1197
+ # Connect points for evaluation metrics, avoid connecting gaps for others
1198
  try:
1199
  for trace in fig.data:
1200
+ trace.connectgaps = True if _is_eval_metric(metric_name) else False
1201
+ # Force line+markers to visually connect points
1202
+ trace.mode = 'lines+markers'
1203
  except Exception:
1204
  pass
1205
  return fig
 
1577
  # Define colors for different metrics
1578
  colors = ['blue', 'red', 'green', 'orange', 'purple', 'brown', 'pink', 'gray', 'cyan', 'magenta']
1579
 
1580
+ # Helper predicates
1581
+ def _is_eval_metric(name: str) -> bool:
1582
+ return name.startswith('eval_') or name.startswith('eval/')
1583
+
1584
+ def _is_system_metric(name: str) -> bool:
1585
+ import re
1586
+ if name in ("cpu_percent", "memory_percent"):
1587
+ return True
1588
+ return re.match(r"^gpu_\d+_(memory_allocated|memory_reserved|utilization)$", name) is not None
1589
+
1590
  for i, metric in enumerate(numeric_cols):
1591
  if metric in df.columns and not df[metric].isna().all():
1592
  row = (i // n_cols) + 1
 
1596
  # Clean steps for each subplot too
1597
  try:
1598
  df_sub = df.copy()
1599
+ use_time_axis = _is_system_metric(metric)
1600
+ if use_time_axis:
1601
+ df_sub['time'] = pd.to_datetime(df_sub.get('timestamp', ''), errors='coerce')
1602
+ if df_sub['time'].isna().all():
1603
+ df_sub['time'] = range(1, len(df_sub) + 1)
1604
+ df_sub.sort_values('time', inplace=True)
1605
+ # Filter to available metric points only to ensure connected lines
1606
+ metric_mask = df_sub[metric].notna()
1607
+ x_vals = df_sub.loc[metric_mask, 'time'].tolist()
1608
+ y_vals = df_sub.loc[metric_mask, metric].tolist()
1609
  else:
1610
+ if 'step' not in df_sub or df_sub['step'].nunique() <= 1:
1611
+ for alt in ['train/global_step', 'global_step', 'train/step']:
1612
+ if alt in df_sub.columns and df_sub[alt].notna().any():
1613
+ df_sub['step'] = pd.to_numeric(df_sub[alt], errors='coerce')
1614
+ break
1615
+ if 'step' not in df_sub.columns or df_sub['step'].isna().all() or df_sub['step'].nunique() <= 1:
1616
+ df_sub['step'] = range(1, len(df_sub) + 1)
1617
+ else:
1618
+ df_sub['step'] = pd.to_numeric(df_sub.get('step', -1), errors='coerce').fillna(-1)
1619
+ df_sub.sort_values('step', inplace=True)
1620
+ # Filter to available metric points only to ensure connected lines
1621
+ metric_mask = df_sub[metric].notna()
1622
+ x_vals = df_sub.loc[metric_mask, 'step'].tolist()
1623
+ y_vals = df_sub.loc[metric_mask, metric].tolist()
1624
  except Exception:
1625
  df_sub = df
1626
+ metric_mask = df_sub[metric].notna() if metric in df_sub else []
1627
+ x_vals = df_sub.get('step', list(range(1, len(df_sub) + 1))).tolist()
1628
+ y_vals = df_sub.get(metric, []).tolist()
1629
  fig.add_trace(
1630
  go.Scatter(
1631
+ x=x_vals,
1632
+ y=y_vals,
1633
  mode='lines+markers',
1634
  name=metric,
1635
  line=dict(width=2, color=color),
1636
  marker=dict(size=4, color=color),
1637
  showlegend=False,
1638
+ connectgaps=True if _is_eval_metric(metric) else False
1639
  ),
1640
  row=row, col=col
1641
  )
1642
+ # Set axis titles per subplot for clarity
1643
+ try:
1644
+ fig.update_xaxes(title_text=("Time" if use_time_axis else "Training Step"), row=row, col=col)
1645
+ except Exception:
1646
+ pass
1647
 
1648
  fig.update_layout(
1649
  title=f"All Metrics for Experiment {experiment_id}",
 
1740
  plot_bgcolor='white', paper_bgcolor='white'
1741
  )
1742
  return fig
1743
+
1744
  if not selected_metrics:
1745
  fig = go.Figure()
1746
  fig.add_annotation(
 
1754
  plot_bgcolor='white', paper_bgcolor='white'
1755
  )
1756
  return fig
1757
+
1758
+ # Prepare dataframes for each selected experiment once
1759
+ experiment_to_dataframe = {}
1760
+ for experiment_id in selected_experiments:
1761
+ try:
1762
+ experiment_to_dataframe[experiment_id] = get_metrics_dataframe(experiment_id)
1763
+ except Exception:
1764
+ experiment_to_dataframe[experiment_id] = pd.DataFrame()
1765
+
1766
+ # Setup subplots: one subplot per selected metric
1767
+ from plotly.subplots import make_subplots
1768
+
1769
+ num_metrics = len(selected_metrics)
1770
+ num_columns = min(3, num_metrics)
1771
+ num_rows = (num_metrics + num_columns - 1) // num_columns
1772
+
1773
+ fig = make_subplots(
1774
+ rows=num_rows,
1775
+ cols=num_columns,
1776
+ subplot_titles=selected_metrics,
1777
+ vertical_spacing=0.05,
1778
+ horizontal_spacing=0.1
1779
+ )
1780
+
1781
+ # Color palette for experiments (consistent colors across subplots)
1782
+ try:
1783
+ palette = px.colors.qualitative.Plotly
1784
+ except Exception:
1785
+ palette = [
1786
+ 'blue', 'red', 'green', 'orange', 'purple', 'brown',
1787
+ 'pink', 'gray', 'cyan', 'magenta'
1788
+ ]
1789
+ experiment_to_color = {
1790
+ exp_id: palette[idx % len(palette)] for idx, exp_id in enumerate(selected_experiments)
1791
+ }
1792
+
1793
+ # Helper predicates (match logic used elsewhere in this file)
1794
+ def _is_eval_metric(name: str) -> bool:
1795
+ return name.startswith('eval_') or name.startswith('eval/')
1796
+
1797
+ def _is_system_metric(name: str) -> bool:
1798
+ import re
1799
+ if name in ("cpu_percent", "memory_percent"):
1800
+ return True
1801
+ return re.match(r"^gpu_\d+_(memory_allocated|memory_reserved|utilization)$", name) is not None
1802
+
1803
+ any_trace_added = False
1804
+
1805
+ for metric_index, metric_name in enumerate(selected_metrics):
1806
+ row = (metric_index // num_columns) + 1
1807
+ col = (metric_index % num_columns) + 1
1808
+
1809
+ subplot_has_data = False
1810
+
1811
+ for experiment_id, df in experiment_to_dataframe.items():
1812
+ if df is None or df.empty or metric_name not in df.columns:
1813
+ continue
1814
+
1815
+ # Build x/y based on metric type
1816
+ try:
1817
+ df_local = df.copy()
1818
+ use_time_axis = _is_system_metric(metric_name)
1819
+
1820
+ if use_time_axis:
1821
+ # Time axis: use timestamp → datetime
1822
+ df_local['time'] = pd.to_datetime(df_local.get('timestamp', ''), errors='coerce')
1823
+ if df_local['time'].isna().all():
1824
+ df_local['time'] = range(1, len(df_local) + 1)
1825
+ df_local.sort_values('time', inplace=True)
1826
+ valid_mask = df_local[metric_name].notna()
1827
+ x_values = df_local.loc[valid_mask, 'time'].tolist()
1828
+ y_values = df_local.loc[valid_mask, metric_name].tolist()
1829
+ else:
1830
+ # Step axis: ensure a reasonable step column exists
1831
+ if 'step' not in df_local or df_local['step'].nunique() <= 1:
1832
+ for alternative in ['train/global_step', 'global_step', 'train/step']:
1833
+ if alternative in df_local.columns and df_local[alternative].notna().any():
1834
+ df_local['step'] = pd.to_numeric(df_local[alternative], errors='coerce')
1835
+ break
1836
+ if 'step' not in df_local.columns or df_local['step'].isna().all() or df_local['step'].nunique() <= 1:
1837
+ df_local['step'] = range(1, len(df_local) + 1)
1838
+ else:
1839
+ df_local['step'] = pd.to_numeric(df_local.get('step', -1), errors='coerce').fillna(-1)
1840
+ df_local.sort_values('step', inplace=True)
1841
+ valid_mask = df_local[metric_name].notna()
1842
+ x_values = df_local.loc[valid_mask, 'step'].tolist()
1843
+ y_values = df_local.loc[valid_mask, metric_name].tolist()
1844
+ except Exception:
1845
+ # Fallback to naive arrays
1846
+ valid_mask = df[metric_name].notna()
1847
+ x_values = df.loc[valid_mask, 'step'].tolist() if 'step' in df.columns else list(range(1, len(df) + 1))
1848
+ y_values = df.loc[valid_mask, metric_name].tolist() if metric_name in df.columns else []
1849
+
1850
+ if not x_values or not y_values:
1851
+ continue
1852
+
1853
+ subplot_has_data = True
1854
+ any_trace_added = True
1855
+ color = experiment_to_color.get(experiment_id, 'blue')
1856
+
1857
+ fig.add_trace(
1858
+ go.Scatter(
1859
+ x=x_values,
1860
+ y=y_values,
1861
+ mode='lines+markers',
1862
+ name=experiment_id,
1863
+ line=dict(width=2, color=color),
1864
+ marker=dict(size=4, color=color),
1865
+ showlegend=True,
1866
+ connectgaps=True if _is_eval_metric(metric_name) else False
1867
+ ),
1868
+ row=row,
1869
+ col=col
1870
+ )
1871
+
1872
+ # Axis titles per subplot
1873
+ try:
1874
+ fig.update_xaxes(
1875
+ title_text=("Time" if _is_system_metric(metric_name) else "Training Step"),
1876
+ row=row,
1877
+ col=col
1878
+ )
1879
+ fig.update_yaxes(title_text=metric_name, row=row, col=col)
1880
+ except Exception:
1881
+ pass
1882
+
1883
+ # If no experiment had data for this metric, annotate the subplot
1884
+ if not subplot_has_data:
1885
+ try:
1886
+ fig.add_annotation(
1887
+ text=f"No data for metric: {metric_name}",
1888
+ xref="paper", yref="paper",
1889
+ x=0.5, y=0.5, showarrow=False,
1890
+ font=dict(size=12, color="gray"),
1891
+ row=row, col=col
1892
+ )
1893
+ except Exception:
1894
+ fig.add_annotation(
1895
+ text=f"No data for metric: {metric_name}",
1896
+ xref="paper", yref="paper",
1897
+ x=0.5, y=0.5, showarrow=False,
1898
+ font=dict(size=12, color="gray")
1899
+ )
1900
+
1901
+ fig.update_layout(
1902
+ title="Experiment Comparison",
1903
+ height=max(350, 320 * num_rows),
1904
+ plot_bgcolor='white',
1905
+ paper_bgcolor='white',
1906
+ hovermode='x unified',
1907
+ legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1)
1908
+ )
1909
+
1910
+ # Grid lines for all subplots
1911
+ for r in range(1, num_rows + 1):
1912
+ for c in range(1, num_columns + 1):
1913
+ fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgray', row=r, col=c)
1914
+ fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgray', row=r, col=c)
1915
+
1916
+ if not any_trace_added:
1917
+ # Overall annotation if literally nothing to plot
1918
+ fig = go.Figure()
1919
+ fig.add_annotation(
1920
+ text="No comparable data available for the selected experiments/metrics",
1921
+ xref="paper", yref="paper",
1922
+ x=0.5, y=0.5, showarrow=False,
1923
+ font=dict(size=16, color="orange")
1924
+ )
1925
+ fig.update_layout(
1926
+ title="No Data",
1927
+ plot_bgcolor='white', paper_bgcolor='white'
1928
+ )
1929
+
1930
+ return fig
1931
 
1932
  except Exception as e:
1933
  logger.error(f"Error creating comparison from selection: {str(e)}")