eloukas committed on
Commit
7043798
·
verified ·
1 Parent(s): 4a8ba95

Disable rereading

Browse files
Files changed (1) hide show
  1. app.py +257 -1005
app.py CHANGED
@@ -83,14 +83,12 @@ app.layout = html.Div(
83
  children="Sessions Observatory",
84
  className="section-header",
85
  ),
86
- # dcc.Graph(id="bubble-chart", style={"height": "80vh"}),
87
  dcc.Graph(
88
  id="bubble-chart",
89
  style={"height": "calc(100% - 154px)"},
90
- ), # this does not work for some reason
91
  html.Div(
92
  [
93
- # Only keep Color by
94
  html.Div(
95
  [
96
  html.Div(
@@ -103,7 +101,6 @@ app.layout = html.Div(
103
  ],
104
  className="control-labels-row",
105
  ),
106
- # Only keep Color by options
107
  html.Div(
108
  [
109
  html.Div(
@@ -188,10 +185,9 @@ app.layout = html.Div(
188
  html.I(
189
  className="fas fa-info-circle",
190
  title="Root cause detection is experimental and may require manual review since it is generated by AI models. Root causes are only shown in clusters with identifiable root causes.",
191
- # Added title for info icon
192
  style={
193
  "marginLeft": "0.2rem",
194
- "color": "#6c757d", # General gray
195
  "fontSize": "0.9rem",
196
  "cursor": "pointer",
197
  "verticalAlign": "middle",
@@ -206,9 +202,7 @@ app.layout = html.Div(
206
  ),
207
  ],
208
  id="root-causes-section",
209
- style={
210
- "display": "none"
211
- }, # Initially hidden
212
  ),
213
  # Added Tags section
214
  html.Div(
@@ -223,9 +217,7 @@ app.layout = html.Div(
223
  ),
224
  ],
225
  id="tags-section",
226
- style={
227
- "display": "none"
228
- }, # Initially hidden
229
  ),
230
  ],
231
  className="details-section",
@@ -276,7 +268,7 @@ app.layout = html.Div(
276
  ),
277
  html.H3("No topic selected"),
278
  html.P(
279
- "Click or hover on a bubble to view topic details."
280
  ),
281
  ],
282
  className="no-selection-message",
@@ -395,6 +387,8 @@ app.layout = html.Div(
395
  ),
396
  # Store the processed data
397
  dcc.Store(id="stored-data"),
 
 
398
  # Store the current selected topic for dialogs modal
399
  dcc.Store(id="selected-topic-store"),
400
  # Store the current selected root cause for root cause modal
@@ -403,7 +397,7 @@ app.layout = html.Div(
403
  className="app-container",
404
  )
405
 
406
- # Define CSS for the app
407
  app.index_string = """
408
  <!DOCTYPE html>
409
  <html>
@@ -1227,10 +1221,10 @@ app.index_string = """
1227
  )
1228
  def update_topic_distribution_header(data):
1229
  if not data:
1230
- return "Sessions Observatory" # Default when no data is available
1231
 
1232
  df = pd.DataFrame(data)
1233
- total_dialogs = df["count"].sum() # Sum up the 'count' column
1234
  return f"Sessions Observatory ({total_dialogs} dialogs)"
1235
 
1236
 
@@ -1238,8 +1232,9 @@ def update_topic_distribution_header(data):
1238
  @callback(
1239
  [
1240
  Output("stored-data", "data"),
 
1241
  Output("upload-status", "children"),
1242
- Output("upload-status", "style"), # Add style output for visibility
1243
  Output("main-content", "style"),
1244
  ],
1245
  [Input("upload-data", "contents")],
@@ -1247,91 +1242,81 @@ def update_topic_distribution_header(data):
1247
  )
1248
  def process_upload(contents, filename):
1249
  if contents is None:
1250
- return None, "", {"display": "none"}, {"display": "none"} # Keep hidden
1251
 
1252
  try:
1253
- # Parse uploaded file
1254
  content_type, content_string = contents.split(",")
1255
  decoded = base64.b64decode(content_string)
1256
 
1257
  if "csv" in filename.lower():
1258
- df = pd.read_csv(io.StringIO(decoded.decode("utf-8")))
1259
  elif "xls" in filename.lower():
1260
- df = pd.read_excel(io.BytesIO(decoded))
1261
-
1262
- # DEBUG
1263
- # --- Print unique root_cause_subcluster values for each deduplicated_topic_name ---
1264
- if (
1265
- "deduplicated_topic_name" in df.columns
1266
- and "root_cause_subcluster" in df.columns
1267
- ):
1268
- print(
1269
- "\n[INFO] Unique root_cause_subcluster values for each deduplicated_topic_name:"
 
1270
  )
1271
- for topic in df["deduplicated_topic_name"].unique():
1272
- subclusters = (
1273
- df[df["deduplicated_topic_name"] == topic]["root_cause_subcluster"]
1274
- .dropna()
1275
- .unique()
1276
- )
1277
- print(f"- {topic}:")
1278
- for sub in subclusters:
1279
- print(f" - {sub}")
1280
- print()
1281
- # --- End of DEBUG ---
1282
 
1283
- # Hardcoded flag to exclude 'Unclustered' topics
1284
  EXCLUDE_UNCLUSTERED = True
1285
  if EXCLUDE_UNCLUSTERED and "deduplicated_topic_name" in df.columns:
1286
  df = df[df["deduplicated_topic_name"] != "Unclustered"].copy()
1287
- # If we strip leading and trailing `"` or `'` from the topic name here, then
1288
- # we will have a problem with the deduplicated names, as they will not match the
1289
- # original topic names in the dataset.
1290
- # Better do it in the first script.
1291
  else:
1292
  return (
 
1293
  None,
1294
  html.Div(
1295
- [
1296
- html.I(
1297
- className="fas fa-exclamation-circle",
1298
- style={"color": "var(--destructive)", "marginRight": "8px"},
1299
- ),
1300
- "Please upload a CSV or Excel file.",
1301
- ],
1302
  style={"color": "var(--destructive)"},
1303
  ),
1304
- {"display": "block"}, # Make visible after error
1305
  {"display": "none"},
1306
  )
1307
 
1308
- # Process the dataframe to get topic statistics
1309
  topic_stats = analyze_topics(df)
1310
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1311
  return (
1312
  topic_stats.to_dict("records"),
 
1313
  html.Div(
1314
  [
1315
  html.I(
1316
  className="fas fa-check-circle",
1317
- style={
1318
- "color": "hsl(142.1, 76.2%, 36.3%)",
1319
- "marginRight": "8px",
1320
- },
1321
  ),
1322
  f'Successfully uploaded "{filename}"',
1323
  ],
1324
  style={"color": "hsl(142.1, 76.2%, 36.3%)"},
1325
  ),
1326
- {"display": "block"}, # maybe add the above line here too #TODO
1327
- {
1328
- "display": "block",
1329
- "height": "calc(100vh - 40px)",
1330
- }, # Make visible after successful upload
1331
  )
1332
 
1333
  except Exception as e:
1334
  return (
 
1335
  None,
1336
  html.Div(
1337
  [
@@ -1339,22 +1324,18 @@ def process_upload(contents, filename):
1339
  className="fas fa-exclamation-triangle",
1340
  style={"color": "var(--destructive)", "marginRight": "8px"},
1341
  ),
1342
- f"Error processing file: {str(e)}",
1343
  ],
1344
  style={"color": "var(--destructive)"},
1345
  ),
1346
- {"display": "block"}, # Make visible after error
1347
  {"display": "none"},
1348
  )
1349
 
1350
 
1351
  # Function to analyze the topics and create statistics
1352
  def analyze_topics(df):
1353
- # Group by topic name and calculate metrics
1354
  topic_stats = (
1355
- # IMPORTANT!
1356
- # As deduplicated_topic_name, we have either the deduplicated names (if enabled by the process),
1357
- # or the kmeans_reclustered name (where available) and the ClusterNames.
1358
  df.groupby("deduplicated_topic_name")
1359
  .agg(
1360
  count=("id", "count"),
@@ -1364,204 +1345,94 @@ def analyze_topics(df):
1364
  )
1365
  .reset_index()
1366
  )
1367
-
1368
- # Calculate rates
1369
- topic_stats["negative_rate"] = (
1370
- topic_stats["negative_count"] / topic_stats["count"] * 100
1371
- ).round(1)
1372
- topic_stats["unresolved_rate"] = (
1373
- topic_stats["unresolved_count"] / topic_stats["count"] * 100
1374
- ).round(1)
1375
- topic_stats["urgent_rate"] = (
1376
- topic_stats["urgent_count"] / topic_stats["count"] * 100
1377
- ).round(1)
1378
-
1379
- # Apply binned layout
1380
  topic_stats = apply_binned_layout(topic_stats)
1381
-
1382
  return topic_stats
1383
 
1384
 
1385
- # New binned layout function
1386
-
1387
-
1388
  def apply_binned_layout(df, padding=0, bin_config=None, max_items_per_row=6):
1389
- """
1390
- Apply a binned layout where bubbles are grouped into rows based on dialog count.
1391
- Bubbles in each row will be centered horizontally.
1392
-
1393
- Args:
1394
- df: DataFrame containing the topic data
1395
- padding: Padding from edges as percentage
1396
- bin_config: List of tuples defining bin ranges and descriptions.
1397
- Example: [(300, None, "300+ dialogs"), (250, 299, "250-299 dialogs"), ...]
1398
- max_items_per_row: Maximum number of items to display in a single row
1399
-
1400
- Returns:
1401
- DataFrame with updated x, y positions
1402
- """
1403
- # Create a copy of the dataframe to avoid modifying the original
1404
  df_sorted = df.copy()
1405
-
1406
- # Default bin configuration if none is provided
1407
- # 8 rows x 6 bubbles is usually good
1408
  if bin_config is None:
1409
  bin_config = [
1410
- (100, None, "100+ dialogs"),
1411
- (50, 99, "50-99 dialogs"),
1412
- (25, 49, "25-49 dialogs"),
1413
- (9, 24, "9-24 dialogs"),
1414
- (7, 8, "7-8 dialogs"),
1415
- (5, 7, "5-6 dialogs"),
1416
- (4, 4, "4 dialogs"),
1417
- (0, 3, "0-3 dialogs"),
1418
  ]
1419
-
1420
- # Generate bin descriptions and conditions dynamically
1421
  bin_descriptions = {}
1422
  conditions = []
1423
  bin_values = []
1424
-
1425
  for i, (lower, upper, description) in enumerate(bin_config):
1426
  bin_name = f"Bin {i + 1}"
1427
  bin_descriptions[bin_name] = description
1428
  bin_values.append(bin_name)
1429
-
1430
- if upper is None: # No upper limit
1431
  conditions.append(df_sorted["count"] >= lower)
1432
  else:
1433
- conditions.append(
1434
- (df_sorted["count"] >= lower) & (df_sorted["count"] <= upper)
1435
- )
1436
-
1437
- # Apply the conditions to create the bin column
1438
- df_sorted["bin"] = np.select(conditions, bin_values, default="Bin 8")
1439
  df_sorted["bin_description"] = df_sorted["bin"].map(bin_descriptions)
1440
-
1441
- # Sort by bin (ascending to get Bin 1 first) and by count (descending) within each bin
1442
  df_sorted = df_sorted.sort_values(by=["bin", "count"], ascending=[True, False])
1443
-
1444
- # Now split bins that have more than max_items_per_row items
1445
  original_bins = df_sorted["bin"].unique()
1446
  new_rows = []
1447
  new_bin_descriptions = bin_descriptions.copy()
1448
-
1449
  for bin_name in original_bins:
1450
  bin_mask = df_sorted["bin"] == bin_name
1451
  bin_group = df_sorted[bin_mask]
1452
  bin_size = len(bin_group)
1453
-
1454
- # If bin has more items than max_items_per_row, split it
1455
  if bin_size > max_items_per_row:
1456
- # Calculate how many sub-bins we need
1457
  num_sub_bins = (bin_size + max_items_per_row - 1) // max_items_per_row
1458
-
1459
- # Calculate items per sub-bin (distribute evenly)
1460
  items_per_sub_bin = [bin_size // num_sub_bins] * num_sub_bins
1461
-
1462
- # Distribute the remainder one by one to achieve balance
1463
  remainder = bin_size % num_sub_bins
1464
  for i in range(remainder):
1465
  items_per_sub_bin[i] += 1
1466
-
1467
- # Original bin description
1468
  original_description = bin_descriptions[bin_name]
1469
-
1470
- # Create new row entries and update bin assignments
1471
  start_idx = 0
1472
  for i in range(num_sub_bins):
1473
- # Create new bin name with sub-bin index
1474
  new_bin_name = f"{bin_name}_{i + 1}"
1475
-
1476
- # Create new bin description with sub-bin index
1477
  new_description = f"{original_description} ({i + 1}/{num_sub_bins})"
1478
  new_bin_descriptions[new_bin_name] = new_description
1479
-
1480
- # Get slice of dataframe for this sub-bin
1481
  end_idx = start_idx + items_per_sub_bin[i]
1482
  sub_bin_rows = bin_group.iloc[start_idx:end_idx].copy()
1483
-
1484
- # Update bin name and description
1485
  sub_bin_rows["bin"] = new_bin_name
1486
  sub_bin_rows["bin_description"] = new_description
1487
-
1488
- # Add to new rows
1489
  new_rows.append(sub_bin_rows)
1490
-
1491
- # Update start index for next iteration
1492
  start_idx = end_idx
1493
-
1494
- # Remove the original bin from df_sorted
1495
  df_sorted = df_sorted[~bin_mask]
1496
-
1497
- # Combine the original dataframe (with small bins) and the new split bins
1498
  if new_rows:
1499
  df_sorted = pd.concat([df_sorted] + new_rows)
1500
-
1501
- # Re-sort with the new bin names
1502
  df_sorted = df_sorted.sort_values(by=["bin", "count"], ascending=[True, False])
1503
-
1504
- # Calculate the vertical positions for each row (bin)
1505
  bins_with_topics = sorted(df_sorted["bin"].unique())
1506
  num_rows = len(bins_with_topics)
1507
-
1508
  available_height = 100 - (2 * padding)
1509
  row_height = available_height / num_rows
1510
-
1511
- # Calculate and assign y-positions (vertical positions)
1512
- row_positions = {}
1513
- for i, bin_name in enumerate(bins_with_topics):
1514
- # Calculate row position (centered within its allocated space)
1515
- row_pos = padding + i * row_height + (row_height / 2)
1516
- row_positions[bin_name] = row_pos
1517
-
1518
  df_sorted["y"] = df_sorted["bin"].map(row_positions)
1519
-
1520
- # Center the bubbles in each row horizontally
1521
- center_point = 50 # Middle of the chart (0-100 scale)
1522
  for bin_name in bins_with_topics:
1523
- # Get topics in this bin
1524
  bin_mask = df_sorted["bin"] == bin_name
1525
  num_topics_in_bin = bin_mask.sum()
1526
-
1527
  if num_topics_in_bin == 1:
1528
- # If there's only one bubble, place it in the center
1529
  df_sorted.loc[bin_mask, "x"] = center_point
1530
  else:
1531
- if num_topics_in_bin < max_items_per_row:
1532
- # For fewer bubbles, add a little bit of spacing between them
1533
- # Calculate the total width needed
1534
- total_width = (num_topics_in_bin - 1) * 17.5 # 10 units between bubbles
1535
- # Calculate starting position (to center the group)
1536
- start_pos = center_point - (total_width / 2)
1537
- # Assign positions
1538
- positions = [start_pos + (i * 17.5) for i in range(num_topics_in_bin)]
1539
- df_sorted.loc[bin_mask, "x"] = positions
1540
- else:
1541
- # For multiple bubbles, distribute them evenly around the center
1542
- # Calculate the total width needed
1543
- total_width = (num_topics_in_bin - 1) * 15 # 15 units between bubbles
1544
-
1545
- # Calculate starting position (to center the group)
1546
- start_pos = center_point - (total_width / 2)
1547
-
1548
- # Assign positions
1549
- positions = [start_pos + (i * 15) for i in range(num_topics_in_bin)]
1550
- df_sorted.loc[bin_mask, "x"] = positions
1551
-
1552
- # Add original rank for reference
1553
  df_sorted["size_rank"] = range(1, len(df_sorted) + 1)
1554
-
1555
  return df_sorted
1556
 
1557
 
1558
- # New function to update positions based on selected size metric
1559
  def update_bubble_positions(df: pd.DataFrame) -> pd.DataFrame:
1560
- # For the main chart, we always use the binned layout
1561
  return apply_binned_layout(df)
1562
 
1563
 
1564
- # Callback to update the bubble chart
1565
  @callback(
1566
  Output("bubble-chart", "figure"),
1567
  [
@@ -1575,258 +1446,109 @@ def update_bubble_chart(data, color_metric):
1575
 
1576
  df = pd.DataFrame(data)
1577
 
1578
- # Update positions using binned layout
1579
- df = update_bubble_positions(df)
 
 
1580
 
1581
- # Always use count for sizing
1582
  size_values = df["count"]
1583
  raw_sizes = df["count"]
1584
  size_title = "Dialog Count"
1585
-
1586
- # Apply log scaling to the size values for better visualization
1587
- # To make the smallest bubble bigger, increase the min_size value (currently 1).
1588
- min_size = 1 # Minimum bubble size
1589
  if size_values.max() > size_values.min():
1590
- # Log-scale the sizes
1591
  log_sizes = np.log1p(size_values)
1592
- # Scale to a reasonable range for visualization
1593
- # To make the biggest bubble smaller, reduce the multiplier (currently 50).
1594
- size_values = (
1595
- min_size
1596
- + (log_sizes - log_sizes.min()) / (log_sizes.max() - log_sizes.min()) * 50
1597
- )
1598
  else:
1599
- # If all values are the same, use a default size
1600
  size_values = np.ones(len(df)) * 12.5
1601
 
1602
- # DEBUG: Print sizes of bubbles in the first and second bins
1603
- bins = sorted(df["bin"].unique())
1604
- if len(bins) >= 1:
1605
- # first_bin = bins[0]
1606
- # print(f"DEBUG - First bin '{first_bin}' bubble sizes:")
1607
- # first_bin_df = df[df["bin"] == first_bin]
1608
- # for idx, row in first_bin_df.iterrows():
1609
- # print(
1610
- # f" Topic: {row['deduplicated_topic_name']}, Raw size: {row['count']}, Displayed size: {size_values[idx]}"
1611
- # )
1612
- pass
1613
-
1614
- if len(bins) >= 2:
1615
- # second_bin = bins[1]
1616
- # print(f"DEBUG - Second bin '{second_bin}' bubble sizes:")
1617
- # second_bin_df = df[df["bin"] == second_bin]
1618
- # for idx, row in second_bin_df.iterrows():
1619
- # print(
1620
- # f" Topic: {row['deduplicated_topic_name']}, Raw size: {row['count']}, Displayed size: {size_values[idx]}"
1621
- # )
1622
- pass
1623
-
1624
- # Determine color based on selected metric
1625
  if color_metric == "negative_rate":
1626
  color_values = df["negative_rate"]
1627
- # color_title = "Negative Sentiment (%)"
1628
  color_title = "Negativity (%)"
1629
- # color_scale = "RdBu" # no ice, RdBu - og is Reds - matter is good too
1630
- # color_scale = "Portland"
1631
- # color_scale = "RdYlGn_r"
1632
- # color_scale = "Teal"
1633
  color_scale = "Teal"
1634
-
1635
  elif color_metric == "unresolved_rate":
1636
  color_values = df["unresolved_rate"]
1637
  color_title = "Unresolved (%)"
1638
- # color_scale = "Burg" # og is YlOrRd
1639
- # color_scale = "Temps"
1640
- # color_scale = "Armyrose"
1641
- # color_scale = "YlOrRd"
1642
  color_scale = "Teal"
1643
- else:
1644
  color_values = df["urgent_rate"]
1645
  color_title = "Urgency (%)"
1646
- # color_scale = "Magenta" # og is Blues
1647
- # color_scale = "Tealrose"
1648
- # color_scale = "Portland"
1649
  color_scale = "Teal"
1650
 
1651
- # Create enhanced hover text that includes bin information
1652
  hover_text = [
1653
  f"Topic: {topic}<br>{size_title}: {raw:.1f}<br>{color_title}: {color:.1f}<br>Group: {bin_desc}"
1654
- for topic, raw, color, bin_desc in zip(
1655
- df["deduplicated_topic_name"],
1656
- raw_sizes,
1657
- color_values,
1658
- df["bin_description"],
1659
- )
1660
  ]
1661
 
1662
- # Create bubble chart
1663
  fig = px.scatter(
1664
  df,
1665
- x="x",
1666
- y="y",
1667
  size=size_values,
1668
  color=color_values,
1669
- # text="deduplicated_topic_name", # Remove text here
1670
  hover_name="deduplicated_topic_name",
1671
- hover_data={
1672
- "x": False,
1673
- "y": False,
1674
- "bin_description": True,
1675
- },
1676
- size_max=42.5, # Maximum size of the bubbles, change this to adjust the size
1677
  color_continuous_scale=color_scale,
1678
- custom_data=[
1679
- "deduplicated_topic_name",
1680
- "count",
1681
- "negative_rate",
1682
- "unresolved_rate",
1683
- "urgent_rate",
1684
- "bin_description",
1685
- ],
1686
  )
1687
 
1688
- # Update traces: Remove text related properties
1689
  fig.update_traces(
1690
- mode="markers", # Remove '+text'
1691
  marker=dict(sizemode="area", opacity=0.8, line=dict(width=1, color="white")),
1692
  hovertemplate="%{hovertext}<extra></extra>",
1693
  hovertext=hover_text,
1694
  )
1695
 
1696
- # Create annotations for the bubbles
1697
  annotations = []
1698
  for i, row in df.iterrows():
1699
- # Wrap text every 4 words
1700
  words = row["deduplicated_topic_name"].split()
1701
- wrapped_text = "<br>".join(
1702
- [" ".join(words[i : i + 4]) for i in range(0, len(words), 4)]
1703
- )
1704
-
1705
- # Calculate size for vertical offset (approximately based on the bubble size)
1706
- # Add vertical offset based on bubble size to place text below the bubble
1707
- marker_size = (
1708
- size_values[i] / 20 # type: ignore # FIXME: size_values[df.index.get_loc(i)] / 20
1709
- ) # Adjust this divisor as needed to get proper spacing
1710
-
1711
  annotations.append(
1712
  dict(
1713
- x=row["x"],
1714
- y=row["y"]
1715
- + 0.125 # Adding this so in a row with maximum bubbles, the left one does not overlap with the bin label
1716
- + marker_size, # Add vertical offset to position text below the bubble
1717
- text=wrapped_text,
1718
- showarrow=False,
1719
- textangle=0,
1720
- font=dict(
1721
- # size=10,
1722
- # size=15,
1723
- size=9,
1724
- color="var(--foreground)",
1725
- family="Arial, sans-serif",
1726
- weight="bold",
1727
- ),
1728
- xanchor="center",
1729
- yanchor="top", # Anchor to top of text box so it hangs below the bubble
1730
- bgcolor="rgba(255,255,255,0.7)", # Add semi-transparent background for better readability
1731
- bordercolor="rgba(0,0,0,0.1)", # Add a subtle border color
1732
- borderwidth=1,
1733
- borderpad=1,
1734
- # TODO: Radius for rounded corners
1735
  )
1736
  )
1737
 
1738
- # Add bin labels and separator lines
1739
  unique_bins = sorted(df["bin"].unique())
1740
- bin_y_positions = [
1741
- df[df["bin"] == bin_name]["y"].mean() for bin_name in unique_bins
1742
- ]
1743
-
1744
- # Dynamically extract bin descriptions
1745
  bin_descriptions = df.set_index("bin")["bin_description"].to_dict()
1746
 
1747
  for bin_name, bin_y in zip(unique_bins, bin_y_positions):
1748
- # Add horizontal line
1749
- fig.add_shape(
1750
- type="line",
1751
- x0=0,
1752
- y0=bin_y,
1753
- x1=100,
1754
- y1=bin_y,
1755
- line=dict(color="rgba(0,0,0,0.1)", width=1, dash="dot"),
1756
- layer="below",
1757
- )
1758
-
1759
- # Add subtle lines for each bin and bin labels
1760
- for bin_name, bin_y in zip(unique_bins, bin_y_positions):
1761
- # Add horizontal line
1762
- fig.add_shape(
1763
- type="line",
1764
- x0=0,
1765
- y0=bin_y,
1766
- x1=100,
1767
- y1=bin_y,
1768
- line=dict(color="rgba(0,0,0,0.1)", width=1, dash="dot"),
1769
- layer="below",
1770
- )
1771
-
1772
- # Add bin label annotation
1773
  annotations.append(
1774
  dict(
1775
- x=0, # Position the label on the left side
1776
- y=bin_y,
1777
- xref="x",
1778
- yref="y",
1779
- text=bin_descriptions[bin_name],
1780
- showarrow=False,
1781
  font=dict(size=8.25, color="var(--muted-foreground)"),
1782
- align="left",
1783
- xanchor="left",
1784
- yanchor="middle",
1785
- bgcolor="rgba(255,255,255,0.7)",
1786
- borderpad=1,
1787
  )
1788
  )
1789
 
1790
  fig.update_layout(
1791
  title=None,
1792
- xaxis=dict(
1793
- showgrid=False,
1794
- zeroline=False,
1795
- showticklabels=False,
1796
- title=None,
1797
- range=[0, 100],
1798
- ),
1799
- yaxis=dict(
1800
- showgrid=False,
1801
- zeroline=False,
1802
- showticklabels=False,
1803
- title=None,
1804
- range=[0, 100],
1805
- autorange="reversed", # Keep largest at top
1806
- ),
1807
  hovermode="closest",
1808
  margin=dict(l=0, r=0, t=10, b=10),
1809
- coloraxis_colorbar=dict(
1810
- title=color_title,
1811
- title_font=dict(size=9),
1812
- tickfont=dict(size=8),
1813
- thickness=10,
1814
- len=0.6,
1815
- yanchor="middle",
1816
- y=0.5,
1817
- xpad=0,
1818
- ),
1819
  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
1820
  paper_bgcolor="rgba(0,0,0,0)",
1821
  plot_bgcolor="rgba(0,0,0,0)",
1822
  hoverlabel=dict(bgcolor="white", font_size=12, font_family="Inter"),
1823
- annotations=annotations, # Add bin labels as annotations
1824
  )
1825
 
1826
  return fig
1827
 
1828
 
1829
- # Update the update_topic_details callback to use grayscale colors for tags based on frequency
1830
  @callback(
1831
  [
1832
  Output("topic-title", "children"),
@@ -1841,60 +1563,39 @@ def update_bubble_chart(data, color_metric):
1841
  Output("selected-topic-store", "data"),
1842
  ],
1843
  [
1844
- Input("bubble-chart", "hoverData"),
1845
- Input("bubble-chart", "clickData"),
1846
  Input("refresh-dialogs-btn", "n_clicks"),
1847
  ],
1848
- [State("stored-data", "data"), State("upload-data", "contents")],
1849
  )
1850
- def update_topic_details(
1851
- hover_data, click_data, refresh_clicks, stored_data, file_contents
1852
- ):
1853
- # Determine which data to use (hover takes priority; click is the fallback)
1854
- hover_info = hover_data or click_data
1855
-
1856
- if not hover_info or not stored_data or not file_contents:
1857
- return (
1858
- "",
1859
- [],
1860
- [],
1861
- "",
1862
- {"display": "none"},
1863
- "",
1864
- {"display": "none"},
1865
- [],
1866
- {"display": "flex"},
1867
- None,
1868
- )
1869
-
1870
- # Extract topic name from the hover data
1871
- topic_name = hover_info["points"][0]["customdata"][0]
1872
-
1873
- # Get stored data for this topic
1874
  df_stored = pd.DataFrame(stored_data)
1875
  topic_data = df_stored[df_stored["deduplicated_topic_name"] == topic_name].iloc[0]
1876
 
1877
- # Get original data to sample conversations
1878
- content_type, content_string = file_contents.split(",")
1879
- decoded = base64.b64decode(content_string)
1880
-
1881
- if (
1882
- content_type
1883
- == "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
1884
- ):
1885
- df_full = pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
1886
- else: # Assume CSV
1887
- df_full = pd.read_csv(
1888
- io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str}
1889
- )
1890
-
1891
- # Filter to this topic
1892
  topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]
1893
 
1894
- # Create the title
1895
  title = html.Div([html.Span(topic_name)])
1896
-
1897
- # Create metadata items
1898
  metadata_items = [
1899
  html.Div(
1900
  [
@@ -1902,10 +1603,8 @@ def update_topic_details(
1902
  html.Span(f"{int(topic_data['count'])} dialogs"),
1903
  html.Button(
1904
  [
1905
- html.I(
1906
- className="fas fa-table", style={"marginRight": "0.25rem"}
1907
- ),
1908
- "Show all dialogs inside",
1909
  ],
1910
  id="show-all-dialogs-btn",
1911
  className="show-dialogs-btn",
@@ -1916,8 +1615,6 @@ def update_topic_details(
1916
  style={"display": "flex", "alignItems": "center", "width": "100%"},
1917
  ),
1918
  ]
1919
-
1920
- # Create metrics boxes
1921
  metrics_boxes = [
1922
  html.Div(
1923
  [
@@ -1942,54 +1639,25 @@ def update_topic_details(
1942
  ),
1943
  ]
1944
 
1945
- # Extract and process root causes
1946
  root_causes_output = ""
1947
  root_causes_section_style = {"display": "none"}
1948
-
1949
- # Check if root_cause_subcluster column exists in the data
1950
  if "root_cause_subcluster" in topic_conversations.columns:
1951
- # Get unique root causes for this specific cluster
1952
- root_causes = topic_conversations["root_cause_subcluster"].dropna().unique()
1953
-
1954
- # Filter out common non-informative values including "Unclustered"
1955
  filtered_root_causes = [
1956
- rc
1957
- for rc in root_causes
1958
- if rc
1959
- not in [
1960
- "Sub-clustering disabled",
1961
- "Not eligible for sub-clustering",
1962
- "No valid root causes",
1963
- "No Subcluster",
1964
- "Unclustered",
1965
- "",
1966
- ]
1967
  ]
1968
-
1969
- # Debug: Print the unique root causes for this cluster
1970
- print(f"\n[DEBUG] Root causes for cluster '{topic_name}':")
1971
- print(f" All root causes: {list(root_causes)}")
1972
- print(f" Filtered root causes: {filtered_root_causes}")
1973
-
1974
  if filtered_root_causes:
1975
- # Create beautifully styled root cause tags with clickable icons
1976
  root_causes_output = html.Div(
1977
  [
1978
  html.Div(
1979
  [
1980
- html.I(
1981
- className="fas fa-exclamation-triangle root-cause-tag-icon"
1982
- ),
1983
  html.Span(root_cause, style={"marginRight": "6px"}),
1984
  html.I(
1985
  className="fas fa-external-link-alt root-cause-click-icon",
1986
  id={"type": "root-cause-icon", "index": root_cause},
1987
  title="Click to see specific chats assigned with this root cause.",
1988
- style={
1989
- "cursor": "pointer",
1990
- "fontSize": "0.55rem",
1991
- "opacity": "0.8",
1992
- },
1993
  ),
1994
  ],
1995
  className="root-cause-tag",
@@ -2001,30 +1669,19 @@ def update_topic_details(
2001
  )
2002
  root_causes_section_style = {"display": "block"}
2003
 
2004
- # Extract and process consolidated_tags with improved styling
2005
  tags_list = []
2006
- for _, row in topic_conversations.iterrows():
2007
- tags_str = row.get("consolidated_tags", "")
2008
- if pd.notna(tags_str):
2009
- tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()]
2010
- tags_list.extend(tags)
2011
-
2012
- # Count tag frequencies for better insight
2013
  tag_counts = {}
2014
  for tag in tags_list:
2015
  tag_counts[tag] = tag_counts.get(tag, 0) + 1
2016
 
2017
- # Sort by frequency (most common first) and then alphabetically for ties
2018
- sorted_tags = sorted(tag_counts.items(), key=lambda x: (-x[1], x[0]))
2019
-
2020
- # Keep only the top K tags
2021
- TOP_K = 15
2022
- sorted_tags = sorted_tags[:TOP_K]
2023
 
2024
- # Set tags section visibility and output
2025
  tags_section_style = {"display": "none"}
2026
  if sorted_tags:
2027
- # Create beautifully styled tags with count indicators and consistent color
2028
  tags_output = html.Div(
2029
  [
2030
  html.Div(
@@ -2041,87 +1698,37 @@ def update_topic_details(
2041
  tags_section_style = {"display": "block"}
2042
  else:
2043
  tags_output = html.Div(
2044
- [
2045
- html.I(className="fas fa-info-circle", style={"marginRight": "5px"}),
2046
- "No tags found for this topic",
2047
- ],
2048
  className="no-tags-message",
2049
  )
2050
 
2051
- # Sample up to 5 random dialogs
2052
  sample_size = min(5, len(topic_conversations))
2053
  if sample_size > 0:
2054
- sample_indices = random.sample(range(len(topic_conversations)), sample_size)
2055
- samples = topic_conversations.iloc[sample_indices]
2056
-
2057
  dialog_items = []
2058
  for _, row in samples.iterrows():
2059
- # Create dialog item with tags
2060
- sentiment_tag = html.Span(
2061
- row["Sentiment"], className="dialog-tag tag-sentiment"
2062
- )
2063
- resolution_tag = html.Span(
2064
- row["Resolution"], className="dialog-tag tag-resolution"
2065
- )
2066
- urgency_tag = html.Span(row["Urgency"], className="dialog-tag tag-urgency")
2067
-
2068
- # Add Chat ID tag if 'id' column exists
2069
- chat_id_tag = None
2070
  if "id" in row:
2071
- chat_id_tag = html.Span(
2072
- [
2073
- f"Chat ID: {row['id']} ",
2074
- html.I(
2075
- className="fas fa-arrow-up-right-from-square conversation-icon",
2076
- id={"type": "conversation-icon", "index": row["id"]},
2077
- title="View full conversation",
2078
- style={"marginLeft": "0.25rem"},
2079
- ),
2080
- ],
2081
- className="dialog-tag tag-chat-id",
2082
- style={"display": "inline-flex", "alignItems": "center"},
2083
- )
2084
-
2085
- # Add Root Cause tag if 'Root_Cause' column exists
2086
- root_cause_tag = None
2087
- if (
2088
- "Root_Cause" in row
2089
- and pd.notna(row["Root_Cause"])
2090
- and row["Root_Cause"] != "na"
2091
- ):
2092
- root_cause_tag = html.Span(
2093
- f"Root Cause: {row['Root_Cause']}",
2094
- className="dialog-tag tag-root-cause",
2095
- )
2096
-
2097
- # Compile all tags, including the new Chat ID and Root Cause tags if available
2098
- tags = [sentiment_tag, resolution_tag, urgency_tag]
2099
- if chat_id_tag:
2100
- tags.append(chat_id_tag)
2101
- if root_cause_tag:
2102
- tags.append(root_cause_tag)
2103
 
2104
  dialog_items.append(
2105
  html.Div(
2106
- [
2107
- html.Div(row["Summary"], className="dialog-summary"),
2108
- html.Div(
2109
- tags,
2110
- className="dialog-metadata",
2111
- ),
2112
- ],
2113
  className="dialog-item",
2114
  )
2115
  )
2116
-
2117
  sample_dialogs = dialog_items
2118
  else:
2119
- sample_dialogs = [
2120
- html.Div(
2121
- "No sample dialogs available for this topic.",
2122
- style={"color": "var(--muted-foreground)"},
2123
- )
2124
- ]
2125
 
2126
  return (
2127
  title,
@@ -2133,11 +1740,11 @@ def update_topic_details(
2133
  tags_section_style,
2134
  sample_dialogs,
2135
  {"display": "none"},
2136
- {"topic_name": topic_name, "file_contents": file_contents},
2137
  )
2138
 
2139
 
2140
- # Callback to open modal when conversation icon is clicked
2141
  @callback(
2142
  [
2143
  Output("conversation-modal", "style"),
@@ -2145,40 +1752,22 @@ def update_topic_details(
2145
  Output("conversation-subheader", "children"),
2146
  ],
2147
  [Input({"type": "conversation-icon", "index": dash.dependencies.ALL}, "n_clicks")],
2148
- [State("upload-data", "contents")],
2149
  prevent_initial_call=True,
2150
  )
2151
- def open_conversation_modal(n_clicks_list, file_contents):
2152
- # Check if any icon was clicked
2153
- if not any(n_clicks_list) or not file_contents:
2154
  return {"display": "none"}, "", ""
2155
 
2156
- # Get which icon was clicked
2157
  ctx = dash.callback_context
2158
  if not ctx.triggered:
2159
- return (
2160
- {"display": "none"},
2161
- "",
2162
- "",
2163
- ) # Extract the chat ID from the triggered input
2164
  triggered_id = ctx.triggered[0]["prop_id"]
2165
  chat_id = json.loads(triggered_id.split(".")[0])["index"]
2166
 
2167
- # Get the full conversation from the uploaded file
2168
- content_type, content_string = file_contents.split(",")
2169
- decoded = base64.b64decode(content_string)
2170
-
2171
- if (
2172
- content_type
2173
- == "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
2174
- ):
2175
- df_full = pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
2176
- else: # Assume CSV
2177
- df_full = pd.read_csv(
2178
- io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str}
2179
- )
2180
-
2181
- # Find the conversation with this chat ID
2182
  conversation_row = df_full[df_full["id"] == chat_id]
2183
  if len(conversation_row) == 0:
2184
  conversation_text = "Conversation not found."
@@ -2186,28 +1775,17 @@ def open_conversation_modal(n_clicks_list, file_contents):
2186
  else:
2187
  row = conversation_row.iloc[0]
2188
  conversation_text = row.get("conversation", "No conversation data available.")
2189
-
2190
- # Get cluster name if available
2191
  cluster_name = row.get("deduplicated_topic_name", "Unknown cluster")
2192
-
2193
- # Create subheader with both Chat ID and cluster name
2194
  subheader_content = html.Div(
2195
  [
2196
- html.Span(
2197
- f"Chat ID: {chat_id}",
2198
- style={"fontWeight": "600", "marginRight": "1rem"},
2199
- ),
2200
- html.Span(
2201
- f"Cluster: {cluster_name}",
2202
- style={"color": "hsl(215.4, 16.3%, 46.9%)"},
2203
- ),
2204
  ]
2205
  )
2206
-
2207
  return {"display": "flex"}, conversation_text, subheader_content
2208
 
2209
 
2210
- # Callback to close modal
2211
  @callback(
2212
  Output("conversation-modal", "style", allow_duplicate=True),
2213
  [Input("close-modal-btn", "n_clicks")],
@@ -2216,10 +1794,10 @@ def open_conversation_modal(n_clicks_list, file_contents):
2216
  def close_conversation_modal(n_clicks):
2217
  if n_clicks:
2218
  return {"display": "none"}
2219
- return {"display": "none"}
2220
 
2221
 
2222
- # Callback to open dialogs table modal when "Show all dialogs inside" button is clicked
2223
  @callback(
2224
  [
2225
  Output("dialogs-table-modal", "style"),
@@ -2227,174 +1805,51 @@ def close_conversation_modal(n_clicks):
2227
  Output("dialogs-table-content", "children"),
2228
  ],
2229
  [Input("show-all-dialogs-btn", "n_clicks")],
2230
- [State("selected-topic-store", "data")],
2231
  prevent_initial_call=True,
2232
  )
2233
- def open_dialogs_table_modal(n_clicks, selected_topic_data):
2234
- if not n_clicks or not selected_topic_data:
2235
  return {"display": "none"}, "", ""
2236
 
2237
  topic_name = selected_topic_data["topic_name"]
2238
- file_contents = selected_topic_data["file_contents"]
2239
-
2240
- # Get the full data
2241
- content_type, content_string = file_contents.split(",")
2242
- decoded = base64.b64decode(content_string)
2243
-
2244
- if (
2245
- content_type
2246
- == "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
2247
- ):
2248
- df_full = pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
2249
- else: # Assume CSV
2250
- df_full = pd.read_csv(
2251
- io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str}
2252
- )
2253
 
2254
- # Filter to this topic
2255
  topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]
2256
-
2257
- # Create the table
2258
- table_rows = []
2259
-
2260
- # Header row
2261
- table_rows.append(
2262
- html.Tr(
2263
- [
2264
- html.Th("Chat ID"),
2265
- html.Th("Summary"),
2266
- html.Th("Root Cause"),
2267
- html.Th("Sentiment"),
2268
- html.Th("Resolution"),
2269
- html.Th("Urgency"),
2270
- html.Th("Tags"),
2271
- html.Th("Action"),
2272
- ]
2273
- )
2274
- )
2275
-
2276
- # Data rows
2277
  for _, row in topic_conversations.iterrows():
2278
- # Process tags
2279
- tags_str = row.get("consolidated_tags", "")
2280
- if pd.notna(tags_str):
2281
- tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()]
2282
- tags_display = html.Div(
2283
- [
2284
- html.Span(
2285
- tag,
2286
- className="dialog-tag-small",
2287
- style={"backgroundColor": "#6c757d", "color": "white"},
2288
- )
2289
- for tag in tags[:3] # Show only first 3 tags
2290
- ]
2291
- + (
2292
- [
2293
- html.Span(
2294
- f"+{len(tags) - 3}",
2295
- className="dialog-tag-small",
2296
- style={"backgroundColor": "#6c757d", "color": "white"},
2297
- )
2298
- ]
2299
- if len(tags) > 3
2300
- else []
2301
- ),
2302
- className="dialog-tags-cell",
2303
- )
2304
- else:
2305
- tags_display = html.Span(
2306
- "No tags",
2307
- style={"color": "var(--muted-foreground)", "fontStyle": "italic"},
2308
- )
2309
-
2310
  table_rows.append(
2311
- html.Tr(
2312
- [
2313
- html.Td(
2314
- row["id"],
2315
- style={"fontFamily": "monospace", "fontSize": "0.8rem"},
2316
- ),
2317
- html.Td(
2318
- row.get("Summary", "No summary"),
2319
- className="dialog-summary-cell",
2320
- ),
2321
- html.Td(
2322
- html.Span(
2323
- str(row.get("Root_Cause", "Unknown")).capitalize()
2324
- if not pd.isna(row.get("Root_Cause"))
2325
- else "Unknown",
2326
- className="dialog-tag-small",
2327
- style={
2328
- "backgroundColor": "#8B4513", # Brown color for root cause
2329
- "color": "white",
2330
- },
2331
- )
2332
- ),
2333
- html.Td(
2334
- html.Span( # if sentiment is negative, color it red, otherwise grey
2335
- row.get("Sentiment", "Unknown").capitalize(),
2336
- className="dialog-tag-small",
2337
- style={
2338
- "backgroundColor": "#dc3545"
2339
- if row.get("Sentiment") == "negative"
2340
- else "#6c757d",
2341
- "color": "white",
2342
- },
2343
- )
2344
- ),
2345
- html.Td(
2346
- html.Span( # if resolution is unresolved, color it red, otherwise grey
2347
- row.get("Resolution", "Unknown").capitalize(),
2348
- className="dialog-tag-small",
2349
- style={
2350
- "backgroundColor": "#dc3545"
2351
- if row.get("Resolution") == "unresolved"
2352
- else "#6c757d",
2353
- "color": "white",
2354
- },
2355
- )
2356
- ),
2357
- html.Td(
2358
- html.Span( # if urgency is urgent, color it red, otherwise grey
2359
- row.get("Urgency", "Unknown").capitalize(),
2360
- className="dialog-tag-small",
2361
- style={
2362
- "backgroundColor": "#dc3545"
2363
- if row.get("Urgency") == "urgent"
2364
- else "#6c757d",
2365
- "color": "white",
2366
- },
2367
- )
2368
- ),
2369
- html.Td(tags_display),
2370
- html.Td(
2371
- html.Button(
2372
- [
2373
- html.I(
2374
- className="fas fa-eye",
2375
- style={"marginRight": "0.25rem"},
2376
- ),
2377
- "View chat session",
2378
- ],
2379
- id={"type": "open-chat-btn", "index": row["id"]},
2380
- className="open-chat-btn",
2381
- n_clicks=0,
2382
- )
2383
- ),
2384
- ]
2385
- )
2386
  )
2387
-
2388
  table = html.Table(table_rows, className="dialogs-table")
2389
-
2390
- modal_title = (
2391
- f"All dialogs in Topic: {topic_name} ({len(topic_conversations)} dialogs)"
2392
- )
2393
-
2394
  return {"display": "flex"}, modal_title, table
2395
 
2396
 
2397
- # Callback to close dialogs table modal
2398
  @callback(
2399
  Output("dialogs-table-modal", "style", allow_duplicate=True),
2400
  [Input("close-dialogs-modal-btn", "n_clicks")],
@@ -2403,10 +1858,10 @@ def open_dialogs_table_modal(n_clicks, selected_topic_data):
2403
  def close_dialogs_table_modal(n_clicks):
2404
  if n_clicks:
2405
  return {"display": "none"}
2406
- return {"display": "none"}
2407
 
2408
 
2409
- # Callback to open conversation modal from dialogs table
2410
  @callback(
2411
  [
2412
  Output("conversation-modal", "style", allow_duplicate=True),
@@ -2414,77 +1869,34 @@ def close_dialogs_table_modal(n_clicks):
2414
  Output("conversation-subheader", "children", allow_duplicate=True),
2415
  ],
2416
  [Input({"type": "open-chat-btn", "index": dash.dependencies.ALL}, "n_clicks")],
2417
- [State("upload-data", "contents")],
2418
  prevent_initial_call=True,
2419
  )
2420
- def open_conversation_from_table(n_clicks_list, file_contents):
2421
- # Check if any button was clicked
2422
- if not any(n_clicks_list) or not file_contents:
2423
  return {"display": "none"}, "", ""
2424
 
2425
- # Get which button was clicked
2426
  ctx = dash.callback_context
2427
  if not ctx.triggered:
2428
  return {"display": "none"}, "", ""
2429
 
2430
- # Extract the chat ID from the triggered input
2431
  triggered_id = ctx.triggered[0]["prop_id"]
2432
  chat_id = json.loads(triggered_id.split(".")[0])["index"]
2433
 
2434
- # Debug: print the chat_id to understand its type and value
2435
- print(f"DEBUG: Looking for chat_id: {chat_id} (type: {type(chat_id)})")
2436
-
2437
- # Get the full conversation from the uploaded file
2438
- content_type, content_string = file_contents.split(",")
2439
- decoded = base64.b64decode(content_string)
2440
-
2441
- if (
2442
- content_type
2443
- == "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
2444
- ):
2445
- df_full = pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
2446
- else: # Assume CSV
2447
- df_full = pd.read_csv(
2448
- io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str}
2449
- )
2450
-
2451
- # Debug: print some info about the dataframe
2452
- print(f"DEBUG: DataFrame shape: {df_full.shape}")
2453
- print(f"DEBUG: Available chat IDs (first 5): {df_full['id'].head().tolist()}")
2454
- print(f"DEBUG: Chat ID types in df: {df_full['id'].dtype}")
2455
-
2456
- # Try to match with different data type conversions
2457
  conversation_row = df_full[df_full["id"] == chat_id]
2458
-
2459
- # If not found, try converting types
2460
- if len(conversation_row) == 0:
2461
- # Try converting chat_id to string
2462
- conversation_row = df_full[df_full["id"].astype(str) == str(chat_id)]
2463
-
2464
- # If still not found, try converting df id to int
2465
- if len(conversation_row) == 0:
2466
- try:
2467
- conversation_row = df_full[df_full["id"] == int(chat_id)]
2468
- except (ValueError, TypeError):
2469
- pass
2470
-
2471
  if len(conversation_row) == 0:
2472
- conversation_text = f"Conversation not found for Chat ID: {chat_id}. Available IDs: {df_full['id'].head(10).tolist()}"
2473
  subheader_content = f"Chat ID: {chat_id} (Not Found)"
2474
  else:
2475
- conversation_row = conversation_row.iloc[0]
2476
- conversation_text = conversation_row.get(
2477
- "conversation",
2478
- "No conversation available, oopsie.", # fix here the conversation status
2479
- )
2480
-
2481
- # Create subheader with metadata
2482
- subheader_content = f"Chat ID: {chat_id} | Topic: {conversation_row.get('deduplicated_topic_name', 'Unknown')} | Sentiment: {conversation_row.get('Sentiment', 'Unknown')} | Resolution: {conversation_row.get('Resolution', 'Unknown')}"
2483
-
2484
  return {"display": "flex"}, conversation_text, subheader_content
2485
 
2486
 
2487
- # Callback to open root cause modal when root cause icon is clicked
2488
  @callback(
2489
  [
2490
  Output("root-cause-modal", "style"),
@@ -2492,181 +1904,64 @@ def open_conversation_from_table(n_clicks_list, file_contents):
2492
  Output("root-cause-table-content", "children"),
2493
  ],
2494
  [Input({"type": "root-cause-icon", "index": dash.dependencies.ALL}, "n_clicks")],
2495
- [State("selected-topic-store", "data")],
2496
  prevent_initial_call=True,
2497
  )
2498
- def open_root_cause_modal(n_clicks_list, selected_topic_data):
2499
- # Check if any icon was clicked
2500
- if not any(n_clicks_list) or not selected_topic_data:
2501
  return {"display": "none"}, "", ""
2502
 
2503
- # Get which icon was clicked
2504
  ctx = dash.callback_context
2505
  if not ctx.triggered:
2506
  return {"display": "none"}, "", ""
2507
 
2508
  triggered_id = ctx.triggered[0]["prop_id"]
2509
  root_cause = json.loads(triggered_id.split(".")[0])["index"]
2510
-
2511
  topic_name = selected_topic_data["topic_name"]
2512
- file_contents = selected_topic_data["file_contents"]
2513
-
2514
- # Get the full data
2515
- content_type, content_string = file_contents.split(",")
2516
- decoded = base64.b64decode(content_string)
2517
-
2518
- if (
2519
- content_type
2520
- == "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
2521
- ):
2522
- df_full = pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
2523
- else: # Assume CSV
2524
- df_full = pd.read_csv(
2525
- io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str}
2526
- )
2527
-
2528
- # Filter to this topic and root cause
2529
  filtered_conversations = df_full[
2530
  (df_full["deduplicated_topic_name"] == topic_name)
2531
  & (df_full["root_cause_subcluster"] == root_cause)
2532
  ]
2533
 
2534
- # Create the table
2535
- table_rows = []
2536
-
2537
- # Header row
2538
- table_rows.append(
2539
- html.Tr(
2540
- [
2541
- html.Th("Chat ID"),
2542
- html.Th("Summary"),
2543
- html.Th("Sentiment"),
2544
- html.Th("Resolution"),
2545
- html.Th("Urgency"),
2546
- html.Th("Tags"),
2547
- html.Th("Action"),
2548
- ]
2549
- )
2550
- )
2551
-
2552
- # Data rows
2553
  for _, row in filtered_conversations.iterrows():
2554
- # Process tags
2555
- tags_str = row.get("consolidated_tags", "")
2556
- if pd.notna(tags_str):
2557
- tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()]
2558
- tags_display = html.Div(
2559
- [
2560
- html.Span(
2561
- tag,
2562
- className="dialog-tag-small",
2563
- style={"backgroundColor": "#6c757d", "color": "white"},
2564
- )
2565
- for tag in tags[:3] # Show only first 3 tags
2566
- ]
2567
- + (
2568
- [
2569
- html.Span(
2570
- f"+{len(tags) - 3}",
2571
- className="dialog-tag-small",
2572
- style={"backgroundColor": "#6c757d", "color": "white"},
2573
- )
2574
- ]
2575
- if len(tags) > 3
2576
- else []
2577
- ),
2578
- className="dialog-tags-cell",
2579
- )
2580
- else:
2581
- tags_display = html.Span(
2582
- "No tags",
2583
- style={"color": "var(--muted-foreground)", "fontStyle": "italic"},
2584
- )
2585
 
2586
  table_rows.append(
2587
- html.Tr(
2588
- [
2589
- html.Td(
2590
- row["id"],
2591
- style={"fontFamily": "monospace", "fontSize": "0.8rem"},
2592
- ),
2593
- html.Td(
2594
- row.get("Summary", "No summary"),
2595
- className="dialog-summary-cell",
2596
- ),
2597
- html.Td(
2598
- html.Span(
2599
- row.get("Sentiment", "Unknown").capitalize(),
2600
- className="dialog-tag-small",
2601
- style={
2602
- "backgroundColor": "#dc3545"
2603
- if row.get("Sentiment") == "negative"
2604
- else "#6c757d",
2605
- "color": "white",
2606
- },
2607
- )
2608
- ),
2609
- html.Td(
2610
- html.Span(
2611
- row.get("Resolution", "Unknown").capitalize(),
2612
- className="dialog-tag-small",
2613
- style={
2614
- "backgroundColor": "#dc3545"
2615
- if row.get("Resolution") == "unresolved"
2616
- else "#6c757d",
2617
- "color": "white",
2618
- },
2619
- )
2620
- ),
2621
- html.Td(
2622
- html.Span(
2623
- row.get("Urgency", "Unknown").capitalize(),
2624
- className="dialog-tag-small",
2625
- style={
2626
- "backgroundColor": "#dc3545"
2627
- if row.get("Urgency") == "urgent"
2628
- else "#6c757d",
2629
- "color": "white",
2630
- },
2631
- )
2632
- ),
2633
- html.Td(tags_display),
2634
- html.Td(
2635
- html.Button(
2636
- [
2637
- html.I(
2638
- className="fas fa-eye",
2639
- style={"marginRight": "0.25rem"},
2640
- ),
2641
- "View chat",
2642
- ],
2643
- id={"type": "open-chat-btn-rc", "index": row["id"]},
2644
- className="open-chat-btn",
2645
- n_clicks=0,
2646
- )
2647
- ),
2648
- ]
2649
- )
2650
  )
2651
-
2652
  table = html.Table(table_rows, className="dialogs-table")
2653
-
2654
- modal_title = f"Dialogs with Root Cause: {root_cause} (Topic: {topic_name})"
2655
  count_info = html.P(
2656
- f"Found {len(filtered_conversations)} dialogs with this root cause",
2657
- style={
2658
- "margin": "0 0 1rem 0",
2659
- "color": "var(--muted-foreground)",
2660
- "fontSize": "0.875rem",
2661
- },
2662
  )
2663
-
2664
  content = html.Div([count_info, table])
2665
-
2666
  return {"display": "flex"}, modal_title, content
2667
 
2668
 
2669
- # Callback to close root cause modal
2670
  @callback(
2671
  Output("root-cause-modal", "style", allow_duplicate=True),
2672
  [Input("close-root-cause-modal-btn", "n_clicks")],
@@ -2675,10 +1970,10 @@ def open_root_cause_modal(n_clicks_list, selected_topic_data):
2675
  def close_root_cause_modal(n_clicks):
2676
  if n_clicks:
2677
  return {"display": "none"}
2678
- return {"display": "none"}
2679
 
2680
 
2681
- # Callback to open conversation modal from root cause table
2682
  @callback(
2683
  [
2684
  Output("conversation-modal", "style", allow_duplicate=True),
@@ -2686,86 +1981,43 @@ def close_root_cause_modal(n_clicks):
2686
  Output("conversation-subheader", "children", allow_duplicate=True),
2687
  ],
2688
  [Input({"type": "open-chat-btn-rc", "index": dash.dependencies.ALL}, "n_clicks")],
2689
- [State("upload-data", "contents")],
2690
  prevent_initial_call=True,
2691
  )
2692
- def open_conversation_from_root_cause_table(n_clicks_list, file_contents):
2693
- # Check if any button was clicked
2694
- if not any(n_clicks_list) or not file_contents:
2695
  return {"display": "none"}, "", ""
2696
 
2697
- # Get which button was clicked
2698
  ctx = dash.callback_context
2699
  if not ctx.triggered:
2700
  return {"display": "none"}, "", ""
2701
-
2702
  triggered_id = ctx.triggered[0]["prop_id"]
2703
  chat_id = json.loads(triggered_id.split(".")[0])["index"]
2704
 
2705
- # Get the full conversation from the uploaded file
2706
- content_type, content_string = file_contents.split(",")
2707
- decoded = base64.b64decode(content_string)
2708
-
2709
- if (
2710
- content_type
2711
- == "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
2712
- ):
2713
- df_full = pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
2714
- else: # Assume CSV
2715
- df_full = pd.read_csv(
2716
- io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str}
2717
- )
2718
-
2719
- # Find the conversation with this chat ID
2720
  conversation_row = df_full[df_full["id"] == chat_id]
2721
-
2722
- # If not found, try converting types
2723
  if len(conversation_row) == 0:
2724
  conversation_row = df_full[df_full["id"].astype(str) == str(chat_id)]
2725
 
2726
- if len(conversation_row) == 0:
2727
- try:
2728
- conversation_row = df_full[df_full["id"] == int(chat_id)]
2729
- except (ValueError, TypeError):
2730
- pass
2731
-
2732
  if len(conversation_row) == 0:
2733
  conversation_text = f"Conversation not found for Chat ID: {chat_id}"
2734
  subheader_content = f"Chat ID: {chat_id} (Not Found)"
2735
  else:
2736
  row = conversation_row.iloc[0]
2737
  conversation_text = row.get("conversation", "No conversation data available.")
2738
-
2739
- # Get additional metadata
2740
  root_cause = row.get("root_cause_subcluster", "Unknown")
2741
  cluster_name = row.get("deduplicated_topic_name", "Unknown cluster")
2742
-
2743
- # Create subheader with metadata including root cause
2744
- subheader_content = html.Div(
2745
- [
2746
- html.Span(
2747
- f"Chat ID: {chat_id}",
2748
- style={"fontWeight": "600", "marginRight": "1rem"},
2749
- ),
2750
- html.Span(
2751
- f"Cluster: {cluster_name}",
2752
- style={"color": "hsl(215.4, 16.3%, 46.9%)", "marginRight": "1rem"},
2753
- ),
2754
- html.Span(
2755
- f"Root Cause: {root_cause}",
2756
- style={"color": "#8b6f47", "fontWeight": "500"},
2757
- ),
2758
- ]
2759
- )
2760
-
2761
  return {"display": "flex"}, conversation_text, subheader_content
2762
 
2763
- # IMPORTANT: Expose the server for Gunicorn, needed for HF Spaces
2764
  server = app.server
2765
 
2766
- # if __name__ == "__main__":
2767
- # app.run(debug=False)
2768
-
2769
- # IMPORTANT: Expose the server for Gunicorn, needed for HF Spaces
2770
- if __name__ == '__main__':
2771
  app.run_server(debug=True)
 
83
  children="Sessions Observatory",
84
  className="section-header",
85
  ),
 
86
  dcc.Graph(
87
  id="bubble-chart",
88
  style={"height": "calc(100% - 154px)"},
89
+ ),
90
  html.Div(
91
  [
 
92
  html.Div(
93
  [
94
  html.Div(
 
101
  ],
102
  className="control-labels-row",
103
  ),
 
104
  html.Div(
105
  [
106
  html.Div(
 
185
  html.I(
186
  className="fas fa-info-circle",
187
  title="Root cause detection is experimental and may require manual review since it is generated by AI models. Root causes are only shown in clusters with identifiable root causes.",
 
188
  style={
189
  "marginLeft": "0.2rem",
190
+ "color": "#6c757d",
191
  "fontSize": "0.9rem",
192
  "cursor": "pointer",
193
  "verticalAlign": "middle",
 
202
  ),
203
  ],
204
  id="root-causes-section",
205
+ style={"display": "none"},
 
 
206
  ),
207
  # Added Tags section
208
  html.Div(
 
217
  ),
218
  ],
219
  id="tags-section",
220
+ style={"display": "none"},
 
 
221
  ),
222
  ],
223
  className="details-section",
 
268
  ),
269
  html.H3("No topic selected"),
270
  html.P(
271
+ "Click a bubble to view topic details."
272
  ),
273
  ],
274
  className="no-selection-message",
 
387
  ),
388
  # Store the processed data
389
  dcc.Store(id="stored-data"),
390
+ # NEW: Store for the minimal raw dataframe
391
+ dcc.Store(id="raw-data"),
392
  # Store the current selected topic for dialogs modal
393
  dcc.Store(id="selected-topic-store"),
394
  # Store the current selected root cause for root cause modal
 
397
  className="app-container",
398
  )
399
 
400
+ # Define CSS for the app (no changes needed here, so it's omitted for brevity)
401
  app.index_string = """
402
  <!DOCTYPE html>
403
  <html>
 
1221
  )
1222
  def update_topic_distribution_header(data):
1223
  if not data:
1224
+ return "Sessions Observatory"
1225
 
1226
  df = pd.DataFrame(data)
1227
+ total_dialogs = df["count"].sum()
1228
  return f"Sessions Observatory ({total_dialogs} dialogs)"
1229
 
1230
 
 
1232
  @callback(
1233
  [
1234
  Output("stored-data", "data"),
1235
+ Output("raw-data", "data"),
1236
  Output("upload-status", "children"),
1237
+ Output("upload-status", "style"),
1238
  Output("main-content", "style"),
1239
  ],
1240
  [Input("upload-data", "contents")],
 
1242
  )
1243
  def process_upload(contents, filename):
1244
  if contents is None:
1245
+ return None, None, "", {"display": "none"}, {"display": "none"}
1246
 
1247
  try:
 
1248
  content_type, content_string = contents.split(",")
1249
  decoded = base64.b64decode(content_string)
1250
 
1251
  if "csv" in filename.lower():
1252
+ df = pd.read_csv(io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str})
1253
  elif "xls" in filename.lower():
1254
+ df = pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
1255
+ else:
1256
+ return (
1257
+ None,
1258
+ None,
1259
+ html.Div(
1260
+ ["Unsupported file. Please upload a CSV or Excel file."],
1261
+ style={"color": "var(--destructive)"},
1262
+ ),
1263
+ {"display": "block"},
1264
+ {"display": "none"},
1265
  )
 
 
 
 
 
 
 
 
 
 
 
1266
 
 
1267
  EXCLUDE_UNCLUSTERED = True
1268
  if EXCLUDE_UNCLUSTERED and "deduplicated_topic_name" in df.columns:
1269
  df = df[df["deduplicated_topic_name"] != "Unclustered"].copy()
 
 
 
 
1270
  else:
1271
  return (
1272
+ None,
1273
  None,
1274
  html.Div(
1275
+ ["Please upload a CSV or Excel file with a 'deduplicated_topic_name' column."],
 
 
 
 
 
 
1276
  style={"color": "var(--destructive)"},
1277
  ),
1278
+ {"display": "block"},
1279
  {"display": "none"},
1280
  )
1281
 
1282
+ # Compute aggregated topic stats once
1283
  topic_stats = analyze_topics(df)
1284
 
1285
+ # Store only the columns you use elsewhere to keep payload smaller
1286
+ needed_cols = [
1287
+ "id",
1288
+ "conversation",
1289
+ "deduplicated_topic_name",
1290
+ "consolidated_tags",
1291
+ "Root_Cause",
1292
+ "root_cause_subcluster",
1293
+ "Sentiment",
1294
+ "Resolution",
1295
+ "Urgency",
1296
+ "Summary",
1297
+ ]
1298
+ df_min = df[[c for c in needed_cols if c in df.columns]].copy()
1299
+
1300
  return (
1301
  topic_stats.to_dict("records"),
1302
+ df_min.to_dict("records"),
1303
  html.Div(
1304
  [
1305
  html.I(
1306
  className="fas fa-check-circle",
1307
+ style={"color": "hsl(142.1, 76.2%, 36.3%)", "marginRight": "8px"},
 
 
 
1308
  ),
1309
  f'Successfully uploaded "{filename}"',
1310
  ],
1311
  style={"color": "hsl(142.1, 76.2%, 36.3%)"},
1312
  ),
1313
+ {"display": "block"},
1314
+ {"display": "block", "height": "calc(100vh - 40px)"},
 
 
 
1315
  )
1316
 
1317
  except Exception as e:
1318
  return (
1319
+ None,
1320
  None,
1321
  html.Div(
1322
  [
 
1324
  className="fas fa-exclamation-triangle",
1325
  style={"color": "var(--destructive)", "marginRight": "8px"},
1326
  ),
1327
+ f"Error: {e}",
1328
  ],
1329
  style={"color": "var(--destructive)"},
1330
  ),
1331
+ {"display": "block"},
1332
  {"display": "none"},
1333
  )
1334
 
1335
 
1336
  # Function to analyze the topics and create statistics
1337
  def analyze_topics(df):
 
1338
  topic_stats = (
 
 
 
1339
  df.groupby("deduplicated_topic_name")
1340
  .agg(
1341
  count=("id", "count"),
 
1345
  )
1346
  .reset_index()
1347
  )
1348
+ topic_stats["negative_rate"] = (topic_stats["negative_count"] / topic_stats["count"] * 100).round(1)
1349
+ topic_stats["unresolved_rate"] = (topic_stats["unresolved_count"] / topic_stats["count"] * 100).round(1)
1350
+ topic_stats["urgent_rate"] = (topic_stats["urgent_count"] / topic_stats["count"] * 100).round(1)
 
 
 
 
 
 
 
 
 
 
1351
  topic_stats = apply_binned_layout(topic_stats)
 
1352
  return topic_stats
1353
 
1354
 
1355
+ # Group topics into rows ("bins") by dialog count and assign x/y positions
 
 
1356
  def apply_binned_layout(df, padding=0, bin_config=None, max_items_per_row=6):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1357
  df_sorted = df.copy()
 
 
 
1358
  if bin_config is None:
1359
  bin_config = [
1360
+ (100, None, "100+ dialogs"), (50, 99, "50-99 dialogs"),
1361
+ (25, 49, "25-49 dialogs"), (9, 24, "9-24 dialogs"),
1362
+ (7, 8, "7-8 dialogs"), (5, 6, "5-6 dialogs"),
1363
+ (4, 4, "4 dialogs"), (0, 3, "0-3 dialogs"),
 
 
 
 
1364
  ]
 
 
1365
  bin_descriptions = {}
1366
  conditions = []
1367
  bin_values = []
 
1368
  for i, (lower, upper, description) in enumerate(bin_config):
1369
  bin_name = f"Bin {i + 1}"
1370
  bin_descriptions[bin_name] = description
1371
  bin_values.append(bin_name)
1372
+ if upper is None:
 
1373
  conditions.append(df_sorted["count"] >= lower)
1374
  else:
1375
+ conditions.append((df_sorted["count"] >= lower) & (df_sorted["count"] <= upper))
1376
+ df_sorted["bin"] = np.select(conditions, bin_values, default=f"Bin {len(bin_config)}")
 
 
 
 
1377
  df_sorted["bin_description"] = df_sorted["bin"].map(bin_descriptions)
 
 
1378
  df_sorted = df_sorted.sort_values(by=["bin", "count"], ascending=[True, False])
 
 
1379
  original_bins = df_sorted["bin"].unique()
1380
  new_rows = []
1381
  new_bin_descriptions = bin_descriptions.copy()
 
1382
  for bin_name in original_bins:
1383
  bin_mask = df_sorted["bin"] == bin_name
1384
  bin_group = df_sorted[bin_mask]
1385
  bin_size = len(bin_group)
 
 
1386
  if bin_size > max_items_per_row:
 
1387
  num_sub_bins = (bin_size + max_items_per_row - 1) // max_items_per_row
 
 
1388
  items_per_sub_bin = [bin_size // num_sub_bins] * num_sub_bins
 
 
1389
  remainder = bin_size % num_sub_bins
1390
  for i in range(remainder):
1391
  items_per_sub_bin[i] += 1
 
 
1392
  original_description = bin_descriptions[bin_name]
 
 
1393
  start_idx = 0
1394
  for i in range(num_sub_bins):
 
1395
  new_bin_name = f"{bin_name}_{i + 1}"
 
 
1396
  new_description = f"{original_description} ({i + 1}/{num_sub_bins})"
1397
  new_bin_descriptions[new_bin_name] = new_description
 
 
1398
  end_idx = start_idx + items_per_sub_bin[i]
1399
  sub_bin_rows = bin_group.iloc[start_idx:end_idx].copy()
 
 
1400
  sub_bin_rows["bin"] = new_bin_name
1401
  sub_bin_rows["bin_description"] = new_description
 
 
1402
  new_rows.append(sub_bin_rows)
 
 
1403
  start_idx = end_idx
 
 
1404
  df_sorted = df_sorted[~bin_mask]
 
 
1405
  if new_rows:
1406
  df_sorted = pd.concat([df_sorted] + new_rows)
 
 
1407
  df_sorted = df_sorted.sort_values(by=["bin", "count"], ascending=[True, False])
 
 
1408
  bins_with_topics = sorted(df_sorted["bin"].unique())
1409
  num_rows = len(bins_with_topics)
 
1410
  available_height = 100 - (2 * padding)
1411
  row_height = available_height / num_rows
1412
+ row_positions = {bin_name: padding + i * row_height + (row_height / 2) for i, bin_name in enumerate(bins_with_topics)}
 
 
 
 
 
 
 
1413
  df_sorted["y"] = df_sorted["bin"].map(row_positions)
1414
+ center_point = 50
 
 
1415
  for bin_name in bins_with_topics:
 
1416
  bin_mask = df_sorted["bin"] == bin_name
1417
  num_topics_in_bin = bin_mask.sum()
 
1418
  if num_topics_in_bin == 1:
 
1419
  df_sorted.loc[bin_mask, "x"] = center_point
1420
  else:
1421
+ spacing = 17.5 if num_topics_in_bin < max_items_per_row else 15
1422
+ total_width = (num_topics_in_bin - 1) * spacing
1423
+ start_pos = center_point - (total_width / 2)
1424
+ positions = [start_pos + (i * spacing) for i in range(num_topics_in_bin)]
1425
+ df_sorted.loc[bin_mask, "x"] = positions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1426
  df_sorted["size_rank"] = range(1, len(df_sorted) + 1)
 
1427
  return df_sorted
1428
 
1429
 
1430
+ # Recompute bubble positions by re-applying the binned layout
1431
  def update_bubble_positions(df: pd.DataFrame) -> pd.DataFrame:
 
1432
  return apply_binned_layout(df)
1433
 
1434
 
1435
+ # Callback to update the bubble chart (no changes needed)
1436
  @callback(
1437
  Output("bubble-chart", "figure"),
1438
  [
 
1446
 
1447
  df = pd.DataFrame(data)
1448
 
1449
+ # Note: `update_bubble_positions` is now called inside `analyze_topics` once
1450
+ # and the results are stored. We don't call it here anymore.
1451
+ # The 'x' and 'y' values are already in the `data`.
1452
+ # df = update_bubble_positions(df) # This line can be removed if positions are pre-calculated
1453
 
 
1454
  size_values = df["count"]
1455
  raw_sizes = df["count"]
1456
  size_title = "Dialog Count"
1457
+ min_size = 1
 
 
 
1458
  if size_values.max() > size_values.min():
 
1459
  log_sizes = np.log1p(size_values)
1460
+ size_values = (min_size + (log_sizes - log_sizes.min()) / (log_sizes.max() - log_sizes.min()) * 50)
 
 
 
 
 
1461
  else:
 
1462
  size_values = np.ones(len(df)) * 12.5
1463
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1464
  if color_metric == "negative_rate":
1465
  color_values = df["negative_rate"]
 
1466
  color_title = "Negativity (%)"
 
 
 
 
1467
  color_scale = "Teal"
 
1468
  elif color_metric == "unresolved_rate":
1469
  color_values = df["unresolved_rate"]
1470
  color_title = "Unresolved (%)"
 
 
 
 
1471
  color_scale = "Teal"
1472
+ else: # urgent_rate
1473
  color_values = df["urgent_rate"]
1474
  color_title = "Urgency (%)"
 
 
 
1475
  color_scale = "Teal"
1476
 
 
1477
  hover_text = [
1478
  f"Topic: {topic}<br>{size_title}: {raw:.1f}<br>{color_title}: {color:.1f}<br>Group: {bin_desc}"
1479
+ for topic, raw, color, bin_desc in zip(df["deduplicated_topic_name"], raw_sizes, color_values, df["bin_description"])
 
 
 
 
 
1480
  ]
1481
 
 
1482
  fig = px.scatter(
1483
  df,
1484
+ x="x", y="y",
 
1485
  size=size_values,
1486
  color=color_values,
 
1487
  hover_name="deduplicated_topic_name",
1488
+ hover_data={"x": False, "y": False, "bin_description": True},
1489
+ size_max=42.5,
 
 
 
 
1490
  color_continuous_scale=color_scale,
1491
+ custom_data=["deduplicated_topic_name", "count", "negative_rate", "unresolved_rate", "urgent_rate", "bin_description"],
 
 
 
 
 
 
 
1492
  )
1493
 
 
1494
  fig.update_traces(
1495
+ mode="markers",
1496
  marker=dict(sizemode="area", opacity=0.8, line=dict(width=1, color="white")),
1497
  hovertemplate="%{hovertext}<extra></extra>",
1498
  hovertext=hover_text,
1499
  )
1500
 
 
1501
  annotations = []
1502
  for i, row in df.iterrows():
 
1503
  words = row["deduplicated_topic_name"].split()
1504
+ wrapped_text = "<br>".join([" ".join(words[i : i + 4]) for i in range(0, len(words), 4)])
1505
+ # Use df.index.get_loc(i) to safely get the index position for size_values
1506
+ marker_size = (size_values[df.index.get_loc(i)] / 20)
 
 
 
 
 
 
 
1507
  annotations.append(
1508
  dict(
1509
+ x=row["x"], y=row["y"] + 0.125 + marker_size,
1510
+ text=wrapped_text, showarrow=False, textangle=0,
1511
+ font=dict(size=9, color="var(--foreground)", family="Arial, sans-serif", weight="bold"),
1512
+ xanchor="center", yanchor="top",
1513
+ bgcolor="rgba(255,255,255,0.7)", bordercolor="rgba(0,0,0,0.1)",
1514
+ borderwidth=1, borderpad=1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1515
  )
1516
  )
1517
 
 
1518
  unique_bins = sorted(df["bin"].unique())
1519
+ bin_y_positions = [df[df["bin"] == bin_name]["y"].mean() for bin_name in unique_bins]
 
 
 
 
1520
  bin_descriptions = df.set_index("bin")["bin_description"].to_dict()
1521
 
1522
  for bin_name, bin_y in zip(unique_bins, bin_y_positions):
1523
+ fig.add_shape(type="line", x0=0, y0=bin_y, x1=100, y1=bin_y, line=dict(color="rgba(0,0,0,0.1)", width=1, dash="dot"), layer="below")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1524
  annotations.append(
1525
  dict(
1526
+ x=0, y=bin_y, xref="x", yref="y",
1527
+ text=bin_descriptions[bin_name], showarrow=False,
 
 
 
 
1528
  font=dict(size=8.25, color="var(--muted-foreground)"),
1529
+ align="left", xanchor="left", yanchor="middle",
1530
+ bgcolor="rgba(255,255,255,0.7)", borderpad=1,
 
 
 
1531
  )
1532
  )
1533
 
1534
  fig.update_layout(
1535
  title=None,
1536
+ xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, title=None, range=[0, 100]),
1537
+ yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, title=None, range=[0, 100], autorange="reversed"),
 
 
 
 
 
 
 
 
 
 
 
 
 
1538
  hovermode="closest",
1539
  margin=dict(l=0, r=0, t=10, b=10),
1540
+ coloraxis_colorbar=dict(title=color_title, title_font=dict(size=9), tickfont=dict(size=8), thickness=10, len=0.6, yanchor="middle", y=0.5, xpad=0),
 
 
 
 
 
 
 
 
 
1541
  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
1542
  paper_bgcolor="rgba(0,0,0,0)",
1543
  plot_bgcolor="rgba(0,0,0,0)",
1544
  hoverlabel=dict(bgcolor="white", font_size=12, font_family="Inter"),
1545
+ annotations=annotations,
1546
  )
1547
 
1548
  return fig
1549
 
1550
 
1551
+ # Topic details callback: fires only on a bubble click or a dialog refresh, and reads conversations from the raw-data store.
1552
  @callback(
1553
  [
1554
  Output("topic-title", "children"),
 
1563
  Output("selected-topic-store", "data"),
1564
  ],
1565
  [
1566
+ Input("bubble-chart", "clickData"), # Changed from hoverData
 
1567
  Input("refresh-dialogs-btn", "n_clicks"),
1568
  ],
1569
+ [State("stored-data", "data"), State("raw-data", "data")],
1570
  )
1571
+ def update_topic_details(click_data, refresh_clicks, stored_data, raw_data):
1572
+ # This callback now only fires on click or refresh
1573
+ ctx = dash.callback_context
1574
+ triggered_id = ctx.triggered[0]["prop_id"].split(".")[0]
1575
+
1576
+ # If nothing triggered this, or data is missing, show the initial message
1577
+ if not triggered_id or not stored_data or not raw_data:
1578
+ return "", [], [], "", {"display": "none"}, "", {"display": "none"}, [], {"display": "flex"}, None
1579
+
1580
+ # We need to know which topic is currently selected if we are refreshing
1581
+ if triggered_id == "refresh-dialogs-btn":
1582
+ # To refresh, we would need to know the current topic. This requires
1583
+ # getting it from a store. For simplicity, we can just use the last clickData.
1584
+ # A more robust solution would use another dcc.Store for the *active* topic.
1585
+ # For now, if there is no click_data, a refresh does nothing.
1586
+ if not click_data:
1587
+ return dash.no_update
1588
+
1589
+ topic_name = click_data["points"][0]["customdata"][0]
 
 
 
 
 
1590
  df_stored = pd.DataFrame(stored_data)
1591
  topic_data = df_stored[df_stored["deduplicated_topic_name"] == topic_name].iloc[0]
1592
 
1593
+ # Use the pre-processed data from the store - this is the fast part!
1594
+ df_full = pd.DataFrame(raw_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
1595
  topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]
1596
 
1597
+ # --- From here, all the UI building code is the same ---
1598
  title = html.Div([html.Span(topic_name)])
 
 
1599
  metadata_items = [
1600
  html.Div(
1601
  [
 
1603
  html.Span(f"{int(topic_data['count'])} dialogs"),
1604
  html.Button(
1605
  [
1606
+ html.I(className="fas fa-table", style={"marginRight": "0.25rem"}),
1607
+ "Show all dialogs",
 
 
1608
  ],
1609
  id="show-all-dialogs-btn",
1610
  className="show-dialogs-btn",
 
1615
  style={"display": "flex", "alignItems": "center", "width": "100%"},
1616
  ),
1617
  ]
 
 
1618
  metrics_boxes = [
1619
  html.Div(
1620
  [
 
1639
  ),
1640
  ]
1641
 
 
1642
  root_causes_output = ""
1643
  root_causes_section_style = {"display": "none"}
 
 
1644
  if "root_cause_subcluster" in topic_conversations.columns:
 
 
 
 
1645
  filtered_root_causes = [
1646
+ rc for rc in topic_conversations["root_cause_subcluster"].dropna().unique()
1647
+ if rc not in ["Sub-clustering disabled", "Not eligible for sub-clustering", "No valid root causes", "No Subcluster", "Unclustered", ""]
 
 
 
 
 
 
 
 
 
1648
  ]
 
 
 
 
 
 
1649
  if filtered_root_causes:
 
1650
  root_causes_output = html.Div(
1651
  [
1652
  html.Div(
1653
  [
1654
+ html.I(className="fas fa-exclamation-triangle root-cause-tag-icon"),
 
 
1655
  html.Span(root_cause, style={"marginRight": "6px"}),
1656
  html.I(
1657
  className="fas fa-external-link-alt root-cause-click-icon",
1658
  id={"type": "root-cause-icon", "index": root_cause},
1659
  title="Click to see specific chats assigned with this root cause.",
1660
+ style={"cursor": "pointer", "fontSize": "0.55rem", "opacity": "0.8"},
 
 
 
 
1661
  ),
1662
  ],
1663
  className="root-cause-tag",
 
1669
  )
1670
  root_causes_section_style = {"display": "block"}
1671
 
 
1672
  tags_list = []
1673
+ if "consolidated_tags" in topic_conversations.columns:
1674
+ for tags_str in topic_conversations["consolidated_tags"].dropna():
1675
+ tags_list.extend([tag.strip() for tag in tags_str.split(",") if tag.strip()])
1676
+
 
 
 
1677
  tag_counts = {}
1678
  for tag in tags_list:
1679
  tag_counts[tag] = tag_counts.get(tag, 0) + 1
1680
 
1681
+ sorted_tags = sorted(tag_counts.items(), key=lambda x: (-x[1], x[0]))[:15]
 
 
 
 
 
1682
 
 
1683
  tags_section_style = {"display": "none"}
1684
  if sorted_tags:
 
1685
  tags_output = html.Div(
1686
  [
1687
  html.Div(
 
1698
  tags_section_style = {"display": "block"}
1699
  else:
1700
  tags_output = html.Div(
1701
+ [html.I(className="fas fa-info-circle", style={"marginRight": "5px"}), "No tags found for this topic"],
 
 
 
1702
  className="no-tags-message",
1703
  )
1704
 
 
1705
  sample_size = min(5, len(topic_conversations))
1706
  if sample_size > 0:
1707
+ samples = topic_conversations.sample(n=sample_size)
 
 
1708
  dialog_items = []
1709
  for _, row in samples.iterrows():
1710
+ tags = [
1711
+ html.Span(row["Sentiment"], className="dialog-tag tag-sentiment"),
1712
+ html.Span(row["Resolution"], className="dialog-tag tag-resolution"),
1713
+ html.Span(row["Urgency"], className="dialog-tag tag-urgency"),
1714
+ ]
 
 
 
 
 
 
1715
  if "id" in row:
1716
+ tags.append(html.Span(
1717
+ [f"Chat ID: {row['id']} ", html.I(className="fas fa-arrow-up-right-from-square conversation-icon", id={"type": "conversation-icon", "index": row["id"]}, title="View full conversation", style={"marginLeft": "0.25rem"})],
1718
+ className="dialog-tag tag-chat-id", style={"display": "inline-flex", "alignItems": "center"}
1719
+ ))
1720
+ if "Root_Cause" in row and pd.notna(row["Root_Cause"]) and row["Root_Cause"] != "na":
1721
+ tags.append(html.Span(f"Root Cause: {row['Root_Cause']}", className="dialog-tag tag-root-cause"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1722
 
1723
  dialog_items.append(
1724
  html.Div(
1725
+ [html.Div(row["Summary"], className="dialog-summary"), html.Div(tags, className="dialog-metadata")],
 
 
 
 
 
 
1726
  className="dialog-item",
1727
  )
1728
  )
 
1729
  sample_dialogs = dialog_items
1730
  else:
1731
+ sample_dialogs = [html.Div("No sample dialogs available for this topic.", style={"color": "var(--muted-foreground)"})]
 
 
 
 
 
1732
 
1733
  return (
1734
  title,
 
1740
  tags_section_style,
1741
  sample_dialogs,
1742
  {"display": "none"},
1743
+ {"topic_name": topic_name}, # Pass only the topic name
1744
  )
1745
 
1746
 
1747
+ # Opens the conversation modal for a clicked conversation icon, looking the chat up in the raw-data store.
1748
  @callback(
1749
  [
1750
  Output("conversation-modal", "style"),
 
1752
  Output("conversation-subheader", "children"),
1753
  ],
1754
  [Input({"type": "conversation-icon", "index": dash.dependencies.ALL}, "n_clicks")],
1755
+ [State("raw-data", "data")],
1756
  prevent_initial_call=True,
1757
  )
1758
+ def open_conversation_modal(n_clicks_list, raw_data):
1759
+ if not any(n_clicks_list) or not raw_data:
 
1760
  return {"display": "none"}, "", ""
1761
 
 
1762
  ctx = dash.callback_context
1763
  if not ctx.triggered:
1764
+ return {"display": "none"}, "", ""
1765
+
 
 
 
1766
  triggered_id = ctx.triggered[0]["prop_id"]
1767
  chat_id = json.loads(triggered_id.split(".")[0])["index"]
1768
 
1769
+ df_full = pd.DataFrame(raw_data)
1770
+
 
 
 
 
 
 
 
 
 
 
 
 
 
1771
  conversation_row = df_full[df_full["id"] == chat_id]
1772
  if len(conversation_row) == 0:
1773
  conversation_text = "Conversation not found."
 
1775
  else:
1776
  row = conversation_row.iloc[0]
1777
  conversation_text = row.get("conversation", "No conversation data available.")
 
 
1778
  cluster_name = row.get("deduplicated_topic_name", "Unknown cluster")
 
 
1779
  subheader_content = html.Div(
1780
  [
1781
+ html.Span(f"Chat ID: {chat_id}", style={"fontWeight": "600", "marginRight": "1rem"}),
1782
+ html.Span(f"Cluster: {cluster_name}", style={"color": "hsl(215.4, 16.3%, 46.9%)"}),
 
 
 
 
 
 
1783
  ]
1784
  )
 
1785
  return {"display": "flex"}, conversation_text, subheader_content
1786
 
1787
 
1788
+ # Callback to close modal (no changes needed)
1789
  @callback(
1790
  Output("conversation-modal", "style", allow_duplicate=True),
1791
  [Input("close-modal-btn", "n_clicks")],
 
1794
def close_conversation_modal(n_clicks):
    """Hide the conversation modal once its close button has been clicked.

    Args:
        n_clicks: Click count of the ``close-modal-btn`` button.

    Returns:
        A style dict hiding the modal when ``n_clicks`` is truthy, otherwise
        ``dash.no_update`` so the current style is left alone.
    """
    return {"display": "none"} if n_clicks else dash.no_update
1798
 
1799
 
1800
+ # NEW: Updated to use raw-data store
1801
  @callback(
1802
  [
1803
  Output("dialogs-table-modal", "style"),
 
1805
  Output("dialogs-table-content", "children"),
1806
  ],
1807
  [Input("show-all-dialogs-btn", "n_clicks")],
1808
+ [State("selected-topic-store", "data"), State("raw-data", "data")],
1809
  prevent_initial_call=True,
1810
  )
1811
def open_dialogs_table_modal(n_clicks, selected_topic_data, raw_data):
    """Open the "all dialogs" modal with one table row per conversation of the selected topic.

    Args:
        n_clicks: Click count of the ``show-all-dialogs-btn`` button.
        selected_topic_data: Contents of ``selected-topic-store``; its
            ``"topic_name"`` entry selects the conversations to list.
        raw_data: Contents of the ``raw-data`` store, in any form accepted by
            ``pd.DataFrame`` (presumably a list of per-conversation records).

    Returns:
        A ``(modal_style, modal_title, table)`` tuple. When the button has not
        been clicked or either store is empty, the modal stays hidden and the
        other two values are empty strings.
    """
    if not (n_clicks and selected_topic_data and raw_data):
        return {"display": "none"}, "", ""

    def badge(text, background):
        """Return one small white-on-colour pill."""
        return html.Span(
            text,
            className="dialog-tag-small",
            style={"backgroundColor": background, "color": "white"},
        )

    def label_of(value):
        """Capitalise a cell value for display, mapping None/NaN to "Unknown"."""
        # A column can exist while an individual cell is empty; `row.get`'s
        # default does not apply then, and calling .capitalize() on the
        # None/NaN would raise and abort the whole table.
        return "Unknown" if pd.isna(value) else str(value).capitalize()

    def status_badge(row, column, alert_value):
        """Return the pill for a status column, red when it equals `alert_value`."""
        value = row.get(column, "Unknown")
        colour = "#dc3545" if value == alert_value else "#6c757d"
        return badge(label_of(value), colour)

    def tags_cell(row):
        """Return up to three tag pills plus a "+N" overflow pill, or "No tags"."""
        raw_tags = row.get("consolidated_tags")
        if pd.isna(raw_tags):
            return "No tags"
        tags = [tag.strip() for tag in str(raw_tags).split(",") if tag.strip()]
        if not tags:
            # e.g. an empty string or only commas: show the placeholder
            # instead of an empty container.
            return "No tags"
        pills = [badge(tag, "#6c757d") for tag in tags[:3]]
        if len(tags) > 3:
            pills.append(badge(f"+{len(tags) - 3}", "#6c757d"))
        return html.Div(pills)

    topic_name = selected_topic_data["topic_name"]
    df_full = pd.DataFrame(raw_data)
    topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]

    header = html.Tr(
        [
            html.Th("Chat ID"),
            html.Th("Summary"),
            html.Th("Root Cause"),
            html.Th("Sentiment"),
            html.Th("Resolution"),
            html.Th("Urgency"),
            html.Th("Tags"),
            html.Th("Action"),
        ]
    )
    table_rows = [header]

    for _, row in topic_conversations.iterrows():
        view_button = html.Button(
            [
                html.I(className="fas fa-eye", style={"marginRight": "0.25rem"}),
                "View chat",
            ],
            # Pattern-matching id: the "open-chat-btn" callback reads the chat
            # id back from "index".
            id={"type": "open-chat-btn", "index": row["id"]},
            className="open-chat-btn",
        )
        table_rows.append(
            html.Tr(
                [
                    html.Td(
                        row["id"],
                        style={"fontFamily": "monospace", "fontSize": "0.8rem"},
                    ),
                    html.Td(
                        row.get("Summary", "No summary"),
                        className="dialog-summary-cell",
                    ),
                    html.Td(badge(label_of(row.get("Root_Cause")), "#8B4513")),
                    html.Td(status_badge(row, "Sentiment", "negative")),
                    html.Td(status_badge(row, "Resolution", "unresolved")),
                    html.Td(status_badge(row, "Urgency", "urgent")),
                    html.Td(tags_cell(row), className="dialog-tags-cell"),
                    html.Td(view_button),
                ]
            )
        )

    table = html.Table(table_rows, className="dialogs-table")
    modal_title = (
        f"All dialogs in Topic: {topic_name} ({len(topic_conversations)} dialogs)"
    )
    return {"display": "flex"}, modal_title, table
1850
 
1851
 
1852
+ # Callback to close dialogs table modal (no changes needed)
1853
  @callback(
1854
  Output("dialogs-table-modal", "style", allow_duplicate=True),
1855
  [Input("close-dialogs-modal-btn", "n_clicks")],
 
1858
def close_dialogs_table_modal(n_clicks):
    """Hide the dialogs-table modal once its close button has been clicked.

    Args:
        n_clicks: Click count of the ``close-dialogs-modal-btn`` button.

    Returns:
        A style dict hiding the modal when ``n_clicks`` is truthy, otherwise
        ``dash.no_update`` so the current style is left alone.
    """
    return {"display": "none"} if n_clicks else dash.no_update
1862
 
1863
 
1864
+ # NEW: Updated to use raw-data store
1865
  @callback(
1866
  [
1867
  Output("conversation-modal", "style", allow_duplicate=True),
 
1869
  Output("conversation-subheader", "children", allow_duplicate=True),
1870
  ],
1871
  [Input({"type": "open-chat-btn", "index": dash.dependencies.ALL}, "n_clicks")],
1872
+ [State("raw-data", "data")],
1873
  prevent_initial_call=True,
1874
  )
1875
def open_conversation_from_table(n_clicks_list, raw_data):
    """Show the conversation modal for the "View chat" button clicked in the dialogs table.

    Args:
        n_clicks_list: Click counts of every ``open-chat-btn`` button currently
            on the page (pattern-matching ``ALL`` input).
        raw_data: Contents of the ``raw-data`` store, in any form accepted by
            ``pd.DataFrame``.

    Returns:
        A ``(modal_style, conversation_text, subheader)`` tuple. The modal is
        hidden (with empty text) when no button has been clicked, the store is
        empty, or nothing triggered the callback.
    """
    if not any(n_clicks_list) or not raw_data:
        return {"display": "none"}, "", ""

    ctx = dash.callback_context
    if not ctx.triggered:
        return {"display": "none"}, "", ""

    # prop_id looks like '<json component id>.n_clicks'. Cut at the LAST dot:
    # the JSON id may itself contain dots (e.g. a chat id such as "conv.42"),
    # and cutting at the first one would hand json.loads a truncated string.
    component_id = ctx.triggered[0]["prop_id"].rpartition(".")[0]
    chat_id = json.loads(component_id)["index"]

    df_full = pd.DataFrame(raw_data)
    conversation_row = df_full[df_full["id"] == chat_id]
    if conversation_row.empty:
        # The id can come back from the browser with a different type than the
        # one held in the store (e.g. "42" vs 42); retry as strings, as the
        # root-cause table callback already does.
        conversation_row = df_full[df_full["id"].astype(str) == str(chat_id)]

    if conversation_row.empty:
        conversation_text = f"Conversation not found for Chat ID: {chat_id}"
        subheader_content = f"Chat ID: {chat_id} (Not Found)"
    else:
        row = conversation_row.iloc[0]
        conversation_text = row.get("conversation", "No conversation data available.")
        subheader_content = f"Chat ID: {chat_id} | Topic: {row.get('deduplicated_topic_name', 'Unknown')} | Sentiment: {row.get('Sentiment', 'Unknown')} | Resolution: {row.get('Resolution', 'Unknown')}"

    return {"display": "flex"}, conversation_text, subheader_content
1897
 
1898
 
1899
+ # NEW: Updated to use raw-data store
1900
  @callback(
1901
  [
1902
  Output("root-cause-modal", "style"),
 
1904
  Output("root-cause-table-content", "children"),
1905
  ],
1906
  [Input({"type": "root-cause-icon", "index": dash.dependencies.ALL}, "n_clicks")],
1907
+ [State("selected-topic-store", "data"), State("raw-data", "data")],
1908
  prevent_initial_call=True,
1909
  )
1910
def open_root_cause_modal(n_clicks_list, selected_topic_data, raw_data):
    """Open the root-cause modal listing the selected topic's dialogs for the clicked root cause.

    Args:
        n_clicks_list: Click counts of every ``root-cause-icon`` currently on
            the page (pattern-matching ``ALL`` input).
        selected_topic_data: Contents of ``selected-topic-store``; its
            ``"topic_name"`` entry restricts the listing to one topic.
        raw_data: Contents of the ``raw-data`` store, in any form accepted by
            ``pd.DataFrame``.

    Returns:
        A ``(modal_style, modal_title, content)`` tuple. The modal is hidden
        (with empty strings) when no icon has been clicked, a store is empty,
        or nothing triggered the callback.
    """
    if not any(n_clicks_list) or not selected_topic_data or not raw_data:
        return {"display": "none"}, "", ""

    ctx = dash.callback_context
    if not ctx.triggered:
        return {"display": "none"}, "", ""

    def badge(text, background):
        """Return one small white-on-colour pill."""
        return html.Span(
            text,
            className="dialog-tag-small",
            style={"backgroundColor": background, "color": "white"},
        )

    def status_badge(row, column, alert_value):
        """Return the pill for a status column, red when it equals `alert_value`."""
        value = row.get(column, "Unknown")
        # A column can exist while an individual cell is None/NaN; `row.get`'s
        # default does not apply then, so guard before calling .capitalize().
        label = "Unknown" if pd.isna(value) else str(value).capitalize()
        colour = "#dc3545" if value == alert_value else "#6c757d"
        return badge(label, colour)

    def tags_cell(row):
        """Return up to three tag pills plus a "+N" overflow pill, or "No tags"."""
        raw_tags = row.get("consolidated_tags")
        if pd.isna(raw_tags):
            return "No tags"
        tags = [tag.strip() for tag in str(raw_tags).split(",") if tag.strip()]
        if not tags:
            # e.g. an empty string or only commas: show the placeholder
            # instead of an empty container.
            return "No tags"
        pills = [badge(tag, "#6c757d") for tag in tags[:3]]
        if len(tags) > 3:
            pills.append(badge(f"+{len(tags) - 3}", "#6c757d"))
        return html.Div(pills)

    # prop_id looks like '<json component id>.n_clicks'. The id's "index" is
    # the root-cause label, which is free text and may contain dots, so cut at
    # the LAST dot; cutting at the first would truncate the JSON.
    component_id = ctx.triggered[0]["prop_id"].rpartition(".")[0]
    root_cause = json.loads(component_id)["index"]

    topic_name = selected_topic_data["topic_name"]
    df_full = pd.DataFrame(raw_data)
    in_topic = df_full["deduplicated_topic_name"] == topic_name
    has_root_cause = df_full["root_cause_subcluster"] == root_cause
    filtered_conversations = df_full[in_topic & has_root_cause]

    header = html.Tr(
        [
            html.Th("Chat ID"),
            html.Th("Summary"),
            html.Th("Sentiment"),
            html.Th("Resolution"),
            html.Th("Urgency"),
            html.Th("Tags"),
            html.Th("Action"),
        ]
    )
    table_rows = [header]

    for _, row in filtered_conversations.iterrows():
        view_button = html.Button(
            [
                html.I(className="fas fa-eye", style={"marginRight": "0.25rem"}),
                "View chat",
            ],
            # Pattern-matching id: the "open-chat-btn-rc" callback reads the
            # chat id back from "index".
            id={"type": "open-chat-btn-rc", "index": row["id"]},
            className="open-chat-btn",
        )
        table_rows.append(
            html.Tr(
                [
                    html.Td(
                        row["id"],
                        style={"fontFamily": "monospace", "fontSize": "0.8rem"},
                    ),
                    html.Td(
                        row.get("Summary", "No summary"),
                        className="dialog-summary-cell",
                    ),
                    html.Td(status_badge(row, "Sentiment", "negative")),
                    html.Td(status_badge(row, "Resolution", "unresolved")),
                    html.Td(status_badge(row, "Urgency", "urgent")),
                    html.Td(tags_cell(row), className="dialog-tags-cell"),
                    html.Td(view_button),
                ]
            )
        )

    table = html.Table(table_rows, className="dialogs-table")
    modal_title = f"Dialogs for Root Cause: {root_cause} (in Topic: {topic_name})"
    count_info = html.P(
        f"Found {len(filtered_conversations)} dialogs with this root cause.",
        style={
            "margin": "0 0 1rem 0",
            "color": "var(--muted-foreground)",
            "fontSize": "0.875rem",
        },
    )
    content = html.Div([count_info, table])
    return {"display": "flex"}, modal_title, content
1962
 
1963
 
1964
+ # Callback to close root cause modal (no changes needed)
1965
  @callback(
1966
  Output("root-cause-modal", "style", allow_duplicate=True),
1967
  [Input("close-root-cause-modal-btn", "n_clicks")],
 
1970
def close_root_cause_modal(n_clicks):
    """Hide the root-cause modal once its close button has been clicked.

    Args:
        n_clicks: Click count of the ``close-root-cause-modal-btn`` button.

    Returns:
        A style dict hiding the modal when ``n_clicks`` is truthy, otherwise
        ``dash.no_update`` so the current style is left alone.
    """
    return {"display": "none"} if n_clicks else dash.no_update
1974
 
1975
 
1976
+ # NEW: Updated to use raw-data store
1977
  @callback(
1978
  [
1979
  Output("conversation-modal", "style", allow_duplicate=True),
 
1981
  Output("conversation-subheader", "children", allow_duplicate=True),
1982
  ],
1983
  [Input({"type": "open-chat-btn-rc", "index": dash.dependencies.ALL}, "n_clicks")],
1984
+ [State("raw-data", "data")],
1985
  prevent_initial_call=True,
1986
  )
1987
def open_conversation_from_root_cause_table(n_clicks_list, raw_data):
    """Show the conversation modal for the "View chat" button clicked in the root-cause table.

    Args:
        n_clicks_list: Click counts of every ``open-chat-btn-rc`` button
            currently on the page (pattern-matching ``ALL`` input).
        raw_data: Contents of the ``raw-data`` store, in any form accepted by
            ``pd.DataFrame``.

    Returns:
        A ``(modal_style, conversation_text, subheader)`` tuple. The modal is
        hidden (with empty text) when no button has been clicked, the store is
        empty, or nothing triggered the callback.
    """
    if not any(n_clicks_list) or not raw_data:
        return {"display": "none"}, "", ""

    ctx = dash.callback_context
    if not ctx.triggered:
        return {"display": "none"}, "", ""

    # prop_id looks like '<json component id>.n_clicks'. Cut at the LAST dot:
    # the JSON id may itself contain dots (e.g. a chat id such as "conv.42"),
    # and cutting at the first one would hand json.loads a truncated string.
    component_id = ctx.triggered[0]["prop_id"].rpartition(".")[0]
    chat_id = json.loads(component_id)["index"]

    df_full = pd.DataFrame(raw_data)
    conversation_row = df_full[df_full["id"] == chat_id]
    if conversation_row.empty:
        # The id may come back with a different type than the stored one
        # (e.g. "42" vs 42), so retry the lookup comparing as strings.
        conversation_row = df_full[df_full["id"].astype(str) == str(chat_id)]

    if conversation_row.empty:
        conversation_text = f"Conversation not found for Chat ID: {chat_id}"
        subheader_content = f"Chat ID: {chat_id} (Not Found)"
        return {"display": "flex"}, conversation_text, subheader_content

    row = conversation_row.iloc[0]
    conversation_text = row.get("conversation", "No conversation data available.")
    root_cause = row.get("root_cause_subcluster", "Unknown")
    cluster_name = row.get("deduplicated_topic_name", "Unknown cluster")
    subheader_content = html.Div(
        [
            html.Span(
                f"Chat ID: {chat_id}",
                style={"fontWeight": "600", "marginRight": "1rem"},
            ),
            html.Span(
                f"Cluster: {cluster_name}",
                style={"color": "hsl(215.4, 16.3%, 46.9%)", "marginRight": "1rem"},
            ),
            html.Span(
                f"Root Cause: {root_cause}",
                style={"color": "#8b6f47", "fontWeight": "500"},
            ),
        ]
    )
    return {"display": "flex"}, conversation_text, subheader_content
2018
 
2019
# IMPORTANT: Gunicorn imports this module-level `server` object, so it must
# stay defined at import time (outside the __main__ guard).
server = app.server

# Running the file directly starts the app in debug mode.
if __name__ == "__main__":
    app.run_server(debug=True)