Pratik Bhavsar
added data exploration
5c8703b
raw
history blame
11.9 kB
import gradio as gr
import pandas as pd
import json
def get_updated_df(df, df_output):
    """Attach model outputs to the dataset rows they were produced for.

    Rows are matched purely by position: row ``i`` of ``df_output`` is
    attached to row ``i`` of ``df``.

    Args:
        df: DataFrame with at least the columns ``conversation``,
            ``tools_langchain``, ``n_turns``, ``len_query`` and ``n_tools``.
        df_output: DataFrame with the columns ``response``, ``rationale``,
            ``explanation`` and ``score``.

    Returns:
        A new DataFrame (``df`` is not modified) restricted to the nine
        columns listed in ``cols`` below, with as many rows as the shorter
        of the two inputs.
    """
    # Truncate BOTH frames to the common length. Truncating only ``df`` makes
    # the column assignment fail with a length mismatch whenever the output
    # file holds more rows than the dataset.
    n_rows = min(len(df), len(df_output))
    merged = df.iloc[:n_rows].copy()

    # ``tolist()`` drops the output frame's index so values are assigned by
    # position rather than aligned on index labels.
    for column in ("response", "rationale", "explanation", "score"):
        merged[column] = df_output[column].iloc[:n_rows].tolist()

    cols = [
        "conversation",
        "tools_langchain",
        "n_turns",
        "len_query",
        "n_tools",
        "response",
        "rationale",
        "explanation",
        "score",
    ]
    return merged[cols]
def get_chat_and_score_df(model, dataset):
    """Load one model's scored outputs and join them onto the dataset rows.

    Reads ``output/{model}/{dataset}.parquet`` (the model outputs) and
    ``datasets/{dataset}.parquet`` (the source rows), then hands both to
    ``get_updated_df`` and returns its result.

    Args:
        model: Name of the sub-directory under ``output/``.
        dataset: Parquet file stem shared by both files.

    Returns:
        The DataFrame produced by ``get_updated_df``.
    """
    scored_outputs = pd.read_parquet(f"output/{model}/{dataset}.parquet")
    source_rows = pd.read_parquet(f"datasets/{dataset}.parquet")
    return get_updated_df(source_rows, scored_outputs)
def format_chat_message(role, content, is_response=False):
    """Render one chat message as an HTML bubble aligned by role.

    Messages whose role is ``user`` (case-insensitive) are right-aligned;
    every other role is left-aligned.

    Args:
        role: Speaker label. It is shown as the bubble heading and, lowercased,
            selects the ``--message-bg-<role>`` CSS variable.
        content: Message text. ``None`` is rendered as an empty message and
            non-string values are converted with ``str()``.
        is_response: When true the bubble uses ``--response-bg``, a monospace
            font, and appends " Response" to the heading.

    Returns:
        An HTML fragment (str) for the message.
    """
    from html import escape

    role_text = str(role)
    role_style = role_text.lower()
    alignment = "flex-end" if role_style == "user" else "flex-start"
    max_width = "80%"

    # A message without text should render as an empty bubble, not crash.
    content_text = "" if content is None else str(content)
    # Clean up any excessive whitespace while preserving intentional line breaks
    cleaned_content = "\n".join(line.strip() for line in content_text.split("\n"))
    # The text comes from datasets / model output, so it must be escaped:
    # otherwise anything that looks like markup (e.g. "<tool>") is interpreted
    # by the browser instead of being displayed.
    cleaned_content = escape(cleaned_content)

    if is_response:
        background_color = "var(--response-bg)"
        font_family = "monospace"
        font_size = "0.9rem"
    else:
        # Escaped because the role ends up inside a style="..." attribute.
        background_color = f"var(--message-bg-{escape(role_style)})"
        font_family = "inherit"
        font_size = "inherit"

    heading = escape(role_text + (" Response" if is_response else ""))

    return f"""
<div style="
display: flex;
justify-content: {alignment};
margin: 0.75rem 0;">
<div style="
max-width: {max_width};
padding: 1rem;
border-radius: 12px;
background-color: {background_color};
border: 1px solid var(--border-color);
box-shadow: 0 1px 2px var(--shadow-color);">
<div style="
font-weight: 600;
color: var(--primary-text);
margin-bottom: 0.5rem;
font-size: 0.9rem;
text-transform: uppercase;">
{heading}
</div>
<div style="
color: var(--text-color);
line-height: 1.6;
white-space: pre-wrap;
font-family: {font_family};
font-size: {font_size};">
{cleaned_content}
</div>
</div>
</div>
"""
def format_response(response):
    """Return ``response`` as display text, pretty-printing it when it is JSON.

    Args:
        response: The raw model response. A string holding valid JSON is
            re-serialised with two-space indentation; anything else
            (plain text, ``None``, non-string objects) is passed through
            ``str()`` unchanged.

    Returns:
        The formatted response as a string.
    """
    try:
        # Try to parse as JSON
        response_data = json.loads(response)
    except (json.JSONDecodeError, TypeError):
        # Not JSON (or not a string at all): show it as-is.
        return str(response)
    # ensure_ascii=False keeps non-ASCII characters readable; the default
    # would display them as \uXXXX escape sequences.
    return json.dumps(response_data, indent=2, ensure_ascii=False)
def parse_tool_schema(tool):
    """Extract the display fields of a single tool schema.

    Args:
        tool: Mapping describing a tool. The keys read are ``title``,
            ``description`` and ``properties``; each entry of ``properties``
            is itself a mapping that may carry ``description`` and ``type``.

    Returns:
        A ``(name, description, parameters)`` tuple where ``parameters`` maps
        each property name to ``"<description> (Type: <type>)"``. Missing
        values fall back to the placeholder texts used below.
    """
    name = tool.get("title", "Unnamed Tool")
    description = tool.get("description", "No description available")

    # A schema without a "properties" key simply has no parameters.
    properties = tool["properties"] if "properties" in tool else {}
    parameters = {
        prop_name: "{} (Type: {})".format(
            spec.get("description", "No description"),
            spec.get("type", "unknown"),
        )
        for prop_name, spec in properties.items()
    }
    return name, description, parameters
def format_tool_info(tools):
    """Render the tool schemas of a row as a scrollable HTML panel.

    The returned fragment also carries the ``<style>`` block that defines the
    light/dark CSS variables referenced by the HTML built in this file.

    Args:
        tools: Either a JSON string encoding a sequence of tool schemas or an
            already-decoded sequence. Each schema is passed to
            ``parse_tool_schema``.

    Returns:
        An HTML fragment (str). A short "No tool information available"
        placeholder is returned when ``tools`` is empty, ``None`` or a string
        that is not valid JSON.
    """
    from html import escape

    no_tool_info = '<div style="padding: 1rem; color: var(--text-muted);">No tool information available</div>'

    if isinstance(tools, str):
        try:
            tools = json.loads(tools)
        except ValueError:
            # json.JSONDecodeError is a ValueError; a bare ``except`` here
            # would also swallow KeyboardInterrupt and genuine bugs.
            return no_tool_info

    # Prefer len() so array-like values (whose truthiness can be ambiguous)
    # are handled; fall back to truthiness for None and unsized objects.
    try:
        nothing_to_show = len(tools) == 0
    except TypeError:
        nothing_to_show = not tools
    if nothing_to_show:
        return no_tool_info

    cards = []
    for tool in tools:
        name, description, parameters = parse_tool_schema(tool)
        # Schema text is data, not markup: escape it before embedding.
        name = escape(str(name))
        description = escape(str(description))
        cards.append(f"""
<div style="
margin: 1rem 0;
padding: 1.5rem;
border-radius: 8px;
background-color: var(--surface-color);
border: 1px solid var(--border-color);">
<div style="
font-weight: 600;
color: var(--primary-text);
margin-bottom: 0.75rem;
font-size: 1.1rem;">
{name}
</div>
<div style="
color: var(--text-color);
margin-bottom: 1rem;
line-height: 1.5;">
{description}
</div>
<div style="
background-color: var(--surface-color-alt);
padding: 1rem;
border-radius: 4px;
border: 1px solid var(--border-color);">
{format_parameters(parameters)}
</div>
</div>
""")
    tool_html = "".join(cards)

    # NOTE: --response-bg was previously defined for the dark theme only, so
    # response bubbles had no background in the light theme. The light value
    # below is a neutral pick - NOTE(review): adjust to the design palette.
    return f"""
<div style="
max-height: 600px;
overflow-y: auto;
padding-right: 0.5rem;">
<style>
:root[data-theme="light"] {{
--surface-color: #f8f9fa;
--surface-color-alt: #ffffff;
--text-color: #202124;
--text-muted: #666666;
--primary-text: #1a73e8;
--border-color: #e9ecef;
--shadow-color: rgba(0,0,0,0.1);
--message-bg-user: #E5F6FD;
--message-bg-assistant: #F7F7F8;
--message-bg-system: #FFF3E0;
--response-bg: #EEF2F7;
--score-high: #1a73e8;
--score-med: #f4b400;
--score-low: #ea4335;
}}
:root[data-theme="dark"] {{
--surface-color: #1e1e1e;
--surface-color-alt: #2d2d2d;
--text-color: #ffffff;
--text-muted: #a0a0a0;
--primary-text: #60a5fa;
--border-color: #404040;
--shadow-color: rgba(0,0,0,0.3);
--message-bg-user: #2d3748;
--message-bg-assistant: #1a1a1a;
--message-bg-system: #2c2516;
--response-bg: #2a2f3a;
--score-high: #60a5fa;
--score-med: #fbbf24;
--score-low: #ef4444;
}}
</style>
{tool_html}
</div>
"""
def format_parameters(parameters):
    """Render a tool's parameters as a stack of name/description HTML rows.

    Args:
        parameters: Mapping of parameter name to its description text.

    Returns:
        An HTML fragment (str) with one block per parameter in mapping order,
        or a "No parameters" placeholder when the mapping is empty or ``None``.
    """
    from html import escape

    if not parameters:
        return '<div style="color: var(--text-muted);">No parameters</div>'

    rows = []
    for name, desc in parameters.items():
        # Names and descriptions are schema data, not markup: escape them so
        # characters such as "<" or "&" are displayed literally.
        safe_name = escape(str(name))
        safe_desc = escape(str(desc))
        rows.append(f"""
<div style="margin: 0.75rem 0;">
<div style="
font-weight: 500;
color: var(--primary-text);
margin-bottom: 0.25rem;">
{safe_name}
</div>
<div style="
color: var(--text-color);
line-height: 1.4;
font-size: 0.95rem;">
{safe_desc}
</div>
</div>
""")
    return "".join(rows)
def format_metrics(score, rationale, explanation):
    """Render the score, rationale and explanation of a row as an HTML card.

    The score is coloured with ``--score-high`` when it is at least 0.7,
    ``--score-med`` when it is at least 0.4 and ``--score-low`` otherwise.

    Args:
        score: Numeric score. ``None`` or NaN is shown as "N/A" in the muted
            text colour instead of raising or printing "nan".
        rationale: Text shown under the "Rationale" heading.
        explanation: Text shown under the "Explanation" heading.

    Returns:
        An HTML fragment (str).
    """
    from html import escape

    # ``score != score`` is true only for NaN.
    if score is None or score != score:
        score_color = "var(--text-muted)"
        score_text = "N/A"
    else:
        if score >= 0.7:
            score_color = "var(--score-high)"
        elif score >= 0.4:
            score_color = "var(--score-med)"
        else:
            score_color = "var(--score-low)"
        score_text = f"{score:.2f}"

    # Free text is escaped so markup-like content is displayed, not
    # interpreted by the browser.
    rationale = escape(str(rationale))
    explanation = escape(str(explanation))

    return f"""
<div style="
padding: 1.5rem;
background-color: var(--surface-color);
border-radius: 8px;
border: 1px solid var(--border-color);
box-shadow: 0 2px 4px var(--shadow-color);">
<div style="margin-bottom: 1.5rem;">
<h3 style="
color: var(--text-color);
font-size: 1.1rem;
margin-bottom: 0.5rem;
font-weight: 600;">TSQ Score</h3>
<div style="
font-size: 2rem;
font-weight: 600;
color: {score_color};">
{score_text}
</div>
</div>
<div style="margin-bottom: 1.5rem;">
<h3 style="
color: var(--text-color);
font-size: 1.1rem;
margin-bottom: 0.5rem;
font-weight: 600;">Rationale</h3>
<div style="
color: var(--text-color);
line-height: 1.5;">
{rationale}
</div>
</div>
<div>
<h3 style="
color: var(--text-color);
font-size: 1.1rem;
margin-bottom: 0.5rem;
font-weight: 600;">Explanation</h3>
<div style="
color: var(--text-color);
line-height: 1.5;">
{explanation}
</div>
</div>
</div>
"""
def update_chat_display(df, index):
    """Build the chat, metrics and tool HTML panels for one row of ``df``.

    Args:
        df: DataFrame whose rows provide ``conversation`` (a JSON string of
            messages with ``role`` and ``content``), ``response``, ``score``,
            ``rationale``, ``explanation`` and ``tools_langchain``.
        index: Positional row number to display.

    Returns:
        A ``(chat_html, metrics_html, tool_html)`` tuple. Placeholder panels
        are returned when ``df`` is ``None``, empty, or ``index`` is past the
        last row.
    """
    if df is None or df.empty or index >= len(df):
        no_chat = '<div style="padding: 1rem; color: var(--text-muted);">No data available</div>'
        no_metrics = '<div style="padding: 1rem; color: var(--text-muted);">No metrics available</div>'
        no_tools = '<div style="padding: 1rem; color: var(--text-muted);">No tool information available</div>'
        return (no_chat, no_metrics, no_tools)

    row = df.iloc[index]
    conversation = json.loads(row["conversation"])
    response_text = format_response(row["response"])

    # One bubble per conversation message, followed by the model's response.
    bubbles = []
    for message in conversation:
        bubbles.append(format_chat_message(message["role"], message["content"]))
    bubbles.append(format_chat_message("Assistant", response_text, is_response=True))
    bubbles_html = "".join(bubbles)

    chat_html = f"""
<div style="
background-color: var(--surface-color);
border-radius: 8px;
border: 1px solid var(--border-color);
box-shadow: 0 2px 4px var(--shadow-color);
padding: 1.5rem;">
{bubbles_html}
</div>
"""
    metrics_html = format_metrics(row["score"], row["rationale"], row["explanation"])
    tool_html = format_tool_info(row["tools_langchain"])
    return chat_html, metrics_html, tool_html
def filter_and_update_display(model, dataset, min_score, max_score, current_index):
    """Load a model/dataset pair, filter rows by score, and render one row.

    Args:
        model: Model name passed to ``get_chat_and_score_df``.
        dataset: Dataset name passed to ``get_chat_and_score_df``.
        min_score: Inclusive lower bound on the ``score`` column.
        max_score: Inclusive upper bound on the ``score`` column.
        current_index: Requested position within the filtered rows. It is
            converted to ``int`` and clamped into ``[0, number_of_rows - 1]``.

    Returns:
        A ``(chat_html, metrics_html, tool_html, index_display)`` tuple. When
        no row matches the filters, or any error occurs, placeholder panels
        are returned with ``"0/0"`` as the index display; on error the first
        panel shows the (HTML-escaped) error message.
    """
    from html import escape

    no_metrics = '<div style="padding: 1rem; color: var(--text-muted);">No metrics available</div>'
    no_tools = '<div style="padding: 1rem; color: var(--text-muted);">No tool information available</div>'

    try:
        df_chat = get_chat_and_score_df(model, dataset)
        df_chat = df_chat[
            (df_chat["score"] >= min_score) & (df_chat["score"] <= max_score)
        ]
        if df_chat.empty:
            return (
                '<div style="padding: 1rem; color: var(--text-muted);">No data available for selected filters</div>',
                no_metrics,
                no_tools,
                "0/0",
            )

        max_index = len(df_chat) - 1
        # Clamp on both sides: a negative index would otherwise wrap around to
        # a row counted from the end and show a counter such as "0/N". int()
        # guards against float values, which positional indexing rejects.
        current_index = max(0, min(int(current_index), max_index))

        chat_html, metrics_html, tool_html = update_chat_display(df_chat, current_index)
        index_display = f'<div style="font-weight: 500; color: var(--primary-text);">{current_index + 1}/{len(df_chat)}</div>'
        return chat_html, metrics_html, tool_html, index_display
    except Exception as e:
        # Deliberate catch-all at the UI boundary: any failure (missing file,
        # bad data, ...) is shown in the panel instead of propagating.
        # The message is escaped because it can contain file paths or data.
        error_html = f"""
<div style="
padding: 1rem;
color: var(--score-low);
background-color: var(--surface-color);
border: 1px solid var(--score-low);
border-radius: 4px;">
Error: {escape(str(e))}
</div>
"""
        return (
            error_html,
            no_metrics,
            no_tools,
            "0/0",
        )