# Hugging Face Space -- status: Running on CPU Upgrade
import html
import json

import gradio as gr
import pandas as pd
def get_updated_df(df, df_output):
    """Attach model output columns to the dataset rows and select display columns.

    Rows are paired by position. Only the first ``min(len(df), len(df_output))``
    rows are kept, so an output frame that is longer than the dataset no longer
    raises a length-mismatch error on assignment.

    Args:
        df: Dataset frame; must contain ``conversation``, ``tools_langchain``,
            ``n_turns``, ``len_query`` and ``n_tools``.
        df_output: Model output frame; must contain ``response``,
            ``rationale``, ``explanation`` and ``score``.

    Returns:
        A new DataFrame with the dataset columns followed by the output
        columns. ``df`` itself is not modified.
    """
    output_cols = ["response", "rationale", "explanation", "score"]
    n_rows = min(len(df), len(df_output))

    # Work on a copy so the caller's frame is left untouched.
    merged = df.iloc[:n_rows].copy()
    for col in output_cols:
        # Go through a plain list so values are assigned by position,
        # not aligned on the two frames' indexes.
        merged[col] = df_output[col].iloc[:n_rows].tolist()

    cols = [
        "conversation",
        "tools_langchain",
        "n_turns",
        "len_query",
        "n_tools",
        *output_cols,
    ]
    return merged[cols]
def get_chat_and_score_df(model, dataset):
    """Load a dataset and one model's outputs for it, merged for display.

    Reads ``output/{model}/{dataset}.parquet`` and
    ``datasets/{dataset}.parquet`` and combines them with ``get_updated_df``.
    """
    model_outputs = pd.read_parquet(f"output/{model}/{dataset}.parquet")
    dataset_rows = pd.read_parquet(f"datasets/{dataset}.parquet")
    return get_updated_df(dataset_rows, model_outputs)
def format_chat_message(role, content, is_response=False):
    """Render one chat message as an HTML bubble aligned by role.

    Messages whose role is ``user`` (case-insensitive) are right-aligned; all
    other roles are left-aligned. Role and content are HTML-escaped before
    being placed in the markup, so text containing ``<`` or ``&`` is shown
    literally instead of being interpreted as HTML.

    Args:
        role: Speaker label; its lower-cased form selects the
            ``--message-bg-<role>`` CSS variable.
        content: Message text. ``None`` is rendered as an empty message and
            other non-string values are converted with ``str``.
        is_response: When true, the bubble uses ``--response-bg``, a monospace
            font, and the label gets a " Response" suffix.

    Returns:
        An HTML fragment for the message.
    """
    role_text = str(role)
    role_style = role_text.lower()
    alignment = "flex-end" if role_style == "user" else "flex-start"
    max_width = "80%"

    # Strip each line's surrounding whitespace while keeping the line breaks.
    raw_text = "" if content is None else str(content)
    cleaned_content = "\n".join(line.strip() for line in raw_text.split("\n"))

    background_color = (
        "var(--response-bg)"
        if is_response
        else f"var(--message-bg-{html.escape(role_style)})"
    )
    font_family = "monospace" if is_response else "inherit"
    font_size = "0.9rem" if is_response else "inherit"
    label = html.escape(role_text + (" Response" if is_response else ""))
    safe_content = html.escape(cleaned_content)

    # The content sits directly between the tags: the container uses
    # `white-space: pre-wrap`, so template newlines/indentation around it
    # would be rendered as part of the message.
    return f"""
        <div style="
            display: flex;
            justify-content: {alignment};
            margin: 0.75rem 0;">
            <div style="
                max-width: {max_width};
                padding: 1rem;
                border-radius: 12px;
                background-color: {background_color};
                border: 1px solid var(--border-color);
                box-shadow: 0 1px 2px var(--shadow-color);">
                <div style="
                    font-weight: 600;
                    color: var(--primary-text);
                    margin-bottom: 0.5rem;
                    font-size: 0.9rem;
                    text-transform: uppercase;">
                    {label}
                </div>
                <div style="
                    color: var(--text-color);
                    line-height: 1.6;
                    white-space: pre-wrap;
                    font-family: {font_family};
                    font-size: {font_size};">{safe_content}</div>
            </div>
        </div>
    """
def format_response(response):
    """Return the response as display text, pretty-printing it when it is JSON.

    Args:
        response: Raw model response. If ``json.loads`` accepts it, the parsed
            value is re-serialised with two-space indentation.

    Returns:
        The indented JSON text, or ``str(response)`` when the value is not
        valid JSON (or not a type ``json.loads`` accepts).
    """
    try:
        parsed = json.loads(response)
    except (ValueError, TypeError):
        # Not JSON (json.JSONDecodeError is a ValueError) or not str/bytes.
        return str(response)
    # ensure_ascii=False keeps non-ASCII characters readable instead of
    # turning them into \uXXXX escapes in the displayed text.
    return json.dumps(parsed, indent=2, ensure_ascii=False)
def parse_tool_schema(tool):
    """Extract a tool's name, description and parameter summaries from its schema.

    Args:
        tool: Mapping describing the tool. ``title`` and ``description`` are
            read directly; ``properties`` maps parameter names to mappings
            that may carry ``description`` and ``type``.

    Returns:
        A ``(name, description, parameters)`` tuple where ``parameters`` maps
        each parameter name to ``"<description> (Type: <type>)"``. Missing
        fields fall back to placeholder text; a missing or null ``properties``
        entry yields an empty dict.
    """
    name = tool.get("title", "Unnamed Tool")
    description = tool.get("description", "No description available")

    # `or {}` also covers an explicit `"properties": null` in the schema.
    properties = tool.get("properties") or {}

    parameters = {}
    for param_name, param_data in properties.items():
        # Tolerate entries that are not mappings (e.g. null) by using defaults.
        if not isinstance(param_data, dict):
            param_data = {}
        param_desc = param_data.get("description", "No description")
        param_type = param_data.get("type", "unknown")
        parameters[param_name] = f"{param_desc} (Type: {param_type})"
    return name, description, parameters
def format_tool_info(tools):
    """Render the available tools as a scrollable list of HTML cards.

    The returned markup also embeds the ``<style>`` block that defines the
    light/dark theme CSS variables referenced by the other HTML fragments in
    this module.

    Args:
        tools: Either a JSON string or an already-decoded collection of tool
            schemas (each passed to ``parse_tool_schema``). A single schema
            dict is treated as a one-element list.

    Returns:
        An HTML fragment. A "No tool information available" placeholder is
        returned when the input is empty, ``None``, invalid JSON, or decodes
        to something that is not a collection of schemas.
    """
    no_tools_html = '<div style="padding: 1rem; color: var(--text-muted);">No tool information available</div>'

    if isinstance(tools, str):
        try:
            tools = json.loads(tools)
        except ValueError:  # json.JSONDecodeError is a ValueError
            return no_tools_html

    if isinstance(tools, dict):
        tools = [tools]

    # Scalars (None, numbers, a bare JSON string) cannot be iterated as schemas.
    # len() is used instead of truthiness so array-like containers work too.
    if (
        tools is None
        or isinstance(tools, (str, bytes))
        or not hasattr(tools, "__len__")
        or len(tools) == 0
    ):
        return no_tools_html

    tool_cards = []
    for tool in tools:
        name, description, parameters = parse_tool_schema(tool)
        # Name/description come from the dataset; escape them so markup
        # characters are displayed literally.
        safe_name = html.escape(str(name))
        safe_description = html.escape(str(description))
        tool_cards.append(f"""
            <div style="
                margin: 1rem 0;
                padding: 1.5rem;
                border-radius: 8px;
                background-color: var(--surface-color);
                border: 1px solid var(--border-color);">
                <div style="
                    font-weight: 600;
                    color: var(--primary-text);
                    margin-bottom: 0.75rem;
                    font-size: 1.1rem;">
                    {safe_name}
                </div>
                <div style="
                    color: var(--text-color);
                    margin-bottom: 1rem;
                    line-height: 1.5;">
                    {safe_description}
                </div>
                <div style="
                    background-color: var(--surface-color-alt);
                    padding: 1rem;
                    border-radius: 4px;
                    border: 1px solid var(--border-color);">
                    {format_parameters(parameters)}
                </div>
            </div>
        """)
    tool_html = "".join(tool_cards)

    # `--response-bg` is defined for both themes; it is referenced by response
    # bubbles and would otherwise be unset in the light theme.
    return f"""
        <div style="
            max-height: 600px;
            overflow-y: auto;
            padding-right: 0.5rem;">
            <style>
                :root[data-theme="light"] {{
                    --surface-color: #f8f9fa;
                    --surface-color-alt: #ffffff;
                    --text-color: #202124;
                    --text-muted: #666666;
                    --primary-text: #1a73e8;
                    --border-color: #e9ecef;
                    --shadow-color: rgba(0,0,0,0.1);
                    --message-bg-user: #E5F6FD;
                    --message-bg-assistant: #F7F7F8;
                    --message-bg-system: #FFF3E0;
                    --response-bg: #eef2f7;
                    --score-high: #1a73e8;
                    --score-med: #f4b400;
                    --score-low: #ea4335;
                }}
                :root[data-theme="dark"] {{
                    --surface-color: #1e1e1e;
                    --surface-color-alt: #2d2d2d;
                    --text-color: #ffffff;
                    --text-muted: #a0a0a0;
                    --primary-text: #60a5fa;
                    --border-color: #404040;
                    --shadow-color: rgba(0,0,0,0.3);
                    --message-bg-user: #2d3748;
                    --message-bg-assistant: #1a1a1a;
                    --message-bg-system: #2c2516;
                    --response-bg: #2a2f3a;
                    --score-high: #60a5fa;
                    --score-med: #fbbf24;
                    --score-low: #ef4444;
                }}
            </style>
            {tool_html}
        </div>
    """
def format_parameters(parameters):
    """Render a tool's parameters as a sequence of name/description HTML rows.

    Args:
        parameters: Mapping of parameter name to description text. Both are
            HTML-escaped before being placed in the markup.

    Returns:
        The concatenated HTML rows in mapping order, or a "No parameters"
        placeholder when the mapping is empty or ``None``.
    """
    if not parameters:
        return '<div style="color: var(--text-muted);">No parameters</div>'

    # Build all rows and join once instead of growing a string in a loop.
    return "".join(
        f"""
        <div style="margin: 0.75rem 0;">
            <div style="
                font-weight: 500;
                color: var(--primary-text);
                margin-bottom: 0.25rem;">
                {html.escape(str(name))}
            </div>
            <div style="
                color: var(--text-color);
                line-height: 1.4;
                font-size: 0.95rem;">
                {html.escape(str(desc))}
            </div>
        </div>
        """
        for name, desc in parameters.items()
    )
def format_metrics(score, rationale, explanation):
    """Render the score, rationale and explanation as an HTML metrics panel.

    The score is printed with two decimals and coloured by threshold:
    ``>= 0.7`` uses ``--score-high``, ``>= 0.4`` uses ``--score-med``, anything
    lower uses ``--score-low``. A score that is ``None``, NaN, or not
    convertible to ``float`` is shown as "N/A" in the muted text colour
    instead of raising.

    Args:
        score: Numeric score for the row.
        rationale: Rationale text; HTML-escaped, ``None`` shown as empty.
        explanation: Explanation text; HTML-escaped, ``None`` shown as empty.

    Returns:
        An HTML fragment for the metrics panel.
    """
    try:
        score_value = float(score)
    except (TypeError, ValueError):
        score_value = None
    # NaN is the only float that differs from itself.
    if score_value is not None and score_value != score_value:
        score_value = None

    if score_value is None:
        score_color = "var(--text-muted)"
        score_text = "N/A"
    else:
        if score_value >= 0.7:
            score_color = "var(--score-high)"
        elif score_value >= 0.4:
            score_color = "var(--score-med)"
        else:
            score_color = "var(--score-low)"
        score_text = f"{score_value:.2f}"

    safe_rationale = html.escape("" if rationale is None else str(rationale))
    safe_explanation = html.escape("" if explanation is None else str(explanation))

    return f"""
        <div style="
            padding: 1.5rem;
            background-color: var(--surface-color);
            border-radius: 8px;
            border: 1px solid var(--border-color);
            box-shadow: 0 2px 4px var(--shadow-color);">
            <div style="margin-bottom: 1.5rem;">
                <h3 style="
                    color: var(--text-color);
                    font-size: 1.1rem;
                    margin-bottom: 0.5rem;
                    font-weight: 600;">TSQ Score</h3>
                <div style="
                    font-size: 2rem;
                    font-weight: 600;
                    color: {score_color};">
                    {score_text}
                </div>
            </div>
            <div style="margin-bottom: 1.5rem;">
                <h3 style="
                    color: var(--text-color);
                    font-size: 1.1rem;
                    margin-bottom: 0.5rem;
                    font-weight: 600;">Rationale</h3>
                <div style="
                    color: var(--text-color);
                    line-height: 1.5;">
                    {safe_rationale}
                </div>
            </div>
            <div>
                <h3 style="
                    color: var(--text-color);
                    font-size: 1.1rem;
                    margin-bottom: 0.5rem;
                    font-weight: 600;">Explanation</h3>
                <div style="
                    color: var(--text-color);
                    line-height: 1.5;">
                    {safe_explanation}
                </div>
            </div>
        </div>
    """
def update_chat_display(df, index):
    """Build the chat, metrics and tool HTML panels for one row of ``df``.

    Args:
        df: Frame with ``conversation``, ``response``, ``score``,
            ``rationale``, ``explanation`` and ``tools_langchain`` columns.
        index: Zero-based row position to display.

    Returns:
        A ``(chat_html, metrics_html, tool_html)`` tuple. Placeholder panels
        are returned when ``df`` is ``None`` or empty, or when ``index`` is
        negative or past the last row.
    """
    # A negative position would silently wrap around to the end via `iloc`,
    # so it is rejected together with out-of-range positions.
    if df is None or df.empty or index < 0 or index >= len(df):
        return (
            '<div style="padding: 1rem; color: var(--text-muted);">No data available</div>',
            '<div style="padding: 1rem; color: var(--text-muted);">No metrics available</div>',
            '<div style="padding: 1rem; color: var(--text-muted);">No tool information available</div>',
        )

    row = df.iloc[index]

    # The conversation is decoded from JSON when stored as text; an
    # already-decoded sequence of messages is used as is.
    conversation = row["conversation"]
    if isinstance(conversation, (str, bytes, bytearray)):
        messages = json.loads(conversation)
    else:
        messages = conversation

    formatted_response = format_response(row["response"])

    # Conversation turns first, then the model response as the final bubble.
    bubbles = [
        # A message without content (or with null content) renders empty.
        format_chat_message(msg["role"], msg.get("content") or "")
        for msg in messages
    ]
    bubbles.append(
        format_chat_message("Assistant", formatted_response, is_response=True)
    )
    joined_bubbles = "".join(bubbles)

    chat_html = f"""
        <div style="
            background-color: var(--surface-color);
            border-radius: 8px;
            border: 1px solid var(--border-color);
            box-shadow: 0 2px 4px var(--shadow-color);
            padding: 1.5rem;">
            {joined_bubbles}
        </div>
    """
    metrics_html = format_metrics(row["score"], row["rationale"], row["explanation"])
    tool_html = format_tool_info(row["tools_langchain"])
    return chat_html, metrics_html, tool_html
def filter_and_update_display(model, dataset, min_score, max_score, current_index):
    """Load a model/dataset pair, filter rows by score, and render one row.

    Args:
        model: Model name passed to ``get_chat_and_score_df``.
        dataset: Dataset name passed to ``get_chat_and_score_df``.
        min_score: Inclusive lower bound on the ``score`` column.
        max_score: Inclusive upper bound on the ``score`` column.
        current_index: Requested zero-based row position within the filtered
            rows; it is converted to ``int`` and clamped to the valid range.

    Returns:
        A ``(chat_html, metrics_html, tool_html, index_display)`` tuple. When
        no rows match, or when any exception is raised while loading or
        rendering, placeholder panels are returned with ``"0/0"`` as the
        index display; in the exception case the first panel shows the
        (HTML-escaped) error message.
    """
    no_metrics_html = '<div style="padding: 1rem; color: var(--text-muted);">No metrics available</div>'
    no_tools_html = '<div style="padding: 1rem; color: var(--text-muted);">No tool information available</div>'

    try:
        df_chat = get_chat_and_score_df(model, dataset)
        in_range = (df_chat["score"] >= min_score) & (df_chat["score"] <= max_score)
        df_chat = df_chat[in_range]

        if df_chat.empty:
            return (
                '<div style="padding: 1rem; color: var(--text-muted);">No data available for selected filters</div>',
                no_metrics_html,
                no_tools_html,
                "0/0",
            )

        # Clamp to [0, last row]; int() accepts whole-number floats that a UI
        # control may deliver.
        max_index = len(df_chat) - 1
        current_index = max(0, min(int(current_index), max_index))

        chat_html, metrics_html, tool_html = update_chat_display(df_chat, current_index)
        index_display = f'<div style="font-weight: 500; color: var(--primary-text);">{current_index + 1}/{len(df_chat)}</div>'
        return chat_html, metrics_html, tool_html, index_display
    except Exception as e:
        # UI boundary: surface the failure in the panel instead of raising.
        # The message is escaped because it can contain arbitrary text
        # (paths, data snippets) that must not be interpreted as HTML.
        error_html = f"""
            <div style="
                padding: 1rem;
                color: var(--score-low);
                background-color: var(--surface-color);
                border: 1px solid var(--score-low);
                border-radius: 4px;">
                Error: {html.escape(str(e))}
            </div>
        """
        return (
            error_html,
            no_metrics_html,
            no_tools_html,
            "0/0",
        )