Spaces:

galileo-ai
/

agent-leaderboard

Running on CPU Upgrade

Pratik Bhavsar

added data exploration

5c8703b 7 months ago

11.9 kB

	import html
	import json

	import gradio as gr
	import pandas as pd


	def get_updated_df(df, df_output):
	    """Attach a model's outputs to the matching dataset rows.

	    The dataset frame is cut down to its first ``len(df_output)`` rows and the
	    output columns are copied over positionally (as plain lists), so the index
	    of ``df_output`` plays no role in the alignment.

	    Args:
	        df: Dataset frame holding the conversation and tool metadata columns.
	        df_output: Output frame with ``response``, ``rationale``,
	            ``explanation`` and ``score`` columns.

	    Returns:
	        A new frame containing only the columns used by the explorer; the
	        input ``df`` is left untouched because the slice is copied first.
	    """
	    output_columns = ["response", "rationale", "explanation", "score"]
	    dataset_columns = [
	        "conversation",
	        "tools_langchain",
	        "n_turns",
	        "len_query",
	        "n_tools",
	    ]

	    merged = df.iloc[: len(df_output)].copy()
	    for column in output_columns:
	        merged[column] = df_output[column].tolist()

	    return merged[dataset_columns + output_columns]


	def get_chat_and_score_df(model, dataset):
	    """Load one model's outputs for a dataset, merged with the dataset rows.

	    Reads ``output/{model}/{dataset}.parquet`` and
	    ``datasets/{dataset}.parquet`` (paths relative to the working directory)
	    and combines them with ``get_updated_df``.

	    Args:
	        model: Name of the model directory under ``output/``.
	        dataset: Dataset file stem shared by both parquet files.

	    Returns:
	        The merged frame produced by ``get_updated_df``.
	    """
	    outputs = pd.read_parquet(f"output/{model}/{dataset}.parquet")
	    samples = pd.read_parquet(f"datasets/{dataset}.parquet")
	    return get_updated_df(samples, outputs)


	def format_chat_message(role, content, is_response=False):
	    """Render one chat message as an HTML bubble aligned by role.

	    Args:
	        role: Speaker label. Its lower-cased form picks the
	            ``--message-bg-<role>`` CSS variable; ``user`` messages are
	            right-aligned, every other role is left-aligned.
	        content: Message text. ``None`` is rendered as an empty message and
	            non-string values are converted with ``str``.
	        is_response: When true the bubble is labelled "<role> Response",
	            uses the response background and a monospace font.

	    Returns:
	        An HTML fragment for the message. Role and content are HTML-escaped
	        so that text such as ``<tag>`` is displayed instead of being
	        interpreted as markup.
	    """
	    role_style = role.lower()
	    alignment = "flex-end" if role_style == "user" else "flex-start"
	    max_width = "80%"

	    # Clean up any excessive whitespace while preserving intentional line
	    # breaks, then escape so the text cannot inject markup into the page.
	    text = "" if content is None else str(content)
	    cleaned_content = html.escape(
	        "\n".join(line.strip() for line in text.split("\n")), quote=False
	    )

	    if is_response:
	        # --response-bg is only declared for the dark theme, so fall back to
	        # the assistant bubble colour where it is undefined.
	        background_color = "var(--response-bg, var(--message-bg-assistant))"
	        font_family = "monospace"
	        font_size = "0.9rem"
	    else:
	        # The role ends up inside a style attribute: escape quotes as well.
	        background_color = f"var(--message-bg-{html.escape(role_style)})"
	        font_family = "inherit"
	        font_size = "inherit"

	    heading = html.escape(role + (" Response" if is_response else ""), quote=False)

	    return f"""
	<div style="
	display: flex;
	justify-content: {alignment};
	margin: 0.75rem 0;">
	<div style="
	max-width: {max_width};
	padding: 1rem;
	border-radius: 12px;
	background-color: {background_color};
	border: 1px solid var(--border-color);
	box-shadow: 0 1px 2px var(--shadow-color);">
	<div style="
	font-weight: 600;
	color: var(--primary-text);
	margin-bottom: 0.5rem;
	font-size: 0.9rem;
	text-transform: uppercase;">
	{heading}
	</div>
	<div style="
	color: var(--text-color);
	line-height: 1.6;
	white-space: pre-wrap;
	font-family: {font_family};
	font-size: {font_size};">
	{cleaned_content}
	</div>
	</div>
	</div>
	"""


	def format_response(response):
	    """Return the response as display text, pretty-printing it when it is JSON.

	    Args:
	        response: Raw model response; usually a string, but any object is
	            accepted.

	    Returns:
	        The response re-serialised with two-space indentation when it parses
	        as JSON, otherwise ``str(response)``.
	    """
	    try:
	        parsed = json.loads(response)
	    except (json.JSONDecodeError, TypeError):
	        # Not JSON (or not even a string): show it verbatim.
	        return str(response)
	    return json.dumps(parsed, indent=2)


	def parse_tool_schema(tool):
	    """Extract the display fields from a single tool schema mapping.

	    Args:
	        tool: Mapping that may carry ``title``, ``description`` and
	            ``properties`` keys; each property is itself a mapping that may
	            carry ``description`` and ``type``.

	    Returns:
	        A ``(name, description, parameters)`` tuple where ``parameters`` maps
	        each property name to ``"<description> (Type: <type>)"``. Missing
	        fields are replaced by placeholder text.
	    """
	    title = tool.get("title", "Unnamed Tool")
	    summary = tool.get("description", "No description available")

	    properties = tool["properties"] if "properties" in tool else {}
	    described = {
	        key: "{} (Type: {})".format(
	            spec.get("description", "No description"),
	            spec.get("type", "unknown"),
	        )
	        for key, spec in properties.items()
	    }

	    return title, summary, described


	def format_tool_info(tools):
	    """Render the tool schemas as themed HTML cards.

	    Args:
	        tools: Iterable of tool schema mappings, or a JSON string encoding
	            one. Each entry is passed to ``parse_tool_schema``.

	    Returns:
	        A placeholder ``<div>`` when ``tools`` is empty or is a string that
	        is not valid JSON. Otherwise a scrollable container holding an inline
	        ``<style>`` element (CSS variables for the ``light`` and ``dark``
	        ``data-theme`` values) followed by one card per tool. Tool names and
	        descriptions are HTML-escaped before being inserted.
	    """
	    no_tools_html = '<div style="padding: 1rem; color: var(--text-muted);">No tool information available</div>'

	    if isinstance(tools, str):
	        try:
	            tools = json.loads(tools)
	        except json.JSONDecodeError:
	            # Only malformed JSON means "no tool info"; other errors are
	            # real bugs and must not be hidden behind the placeholder.
	            return no_tools_html

	    if not tools:
	        return no_tools_html

	    cards = []
	    for tool in tools:
	        name, description, parameters = parse_tool_schema(tool)
	        # Schema text comes from the dataset; escape it so characters such
	        # as "<" are shown literally instead of being parsed as markup.
	        name = html.escape(str(name), quote=False)
	        description = html.escape(str(description), quote=False)
	        cards.append(f"""
	<div style="
	margin: 1rem 0;
	padding: 1.5rem;
	border-radius: 8px;
	background-color: var(--surface-color);
	border: 1px solid var(--border-color);">
	<div style="
	font-weight: 600;
	color: var(--primary-text);
	margin-bottom: 0.75rem;
	font-size: 1.1rem;">
	{name}
	</div>
	<div style="
	color: var(--text-color);
	margin-bottom: 1rem;
	line-height: 1.5;">
	{description}
	</div>
	<div style="
	background-color: var(--surface-color-alt);
	padding: 1rem;
	border-radius: 4px;
	border: 1px solid var(--border-color);">
	{format_parameters(parameters)}
	</div>
	</div>
	""")
	    tool_html = "".join(cards)

	    return f"""
	<div style="
	max-height: 600px;
	overflow-y: auto;
	padding-right: 0.5rem;">
	<style>
	:root[data-theme="light"] {{
	--surface-color: #f8f9fa;
	--surface-color-alt: #ffffff;
	--text-color: #202124;
	--text-muted: #666666;
	--primary-text: #1a73e8;
	--border-color: #e9ecef;
	--shadow-color: rgba(0,0,0,0.1);
	--message-bg-user: #E5F6FD;
	--message-bg-assistant: #F7F7F8;
	--message-bg-system: #FFF3E0;
	--score-high: #1a73e8;
	--score-med: #f4b400;
	--score-low: #ea4335;
	}}

	:root[data-theme="dark"] {{
	--surface-color: #1e1e1e;
	--surface-color-alt: #2d2d2d;
	--text-color: #ffffff;
	--text-muted: #a0a0a0;
	--primary-text: #60a5fa;
	--border-color: #404040;
	--shadow-color: rgba(0,0,0,0.3);
	--message-bg-user: #2d3748;
	--message-bg-assistant: #1a1a1a;
	--message-bg-system: #2c2516;
	--response-bg: #2a2f3a;
	--score-high: #60a5fa;
	--score-med: #fbbf24;
	--score-low: #ef4444;
	}}
	</style>
	{tool_html}
	</div>
	"""

	def format_parameters(parameters):
	    """Render a tool's parameters as a stack of name/description rows.

	    Args:
	        parameters: Mapping of parameter name to its description text.

	    Returns:
	        A "No parameters" placeholder ``<div>`` when the mapping is empty or
	        ``None``; otherwise the concatenated HTML rows in mapping order.
	        Names and descriptions are HTML-escaped before being inserted.
	    """
	    if not parameters:
	        return '<div style="color: var(--text-muted);">No parameters</div>'

	    rows = []
	    for name, desc in parameters.items():
	        # Escape schema text so it is displayed literally, never as markup.
	        name = html.escape(str(name), quote=False)
	        desc = html.escape(str(desc), quote=False)
	        rows.append(f"""
	<div style="margin: 0.75rem 0;">
	<div style="
	font-weight: 500;
	color: var(--primary-text);
	margin-bottom: 0.25rem;">
	{name}
	</div>
	<div style="
	color: var(--text-color);
	line-height: 1.4;
	font-size: 0.95rem;">
	{desc}
	</div>
	</div>
	""")
	    return "".join(rows)

	def format_metrics(score, rationale, explanation):
	    """Render the score panel: TSQ score, rationale and explanation.

	    Args:
	        score: Numeric score; shown with two decimals and coloured with
	            ``--score-high`` (>= 0.7), ``--score-med`` (>= 0.4) or
	            ``--score-low`` (anything lower).
	        rationale: Text shown under the "Rationale" heading.
	        explanation: Text shown under the "Explanation" heading.

	    Returns:
	        An HTML fragment for the metrics panel. Rationale and explanation are
	        converted with ``str`` and HTML-escaped before being inserted.
	    """
	    if score >= 0.7:
	        score_color = "var(--score-high)"
	    elif score >= 0.4:
	        score_color = "var(--score-med)"
	    else:
	        score_color = "var(--score-low)"

	    # Free-form evaluator text: escape it so "<" and "&" are shown literally.
	    rationale_html = html.escape(str(rationale), quote=False)
	    explanation_html = html.escape(str(explanation), quote=False)

	    return f"""
	<div style="
	padding: 1.5rem;
	background-color: var(--surface-color);
	border-radius: 8px;
	border: 1px solid var(--border-color);
	box-shadow: 0 2px 4px var(--shadow-color);">
	<div style="margin-bottom: 1.5rem;">
	<h3 style="
	color: var(--text-color);
	font-size: 1.1rem;
	margin-bottom: 0.5rem;
	font-weight: 600;">TSQ Score</h3>
	<div style="
	font-size: 2rem;
	font-weight: 600;
	color: {score_color};">
	{score:.2f}
	</div>
	</div>
	<div style="margin-bottom: 1.5rem;">
	<h3 style="
	color: var(--text-color);
	font-size: 1.1rem;
	margin-bottom: 0.5rem;
	font-weight: 600;">Rationale</h3>
	<div style="
	color: var(--text-color);
	line-height: 1.5;">
	{rationale_html}
	</div>
	</div>
	<div>
	<h3 style="
	color: var(--text-color);
	font-size: 1.1rem;
	margin-bottom: 0.5rem;
	font-weight: 600;">Explanation</h3>
	<div style="
	color: var(--text-color);
	line-height: 1.5;">
	{explanation_html}
	</div>
	</div>
	</div>
	"""

	def update_chat_display(df, index):
	    """Build the chat, metrics and tool panels for one row of the frame.

	    Args:
	        df: Frame with ``conversation`` (JSON list of ``role``/``content``
	            messages), ``response``, ``score``, ``rationale``,
	            ``explanation`` and ``tools_langchain`` columns.
	        index: Positional row index to display.

	    Returns:
	        A ``(chat_html, metrics_html, tool_html)`` tuple. Placeholder
	        fragments are returned when ``df`` is ``None``, empty, or ``index``
	        is past the last row.
	    """
	    nothing_to_show = df is None or df.empty or index >= len(df)
	    if nothing_to_show:
	        return (
	            '<div style="padding: 1rem; color: var(--text-muted);">No data available</div>',
	            '<div style="padding: 1rem; color: var(--text-muted);">No metrics available</div>',
	            '<div style="padding: 1rem; color: var(--text-muted);">No tool information available</div>',
	        )

	    record = df.iloc[index]

	    conversation = json.loads(record["conversation"])
	    response_text = format_response(record["response"])

	    # Conversation turns first, then the model's response as the last bubble.
	    bubbles = []
	    for message in conversation:
	        bubbles.append(format_chat_message(message["role"], message["content"]))
	    bubbles.append(
	        format_chat_message("Assistant", response_text, is_response=True)
	    )
	    bubbles_html = "".join(bubbles)

	    chat_html = f"""
	<div style="
	background-color: var(--surface-color);
	border-radius: 8px;
	border: 1px solid var(--border-color);
	box-shadow: 0 2px 4px var(--shadow-color);
	padding: 1.5rem;">
	{bubbles_html}
	</div>
	"""

	    metrics_html = format_metrics(
	        record["score"], record["rationale"], record["explanation"]
	    )
	    tool_html = format_tool_info(record["tools_langchain"])

	    return chat_html, metrics_html, tool_html


	def filter_and_update_display(model, dataset, min_score, max_score, current_index):
	    """Load a model/dataset pair, filter by score and render the chosen row.

	    Args:
	        model: Model name passed to ``get_chat_and_score_df``.
	        dataset: Dataset name passed to ``get_chat_and_score_df``.
	        min_score: Inclusive lower bound on the ``score`` column.
	        max_score: Inclusive upper bound on the ``score`` column.
	        current_index: Requested position within the filtered rows; it is
	            clamped to the valid range ``0 .. len - 1``.

	    Returns:
	        A ``(chat_html, metrics_html, tool_html, index_display)`` tuple.
	        When no row matches the filters, or when anything raises while
	        loading or rendering, placeholder panels and ``"0/0"`` are returned;
	        in the error case the first element shows the escaped error message.
	    """
	    no_metrics_html = '<div style="padding: 1rem; color: var(--text-muted);">No metrics available</div>'
	    no_tools_html = '<div style="padding: 1rem; color: var(--text-muted);">No tool information available</div>'

	    try:
	        df_chat = get_chat_and_score_df(model, dataset)
	        df_chat = df_chat[
	            (df_chat["score"] >= min_score) & (df_chat["score"] <= max_score)
	        ]

	        if df_chat.empty:
	            return (
	                '<div style="padding: 1rem; color: var(--text-muted);">No data available for selected filters</div>',
	                no_metrics_html,
	                no_tools_html,
	                "0/0",
	            )

	        max_index = len(df_chat) - 1
	        # Clamp on both sides: a negative index would otherwise wrap around
	        # to the end of the frame and be reported as position "0/N".
	        current_index = max(0, min(current_index, max_index))
	        chat_html, metrics_html, tool_html = update_chat_display(df_chat, current_index)

	        index_display = f'<div style="font-weight: 500; color: var(--primary-text);">{current_index + 1}/{len(df_chat)}</div>'
	        return chat_html, metrics_html, tool_html, index_display

	    except Exception as e:
	        # UI boundary: surface the failure in the panel instead of crashing
	        # the callback. The message is escaped because it may echo data.
	        error_text = html.escape(str(e), quote=False)
	        error_html = f"""
	<div style="
	padding: 1rem;
	color: var(--score-low);
	background-color: var(--surface-color);
	border: 1px solid var(--score-low);
	border-radius: 4px;">
	Error: {error_text}
	</div>
	"""
	        return (
	            error_html,
	            no_metrics_html,
	            no_tools_html,
	            "0/0",
	        )