|
import gradio as gr
|
|
import os
|
|
import pandas as pd
|
|
import json
|
|
from PIL import Image, ImageSequence
|
|
import io
|
|
from functools import reduce
|
|
import numpy as np
|
|
from datetime import datetime, timedelta
|
|
import matplotlib.pyplot as plt
|
|
from leaderboard_utils import (
|
|
get_organization,
|
|
get_mario_planning_leaderboard,
|
|
get_sokoban_leaderboard,
|
|
get_2048_leaderboard,
|
|
get_candy_leaderboard,
|
|
get_tetris_planning_leaderboard,
|
|
get_ace_attorney_leaderboard,
|
|
get_combined_leaderboard,
|
|
GAME_ORDER
|
|
)
|
|
from data_visualization import (
|
|
get_combined_leaderboard_with_group_bar,
|
|
create_horizontal_bar_chart,
|
|
get_combined_leaderboard_with_single_radar
|
|
)
|
|
from gallery_tab import create_video_gallery
|
|
|
|
|
|
|
|
# Feature flag. It is not referenced anywhere else in the visible part of this file.
HAS_ENHANCED_LEADERBOARD = True

# Maps a time-point label (MM/DD/YYYY) to the JSON file holding the rank data
# recorded for that date.
TIME_POINTS = {
    "03/25/2025": "rank_data_03_25_2025.json",
}

# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding.
with open(TIME_POINTS["03/25/2025"], "r", encoding="utf-8") as f:
    # Rank data behind the "Agent Leaderboard" tab; may be rebound later by
    # update_leaderboard_with_time().
    rank_data = json.load(f)

with open("rank_single_model_03_25_2025.json", "r", encoding="utf-8") as f:
    # Rank data behind the "Model Leaderboard" tab.
    model_rank_data = json.load(f)
|
|
|
|
|
|
# Mutable module-level record of the checkbox states seen on the previous
# callback, used by update_leaderboard() to work out which checkbox changed.
# Initially every game's "Score" box is ticked and every "Details" box is not.
leaderboard_state = {
    # Game currently shown in details view, or None for the combined view.
    "current_game": None,
    "previous_overall": dict.fromkeys(
        (
            "Super Mario Bros",
            "Sokoban",
            "2048",
            "Candy Crush",
            "Tetris",
            "Ace Attorney",
        ),
        True,
    ),
    "previous_details": dict.fromkeys(
        (
            "Super Mario Bros",
            "Sokoban",
            "2048",
            "Candy Crush",
            "Tetris",
            "Ace Attorney",
        ),
        False,
    ),
}
|
|
|
|
|
|
|
|
# Static assets loaded once at import time. JSON text is UTF-8 by
# specification, so decode it explicitly instead of relying on the platform's
# default encoding.
with open('assets/game_video_link.json', 'r', encoding='utf-8') as f:
    VIDEO_LINKS = json.load(f)

with open('assets/news.json', 'r', encoding='utf-8') as f:
    NEWS_DATA = json.load(f)
|
|
|
|
def load_rank_data(time_point):
    """Load rank data for a specific time point.

    Args:
        time_point: Key into ``TIME_POINTS`` (for example ``"03/25/2025"``).

    Returns:
        The parsed JSON content of the file registered for ``time_point``, or
        ``None`` when the time point is unknown or its file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    path = TIME_POINTS.get(time_point)
    if path is None:
        return None

    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # A registered but missing snapshot is treated like an unknown one.
        return None
|
|
|
|
|
|
def add_score_note():
    """Create the Markdown footnote that explains 'n/a' entries in the table."""
    note_text = "*Note: 'n/a' in the table indicates no data point for that model.*"
    return gr.Markdown(note_text, elem_classes="score-note")
|
|
|
|
|
|
def prepare_dataframe_for_display(df, for_game=None):
    """Format a leaderboard DataFrame for display in the UI.

    Args:
        df: Leaderboard table. Columns whose name ends in " Score" are treated
            as per-game score columns; columns whose name starts with "norm_"
            are dropped.
        for_game: Optional game name. When given, rows are ranked by that
            game's "<game> Score" column (highest first) and rows without a
            numeric score in it are removed. When omitted, rows are ranked by
            the mean of all score columns (highest first), with the "Player"
            column as an ascending tie-breaker when it exists.

    Returns:
        A new DataFrame whose score headers read "<game>\\nScore". ``df``
        itself is never modified.
    """
    score_suffix = " Score"

    # Work on a copy so the caller's frame is left untouched.
    display_df = df.copy()

    norm_columns = [col for col in display_df.columns if col.startswith("norm_")]
    if norm_columns:
        display_df = display_df.drop(columns=norm_columns)

    score_cols = [col for col in display_df.columns if col.endswith(score_suffix)]

    # '_' placeholders are shown as '-'.
    for col in score_cols:
        display_df[col] = display_df[col].apply(lambda x: "-" if x == "_" else x)

    if for_game:
        score_col = f"{for_game}{score_suffix}"
        if score_col in display_df.columns:
            # Anything non-numeric (e.g. 'n/a', '-') becomes NaN and is dropped.
            display_df[score_col] = pd.to_numeric(display_df[score_col], errors="coerce")
            display_df = display_df.sort_values(by=score_col, ascending=False)
            display_df = display_df[display_df[score_col].notna()]
    elif score_cols:
        # Rank on a numeric copy so the displayed cells keep their original
        # values (including placeholders such as 'n/a').
        numeric_scores = display_df[score_cols].copy()
        for col in score_cols:
            numeric_scores[col] = pd.to_numeric(numeric_scores[col], errors="coerce")

        # Sort keys live in a positionally indexed frame so that repeated
        # index labels in ``display_df`` cannot duplicate rows on reorder.
        sort_keys = pd.DataFrame(
            {"temp_avg_score_for_sort": numeric_scores.mean(axis=1).to_numpy()}
        )
        by = ["temp_avg_score_for_sort"]
        ascending = [False]
        # Tie-break on player name only when that column is present.
        if "Player" in display_df.columns:
            sort_keys["Player"] = display_df["Player"].to_numpy()
            by.append("Player")
            ascending.append(True)

        positions = sort_keys.sort_values(by=by, ascending=ascending).index.to_numpy()
        display_df = display_df.iloc[positions]

    # Put the game name and the word "Score" on separate header lines; strip
    # only the trailing suffix so names that contain " Score" stay intact.
    header_map = {
        col: f"{col[:-len(score_suffix)]}\nScore" for col in score_cols
    }
    if header_map:
        display_df = display_df.rename(columns=header_map)

    return display_df
|
|
|
|
|
|
def update_df_with_height(df):
    """Build the ``gr.update`` payload that refreshes a leaderboard table.

    The payload carries ``df`` as the new value together with the fixed table
    options and one width per rendered column: 40px for the row-number column,
    230px for the first data column and 120px for every remaining one.
    """
    # Three fixed leading widths, then one 120px entry per remaining column.
    # A non-positive repeat count yields no extra entries.
    widths = ["40px", "230px", "120px"] + ["120px"] * (len(df.columns) - 2)

    table_options = dict(
        show_row_numbers=True,
        show_fullscreen_button=True,
        line_breaks=True,
        show_search="search",
        column_widths=widths,
    )
    return gr.update(value=df, **table_options)
|
|
|
|
def update_leaderboard(
        mario_plan_overall, mario_plan_details,
        sokoban_overall, sokoban_details,
        _2048_overall, _2048_details,
        candy_overall, candy_details,
        tetris_plan_overall, tetris_plan_details,
        ace_attorney_overall, ace_attorney_details,
        top_n=10,
        data_source=None):
    """Recompute the table, charts and checkbox values after a UI change.

    The twelve leading arguments are the current "Score" (overall) and
    "Details" checkbox values for each game. They are compared with the values
    stored in the module-level ``leaderboard_state`` to find the first game
    whose checkboxes changed, and the state is then updated:

    * Details ticked: that game becomes the single details view and every
      other game's checkboxes are cleared.
    * Score ticked: any active details view is closed and the game joins the
      combined view.
    * Score unticked: refused (re-ticked) while that game is in details view,
      otherwise the game leaves the combined view.
    * Details unticked: the details view is closed and the game's Score box is
      ticked again.

    Args:
        top_n: Number of models passed to the grouped bar chart builder.
        data_source: Rank data to use; falls back to the module-level
            ``rank_data`` when ``None``.

    Returns:
        A 16-tuple: the table update, the detail chart, the radar chart, the
        grouped bar chart, followed by the (overall, details) checkbox values
        for each game in the order Super Mario Bros, Sokoban, 2048,
        Candy Crush, Tetris, Ace Attorney.

    NOTE(review): ``leaderboard_state`` is one module-level dict, yet
    build_app() wires this function to both the agent and the model tab, so
    the two tabs appear to share their "previous" checkbox state — confirm
    whether that is intended.
    """
    data = data_source if data_source is not None else rank_data

    current_overall = {
        "Super Mario Bros": mario_plan_overall,
        "Sokoban": sokoban_overall,
        "2048": _2048_overall,
        "Candy Crush": candy_overall,
        "Tetris": tetris_plan_overall,
        "Ace Attorney": ace_attorney_overall,
    }
    current_details = {
        "Super Mario Bros": mario_plan_details,
        "Sokoban": sokoban_details,
        "2048": _2048_details,
        "Candy Crush": candy_details,
        "Tetris": tetris_plan_details,
        "Ace Attorney": ace_attorney_details,
    }

    state = leaderboard_state
    prev_overall = state["previous_overall"]
    prev_details = state["previous_details"]

    # First game (in display order) whose checkboxes differ from last time.
    changed_game = next(
        (
            game for game in current_overall
            if current_overall[game] != prev_overall[game]
            or current_details[game] != prev_details[game]
        ),
        None,
    )

    if changed_game:
        if current_details[changed_game] and not prev_details[changed_game]:
            # Details ticked: show only this game, clear every other one.
            for game in current_overall:
                if game != changed_game:
                    current_overall[game] = False
                    current_details[game] = False
                    prev_overall[game] = False
                    prev_details[game] = False

            state["current_game"] = changed_game
            prev_overall[changed_game] = True
            prev_details[changed_game] = True
            current_overall[changed_game] = True

        elif current_overall[changed_game] and not prev_overall[changed_game]:
            # Score ticked: leave any active details view first.
            active_game = state["current_game"]
            if active_game and prev_details[active_game]:
                prev_details[active_game] = False
                current_details[active_game] = False
                state["current_game"] = None

            prev_overall[changed_game] = True
            prev_details[changed_game] = False

        elif not current_overall[changed_game] and prev_overall[changed_game]:
            # Score unticked: not allowed while this game is the details view.
            if state["current_game"] == changed_game:
                current_overall[changed_game] = True
            else:
                prev_overall[changed_game] = False

        elif not current_details[changed_game] and prev_details[changed_game]:
            # Details unticked: close the details view for this game.
            prev_details[changed_game] = False
            if state["current_game"] == changed_game:
                state["current_game"] = None
                current_overall[changed_game] = True
                current_details[changed_game] = False
                prev_overall[changed_game] = True
                prev_details[changed_game] = False

    # Games whose Score box is ticked take part in the combined view.
    selected_games = dict(current_overall)

    current_game = state["current_game"]
    if current_game:
        # Details view: a single game's table plus one horizontal bar chart,
        # which is also returned in the radar and grouped-bar slots.
        loaders = {
            "Super Mario Bros": get_mario_planning_leaderboard,
            "Sokoban": get_sokoban_leaderboard,
            "2048": get_2048_leaderboard,
            "Candy Crush": get_candy_leaderboard,
            "Tetris": get_tetris_planning_leaderboard,
            "Ace Attorney": get_ace_attorney_leaderboard,
        }
        loader = loaders.get(current_game)
        df = loader(data) if loader is not None else pd.DataFrame()

        display_df = prepare_dataframe_for_display(df, current_game)
        chart = create_horizontal_bar_chart(df, current_game)
        radar_chart = chart
        group_bar_chart = chart
    else:
        # Combined view over every selected game.
        df, group_bar_chart = get_combined_leaderboard_with_group_bar(data, selected_games, top_n)
        display_df = prepare_dataframe_for_display(df)
        _, radar_chart = get_combined_leaderboard_with_single_radar(data, selected_games)
        chart = radar_chart

    # Checkbox values go back out as (overall, details) pairs in game order.
    checkbox_values = []
    for game in current_overall:
        checkbox_values.extend((current_overall[game], current_details[game]))

    return (
        update_df_with_height(display_df),
        chart,
        radar_chart,
        group_bar_chart,
        *checkbox_values,
    )
|
|
|
|
def update_leaderboard_with_time(time_point,
                                 mario_plan_overall, mario_plan_details,
                                 sokoban_overall, sokoban_details,
                                 _2048_overall, _2048_details,
                                 candy_overall, candy_details,
                                 tetris_plan_overall, tetris_plan_details,
                                 ace_attorney_overall, ace_attorney_details):
    """Switch the active rank data to ``time_point`` and refresh the leaderboard.

    When ``load_rank_data(time_point)`` yields data, the module-level
    ``rank_data`` is rebound to it; otherwise ``rank_data`` is left as it was.
    The checkbox values are then forwarded unchanged to ``update_leaderboard``,
    whose result is returned.
    """
    global rank_data

    snapshot = load_rank_data(time_point)
    if snapshot is not None:
        rank_data = snapshot

    checkbox_values = (
        mario_plan_overall, mario_plan_details,
        sokoban_overall, sokoban_details,
        _2048_overall, _2048_details,
        candy_overall, candy_details,
        tetris_plan_overall, tetris_plan_details,
        ace_attorney_overall, ace_attorney_details,
    )
    return update_leaderboard(*checkbox_values)
|
|
|
|
def get_total_model_count(data_source):
    """Count the distinct "Player" values in the all-games combined leaderboard."""
    every_game = dict.fromkeys(
        (
            "Super Mario Bros",
            "Sokoban",
            "2048",
            "Candy Crush",
            "Tetris",
            "Ace Attorney",
        ),
        True,
    )
    combined = get_combined_leaderboard(data_source, every_game)
    distinct_players = combined["Player"].unique()
    return len(distinct_players)
|
|
|
|
def get_initial_state():
    """Return a fresh leaderboard state: no details view, every Score box ticked."""
    games = (
        "Super Mario Bros",
        "Sokoban",
        "2048",
        "Candy Crush",
        "Tetris",
        "Ace Attorney",
    )
    # Each call builds new dicts, so callers may mutate the result freely.
    return {
        "current_game": None,
        "previous_overall": {game: True for game in games},
        "previous_details": {game: False for game in games},
    }
|
|
|
|
def clear_filters(top_n=10, data_source=None):
    """Reset the leaderboard to the combined view with every game selected.

    Rebuilds the table and charts for all six games, replaces the module-level
    ``leaderboard_state`` with a fresh initial state, and returns the same
    16-item shape as ``update_leaderboard``: table update, radar chart (also
    used for the detail-chart slot), radar chart, grouped bar chart, then
    ``(True, False)`` for each game's (Score, Details) checkboxes.

    Args:
        top_n: Number of models passed to the grouped bar chart builder.
        data_source: Rank data to use; falls back to the module-level
            ``rank_data`` when ``None``.
    """
    global leaderboard_state

    data = rank_data if data_source is None else data_source

    all_games = dict.fromkeys(
        (
            "Super Mario Bros",
            "Sokoban",
            "2048",
            "Candy Crush",
            "Tetris",
            "Ace Attorney",
        ),
        True,
    )

    combined_df, group_bar_chart = get_combined_leaderboard_with_group_bar(data, all_games, top_n)
    table = prepare_dataframe_for_display(combined_df)
    _, radar_chart = get_combined_leaderboard_with_single_radar(data, all_games)

    leaderboard_state = get_initial_state()

    # One (Score ticked, Details unticked) pair per game.
    checkbox_defaults = (True, False) * 6
    return (
        update_df_with_height(table),
        radar_chart,
        radar_chart,
        group_bar_chart,
    ) + checkbox_defaults
|
|
|
|
def create_timeline_slider():
    """Build the timeline widget as a ``gr.HTML`` component.

    The markup is a static track with a single handle labelled "03/25/2025",
    plus an inline script that wires mouse dragging but always snaps the
    handle back to that one time point.
    """
    # NOTE(review): the script looks the widget up with
    # document.querySelector('.timeline-container'), which returns the first
    # match in the document, while build_app() creates this widget twice —
    # confirm whether the second instance is ever wired up.
    return gr.HTML("""
    <div class="timeline-container">
        <style>
            .timeline-container {
                width: 85%; /* Increased from 70% to 85% */
                padding: 8px;
                font-family: Arial, sans-serif;
                height: 40px;
                display: flex;
                align-items: center;
            }
            .timeline-track {
                position: relative;
                height: 6px;
                background: #e0e0e0;
                border-radius: 3px;
                margin: 0;
                width: 100%;
            }
            .timeline-progress {
                position: absolute;
                height: 100%;
                background: #2196F3;
                border-radius: 3px;
                width: 100%;
            }
            .timeline-handle {
                position: absolute;
                right: 0;
                top: 50%;
                transform: translate(50%, -50%);
                width: 20px;
                height: 20px;
                background: #2196F3;
                border: 3px solid white;
                border-radius: 50%;
                cursor: pointer;
                box-shadow: 0 2px 6px rgba(0,0,0,0.3);
            }
            .timeline-date {
                position: absolute;
                top: -25px;
                transform: translateX(-50%);
                background: #2196F3; /* Changed to match slider blue color */
                color: #ffffff !important;
                padding: 3px 8px;
                border-radius: 4px;
                font-size: 12px;
                white-space: nowrap;
                font-weight: 600;
                box-shadow: 0 2px 6px rgba(0,0,0,0.2);
                letter-spacing: 0.5px;
                text-shadow: 0 1px 2px rgba(0,0,0,0.2);
            }
        </style>
        <div class="timeline-track">
            <div class="timeline-progress"></div>
            <div class="timeline-handle">
                <div class="timeline-date">03/25/2025</div>
            </div>
        </div>
    </div>
    <script>
        (function() {
            const container = document.querySelector('.timeline-container');
            const track = container.querySelector('.timeline-track');
            const handle = container.querySelector('.timeline-handle');
            let isDragging = false;

            // For now, we only have one time point
            const timePoints = {
                "03/25/2025": 1.0
            };

            function updatePosition(e) {
                if (!isDragging) return;

                const rect = track.getBoundingClientRect();
                let x = (e.clientX - rect.left) / rect.width;
                x = Math.max(0, Math.min(1, x));

                // For now, snap to the only available time point
                x = 1.0;

                handle.style.right = `${(1 - x) * 100}%`;
            }

            handle.addEventListener('mousedown', (e) => {
                isDragging = true;
                e.preventDefault();
            });

            document.addEventListener('mousemove', updatePosition);
            document.addEventListener('mouseup', () => {
                isDragging = false;
            });

            // Prevent text selection while dragging
            container.addEventListener('selectstart', (e) => {
                if (isDragging) e.preventDefault();
            });
        })();
    </script>
    """)
|
|
|
|
def build_app():
|
|
with gr.Blocks(css="""
|
|
/* Fix for scrolling issues */
|
|
html, body {
|
|
overflow-y: auto !important;
|
|
overflow-x: hidden !important;
|
|
width: 100% !important;
|
|
height: 100% !important;
|
|
max-height: none !important;
|
|
position: relative !important;
|
|
}
|
|
.radar-tip {
|
|
font-size: 14px;
|
|
color: #555;
|
|
margin-top: 5px;
|
|
margin-bottom: 20px;
|
|
font-style: italic;
|
|
}
|
|
|
|
|
|
/* Force scrolling to work on the main container */
|
|
.gradio-container, #root, #app {
|
|
width: 100% !important;
|
|
max-width: 1200px !important;
|
|
margin-left: auto !important;
|
|
margin-right: auto !important;
|
|
min-height: auto !important;
|
|
height: auto !important;
|
|
overflow: visible !important;
|
|
position: relative !important;
|
|
}
|
|
|
|
/* Remove ALL inner scrollbars - very important! */
|
|
.gradio-container * {
|
|
scrollbar-width: none !important; /* Firefox */
|
|
}
|
|
|
|
/* Hide scrollbars for Chrome, Safari and Opera */
|
|
.gradio-container *::-webkit-scrollbar {
|
|
display: none !important;
|
|
}
|
|
|
|
/* Only allow scrollbar on body */
|
|
body::-webkit-scrollbar {
|
|
display: block !important;
|
|
width: 10px !important;
|
|
}
|
|
|
|
body::-webkit-scrollbar-track {
|
|
background: #f1f1f1 !important;
|
|
}
|
|
|
|
body::-webkit-scrollbar-thumb {
|
|
background: #888 !important;
|
|
border-radius: 5px !important;
|
|
}
|
|
|
|
body::-webkit-scrollbar-thumb:hover {
|
|
background: #555 !important;
|
|
}
|
|
|
|
/* Clean up table styling */
|
|
.table-container {
|
|
width: 100% !important;
|
|
overflow: hidden !important;
|
|
border-radius: 8px;
|
|
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
|
|
}
|
|
|
|
/* Remove duplicate scrollbars */
|
|
.gradio-dataframe [data-testid="table"],
|
|
[data-testid="dataframe"] [data-testid="table"],
|
|
.gradio-dataframe tbody,
|
|
[data-testid="dataframe"] tbody,
|
|
.table-container > div,
|
|
.table-container > div > div {
|
|
overflow: hidden !important;
|
|
/* max-height: none !important; */ /* REMOVED */
|
|
}
|
|
|
|
/* Ensure table contents are visible without scrollbars */
|
|
.gradio-dataframe,
|
|
[data-testid="dataframe"] {
|
|
overflow: visible !important;
|
|
/* max-height: none !important; */ /* REMOVED */
|
|
border: none !important;
|
|
}
|
|
|
|
/* Visualization styling */
|
|
.visualization-container .js-plotly-plot {
|
|
margin-left: auto !important;
|
|
margin-right: auto !important;
|
|
display: block !important;
|
|
max-width: 1000px;
|
|
}
|
|
|
|
/* Section styling */
|
|
.section-title {
|
|
font-size: 1.5em;
|
|
font-weight: bold;
|
|
color: #2c3e50;
|
|
margin-bottom: 15px;
|
|
padding-bottom: 10px;
|
|
border-bottom: 2px solid #e9ecef;
|
|
text-align: center;
|
|
}
|
|
|
|
/* Fix table styling */
|
|
.table-container table {
|
|
width: 100%;
|
|
border-collapse: separate;
|
|
border-spacing: 0;
|
|
table-layout: fixed !important;
|
|
}
|
|
|
|
/* Column width customization - adjust for row numbers being first column */
|
|
.table-container th:nth-child(2),
|
|
.table-container td:nth-child(2) {
|
|
width: 230px !important;
|
|
min-width: 200px !important;
|
|
max-width: 280px !important;
|
|
padding-left: 8px !important;
|
|
padding-right: 8px !important;
|
|
}
|
|
|
|
.table-container th:nth-child(3),
|
|
.table-container td:nth-child(3) {
|
|
width: 120px !important;
|
|
min-width: 100px !important;
|
|
max-width: 140px !important;
|
|
}
|
|
|
|
/* Game score columns */
|
|
.table-container th:nth-child(n+4),
|
|
.table-container td:nth-child(n+4) {
|
|
width: 120px !important;
|
|
min-width: 100px !important;
|
|
max-width: 140px !important;
|
|
text-align: center !important;
|
|
}
|
|
|
|
/* Make headers sticky */
|
|
.table-container th {
|
|
position: sticky !important;
|
|
top: 0 !important;
|
|
background-color: var(--header-bg, #f8f9fa) !important;
|
|
z-index: 10 !important;
|
|
font-weight: bold;
|
|
padding: 16px 10px !important;
|
|
border-bottom: 2px solid var(--border-color, #e9ecef);
|
|
white-space: pre-wrap !important;
|
|
word-wrap: break-word !important;
|
|
line-height: 1.2 !important;
|
|
height: auto !important;
|
|
min-height: 60px !important;
|
|
vertical-align: middle !important;
|
|
color: var(--header-text, #2c3e50) !important;
|
|
}
|
|
|
|
/* Dark mode specific styles */
|
|
.dark .table-container th {
|
|
--header-bg: #2d3748;
|
|
--header-text: #e2e8f0;
|
|
--border-color: #4a5568;
|
|
}
|
|
|
|
/* Light mode specific styles */
|
|
.light .table-container th {
|
|
--header-bg: #f8f9fa;
|
|
--header-text: #2c3e50;
|
|
--border-color: #e9ecef;
|
|
}
|
|
|
|
/* Simple cell styling */
|
|
.table-container td {
|
|
padding: 8px 8px;
|
|
border-bottom: 1px solid var(--border-color, #e9ecef);
|
|
}
|
|
|
|
/* Row number column styling */
|
|
.gradio-dataframe thead tr th[id="0"],
|
|
.gradio-dataframe tbody tr td:nth-child(1),
|
|
[data-testid="dataframe"] thead tr th[id="0"],
|
|
[data-testid="dataframe"] tbody tr td:nth-child(1),
|
|
.svelte-1gfkn6j thead tr th:first-child,
|
|
.svelte-1gfkn6j tbody tr td:first-child {
|
|
width: 40px !important;
|
|
min-width: 40px !important;
|
|
max-width: 40px !important;
|
|
padding: 4px !important;
|
|
text-align: center !important;
|
|
font-size: 0.85em !important;
|
|
}
|
|
|
|
/* Fix for Gradio footer causing scroll issues */
|
|
footer {
|
|
position: relative !important;
|
|
width: 100% !important;
|
|
margin-top: 40px !important;
|
|
}
|
|
""") as demo:
|
|
gr.Markdown("# 🎮 Lmgame Bench: Leaderboard 🎲")
|
|
|
|
|
|
gr.HTML("""
|
|
|
|
<script>
|
|
// Function to add line breaks to table headers
|
|
function formatTableHeaders() {
|
|
// Find all table headers in the document
|
|
const headers = document.querySelectorAll('th');
|
|
|
|
headers.forEach(header => {
|
|
let text = header.textContent || '';
|
|
|
|
// Skip if already processed
|
|
if (header.getAttribute('data-processed') === 'true') {
|
|
return;
|
|
}
|
|
|
|
// Store original content for reference
|
|
if (!header.getAttribute('data-original')) {
|
|
header.setAttribute('data-original', header.innerHTML);
|
|
}
|
|
|
|
let newContent = header.innerHTML;
|
|
|
|
// Format Super Mario Brosheader
|
|
if (text.includes('Super Mario Bros')) {
|
|
newContent = newContent.replace(/Super\s+Mario\s+Bros/g, 'Super<br>Mario Bros');
|
|
}
|
|
|
|
// Format Tetrisheaders
|
|
if (text.includes('Tetris(complete)')) {
|
|
newContent = newContent.replace(/Tetris\s+\(complete\)/g, 'Tetris<br>(complete)');
|
|
}
|
|
|
|
if (text.includes('Tetris')) {
|
|
newContent = newContent.replace(/Tetris\s+\(planning\s+only\)/g, 'Tetris');
|
|
}
|
|
|
|
// Format Candy Crush header
|
|
if (text.includes('Candy Crush')) {
|
|
newContent = newContent.replace(/Candy\s+Crash/g, 'Candy<br>Crash');
|
|
}
|
|
|
|
// Make Organization header wider and fix its name
|
|
if (text.includes('Organization') || text.includes('Organi-zation')) {
|
|
header.style.minWidth = '150px';
|
|
header.style.width = '150px';
|
|
|
|
// Fix the Organization header name if it has a line break
|
|
if (text.includes('Organi-') || text.includes('zation')) {
|
|
newContent = newContent.replace(/Organi-<br>zation|Organi-zation/, 'Organization');
|
|
}
|
|
}
|
|
|
|
// Update content if changed
|
|
if (newContent !== header.innerHTML) {
|
|
header.innerHTML = newContent;
|
|
header.setAttribute('data-processed', 'true');
|
|
|
|
// Also ensure headers have proper styling
|
|
header.style.whiteSpace = 'normal';
|
|
header.style.lineHeight = '1.2';
|
|
header.style.verticalAlign = 'middle';
|
|
header.style.minHeight = '70px';
|
|
header.style.fontSize = '0.9em';
|
|
}
|
|
});
|
|
}
|
|
|
|
// Function to fix player name cells to prevent line breaking
|
|
function fixPlayerCells() {
|
|
// Find all table cells in the document
|
|
const tables = document.querySelectorAll('table');
|
|
|
|
tables.forEach(table => {
|
|
// Process rows starting from index 1 (skip header)
|
|
const rows = table.querySelectorAll('tr');
|
|
|
|
rows.forEach((row, index) => {
|
|
// Skip header row
|
|
if (index === 0) return;
|
|
|
|
// Get the player cell (typically 2nd cell)
|
|
const playerCell = row.querySelector('td:nth-child(2)');
|
|
const orgCell = row.querySelector('td:nth-child(3)');
|
|
|
|
if (playerCell) {
|
|
playerCell.style.whiteSpace = 'nowrap';
|
|
playerCell.style.overflow = 'hidden';
|
|
playerCell.style.textOverflow = 'ellipsis';
|
|
playerCell.style.maxWidth = '230px';
|
|
playerCell.style.textAlign = 'left';
|
|
}
|
|
|
|
if (orgCell) {
|
|
orgCell.style.whiteSpace = 'nowrap';
|
|
orgCell.style.overflow = 'hidden';
|
|
orgCell.style.textOverflow = 'ellipsis';
|
|
orgCell.style.minWidth = '150px';
|
|
orgCell.style.width = '150px';
|
|
}
|
|
});
|
|
});
|
|
}
|
|
|
|
// Function to run all formatting
|
|
function formatTable() {
|
|
formatTableHeaders();
|
|
fixPlayerCells();
|
|
}
|
|
|
|
// Run on load and then periodically to catch any new tables
|
|
setInterval(formatTable, 500);
|
|
|
|
// Also run when the DOM content is loaded
|
|
if (document.readyState === 'loading') {
|
|
document.addEventListener('DOMContentLoaded', formatTable);
|
|
} else {
|
|
formatTable();
|
|
}
|
|
|
|
// Run when the page is fully loaded with resources
|
|
window.addEventListener('load', formatTable);
|
|
</script>
|
|
""")
|
|
|
|
with gr.Tabs():
|
|
with gr.Tab("🏆 Agent Leaderboard"):
|
|
|
|
with gr.Row():
|
|
gr.Markdown("### 📊 Data Visualization")
|
|
|
|
|
|
detailed_visualization = gr.Plot(
|
|
label="Performance Visualization",
|
|
visible=False,
|
|
elem_classes="visualization-container"
|
|
)
|
|
|
|
with gr.Column(visible=True) as overall_visualizations:
|
|
with gr.Tabs():
|
|
with gr.Tab("📈 Radar Chart"):
|
|
|
|
radar_visualization = gr.Plot(
|
|
label="Comparative Analysis (Radar Chart)",
|
|
elem_classes="visualization-container"
|
|
)
|
|
gr.Markdown(
|
|
"*💡 Click a legend entry to isolate that model. Double-click additional ones to add them for comparison.*",
|
|
elem_classes="radar-tip"
|
|
)
|
|
|
|
with gr.Tab("📊 Group Bar Chart"):
|
|
with gr.Row():
|
|
|
|
agent_max_models = get_total_model_count(rank_data)
|
|
top_n_slider = gr.Slider(
|
|
minimum=1,
|
|
maximum=agent_max_models,
|
|
step=1,
|
|
value=min(10, agent_max_models),
|
|
label=f"Number of Top Models to Display (max: {agent_max_models})",
|
|
elem_classes="top-n-slider"
|
|
)
|
|
group_bar_visualization = gr.Plot(
|
|
label="Comparative Analysis (Group Bar Chart)",
|
|
elem_classes="visualization-container"
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
with gr.Row():
|
|
gr.Markdown("### 🎮 Game Selection")
|
|
with gr.Row():
|
|
|
|
|
|
|
|
|
|
with gr.Column():
|
|
gr.Markdown("**🎮 Super Mario Bros**")
|
|
mario_plan_overall = gr.Checkbox(label="Super Mario Bros Score", value=True)
|
|
mario_plan_details = gr.Checkbox(label="Super Mario Bros Details", value=False)
|
|
with gr.Column():
|
|
gr.Markdown("**📦 Sokoban**")
|
|
sokoban_overall = gr.Checkbox(label="Sokoban Score", value=True)
|
|
sokoban_details = gr.Checkbox(label="Sokoban Details", value=False)
|
|
with gr.Column():
|
|
gr.Markdown("**🔢 2048**")
|
|
_2048_overall = gr.Checkbox(label="2048 Score", value=True)
|
|
_2048_details = gr.Checkbox(label="2048 Details", value=False)
|
|
with gr.Column():
|
|
gr.Markdown("**🍬 Candy Crush**")
|
|
candy_overall = gr.Checkbox(label="Candy Crush Score", value=True)
|
|
candy_details = gr.Checkbox(label="Candy Crush Details", value=False)
|
|
|
|
|
|
|
|
|
|
with gr.Column():
|
|
gr.Markdown("**🎯 Tetris**")
|
|
tetris_plan_overall = gr.Checkbox(label="Tetris Score", value=True)
|
|
tetris_plan_details = gr.Checkbox(label="Tetris Details", value=False)
|
|
with gr.Column():
|
|
gr.Markdown("**⚖️ Ace Attorney**")
|
|
ace_attorney_overall = gr.Checkbox(label="Ace Attorney Score", value=True)
|
|
ace_attorney_details = gr.Checkbox(label="Ace Attorney Details", value=False)
|
|
|
|
|
|
with gr.Row():
|
|
with gr.Column(scale=2):
|
|
gr.Markdown("**⏰ Time Tracker**")
|
|
timeline = create_timeline_slider()
|
|
with gr.Column(scale=1):
|
|
gr.Markdown("**🔄 Controls**")
|
|
clear_btn = gr.Button("Reset Filters", variant="secondary")
|
|
|
|
|
|
with gr.Row():
|
|
gr.Markdown("### 📋 Detailed Results")
|
|
|
|
|
|
with gr.Row():
|
|
gr.Markdown("*All data analysis can be replicated by checking [this Jupyter notebook](https://colab.research.google.com/drive/1CYFiJGm3EoBXXI8vICPVR82J9qrmmRvc#scrollTo=qft1Oald-21J)*")
|
|
|
|
|
|
initial_df = get_combined_leaderboard(rank_data, {
|
|
|
|
"Super Mario Bros": True,
|
|
"Sokoban": True,
|
|
"2048": True,
|
|
"Candy Crush": True,
|
|
|
|
"Tetris": True,
|
|
"Ace Attorney": True
|
|
})
|
|
|
|
|
|
initial_display_df = prepare_dataframe_for_display(initial_df)
|
|
|
|
|
|
col_widths = ["40px"]
|
|
col_widths.append("230px")
|
|
col_widths.append("120px")
|
|
|
|
for _ in range(len(initial_display_df.columns) - 2):
|
|
col_widths.append("120px")
|
|
|
|
|
|
with gr.Row():
|
|
leaderboard_df = gr.DataFrame(
|
|
value=initial_display_df,
|
|
interactive=True,
|
|
elem_id="leaderboard-table",
|
|
elem_classes="table-container",
|
|
wrap=True,
|
|
show_row_numbers=True,
|
|
show_fullscreen_button=True,
|
|
line_breaks=True,
|
|
max_height=1000,
|
|
show_search="search",
|
|
column_widths=col_widths
|
|
)
|
|
|
|
|
|
with gr.Row():
|
|
score_note = add_score_note()
|
|
|
|
|
|
checkbox_list = [
|
|
|
|
mario_plan_overall, mario_plan_details,
|
|
sokoban_overall, sokoban_details,
|
|
_2048_overall, _2048_details,
|
|
candy_overall, candy_details,
|
|
|
|
tetris_plan_overall, tetris_plan_details,
|
|
ace_attorney_overall, ace_attorney_details
|
|
]
|
|
|
|
|
|
def update_visualizations(*checkbox_states):
|
|
|
|
|
|
is_details_view = any([
|
|
checkbox_states[1],
|
|
checkbox_states[3],
|
|
checkbox_states[5],
|
|
checkbox_states[7],
|
|
checkbox_states[9],
|
|
checkbox_states[11]
|
|
])
|
|
|
|
|
|
return {
|
|
detailed_visualization: gr.update(visible=is_details_view),
|
|
overall_visualizations: gr.update(visible=not is_details_view)
|
|
}
|
|
|
|
|
|
for checkbox in checkbox_list:
|
|
checkbox.change(
|
|
update_visualizations,
|
|
inputs=checkbox_list,
|
|
outputs=[detailed_visualization, overall_visualizations]
|
|
)
|
|
|
|
|
|
for checkbox in checkbox_list:
|
|
checkbox.change(
|
|
lambda *args: update_leaderboard(*args, data_source=rank_data),
|
|
inputs=checkbox_list + [top_n_slider],
|
|
outputs=[
|
|
leaderboard_df,
|
|
detailed_visualization,
|
|
radar_visualization,
|
|
group_bar_visualization
|
|
] + checkbox_list
|
|
)
|
|
|
|
|
|
top_n_slider.change(
|
|
lambda *args: update_leaderboard(*args, data_source=rank_data),
|
|
inputs=checkbox_list + [top_n_slider],
|
|
outputs=[
|
|
leaderboard_df,
|
|
detailed_visualization,
|
|
radar_visualization,
|
|
group_bar_visualization
|
|
] + checkbox_list
|
|
)
|
|
|
|
|
|
clear_btn.click(
|
|
lambda *args: clear_filters(*args, data_source=rank_data),
|
|
inputs=[top_n_slider],
|
|
outputs=[
|
|
leaderboard_df,
|
|
detailed_visualization,
|
|
radar_visualization,
|
|
group_bar_visualization
|
|
] + checkbox_list
|
|
)
|
|
|
|
|
|
demo.load(
|
|
lambda: clear_filters(data_source=rank_data),
|
|
inputs=[],
|
|
outputs=[
|
|
leaderboard_df,
|
|
detailed_visualization,
|
|
radar_visualization,
|
|
group_bar_visualization
|
|
] + checkbox_list
|
|
)
|
|
|
|
with gr.Tab("🤖 Model Leaderboard"):
|
|
|
|
with gr.Row():
|
|
gr.Markdown("### 📊 Data Visualization")
|
|
|
|
|
|
model_detailed_visualization = gr.Plot(
|
|
label="Performance Visualization",
|
|
visible=False,
|
|
elem_classes="visualization-container"
|
|
)
|
|
|
|
with gr.Column(visible=True) as model_overall_visualizations:
|
|
with gr.Tabs():
|
|
with gr.Tab("📈 Radar Chart"):
|
|
model_radar_visualization = gr.Plot(
|
|
label="Comparative Analysis (Radar Chart)",
|
|
elem_classes="visualization-container"
|
|
)
|
|
gr.Markdown(
|
|
"*💡 Click a legend entry to isolate that model. Double-click additional ones to add them for comparison.*",
|
|
elem_classes="radar-tip"
|
|
)
|
|
with gr.Tab("📊 Group Bar Chart"):
|
|
with gr.Row():
|
|
|
|
model_max_models = get_total_model_count(model_rank_data)
|
|
model_top_n_slider = gr.Slider(
|
|
minimum=1,
|
|
maximum=model_max_models,
|
|
step=1,
|
|
value=min(10, model_max_models),
|
|
label=f"Number of Top Models to Display (max: {model_max_models})",
|
|
elem_classes="top-n-slider"
|
|
)
|
|
model_group_bar_visualization = gr.Plot(
|
|
label="Comparative Analysis (Group Bar Chart)",
|
|
elem_classes="visualization-container"
|
|
)
|
|
|
|
|
|
with gr.Row():
|
|
gr.Markdown("### 🎮 Game Selection")
|
|
with gr.Row():
|
|
with gr.Column():
|
|
gr.Markdown("**🎮 Super Mario Bros**")
|
|
model_mario_plan_overall = gr.Checkbox(label="Super Mario Bros Score", value=True)
|
|
model_mario_plan_details = gr.Checkbox(label="Super Mario Bros Details", value=False)
|
|
with gr.Column():
|
|
gr.Markdown("**📦 Sokoban**")
|
|
model_sokoban_overall = gr.Checkbox(label="Sokoban Score", value=True)
|
|
model_sokoban_details = gr.Checkbox(label="Sokoban Details", value=False)
|
|
with gr.Column():
|
|
gr.Markdown("**🔢 2048**")
|
|
model_2048_overall = gr.Checkbox(label="2048 Score", value=True)
|
|
model_2048_details = gr.Checkbox(label="2048 Details", value=False)
|
|
with gr.Column():
|
|
gr.Markdown("**🍬 Candy Crush**")
|
|
model_candy_overall = gr.Checkbox(label="Candy Crush Score", value=True)
|
|
model_candy_details = gr.Checkbox(label="Candy Crush Details", value=False)
|
|
with gr.Column():
|
|
gr.Markdown("**🎯 Tetris**")
|
|
model_tetris_plan_overall = gr.Checkbox(label="Tetris Score", value=True)
|
|
model_tetris_plan_details = gr.Checkbox(label="Tetris Details", value=False)
|
|
with gr.Column():
|
|
gr.Markdown("**⚖️ Ace Attorney**")
|
|
model_ace_attorney_overall = gr.Checkbox(label="Ace Attorney Score", value=True)
|
|
model_ace_attorney_details = gr.Checkbox(label="Ace Attorney Details", value=False)
|
|
|
|
|
|
with gr.Row():
|
|
with gr.Column(scale=2):
|
|
gr.Markdown("**⏰ Time Tracker**")
|
|
model_timeline = create_timeline_slider()
|
|
with gr.Column(scale=1):
|
|
gr.Markdown("**🔄 Controls**")
|
|
model_clear_btn = gr.Button("Reset Filters", variant="secondary")
|
|
|
|
|
|
with gr.Row():
|
|
gr.Markdown("### 📋 Detailed Results")
|
|
|
|
|
|
model_initial_df = get_combined_leaderboard(model_rank_data, {
|
|
"Super Mario Bros": True,
|
|
"Sokoban": True,
|
|
"2048": True,
|
|
"Candy Crush": True,
|
|
"Tetris": True,
|
|
"Ace Attorney": True
|
|
})
|
|
|
|
|
|
model_initial_display_df = prepare_dataframe_for_display(model_initial_df)
|
|
|
|
|
|
with gr.Row():
|
|
model_leaderboard_df = gr.DataFrame(
|
|
value=model_initial_display_df,
|
|
interactive=True,
|
|
elem_id="model-leaderboard-table",
|
|
elem_classes="table-container",
|
|
wrap=True,
|
|
show_row_numbers=True,
|
|
show_fullscreen_button=True,
|
|
line_breaks=True,
|
|
max_height=1000,
|
|
show_search="search",
|
|
column_widths=col_widths
|
|
)
|
|
|
|
|
|
with gr.Row():
|
|
model_score_note = add_score_note()
|
|
|
|
|
|
model_checkbox_list = [
|
|
model_mario_plan_overall, model_mario_plan_details,
|
|
model_sokoban_overall, model_sokoban_details,
|
|
model_2048_overall, model_2048_details,
|
|
model_candy_overall, model_candy_details,
|
|
model_tetris_plan_overall, model_tetris_plan_details,
|
|
model_ace_attorney_overall, model_ace_attorney_details
|
|
]
|
|
|
|
|
|
def update_model_visualizations(*checkbox_states):
    """Switch between the detailed and the overall visualization containers.

    ``checkbox_states`` carries the current checkbox values positionally, in
    the order of ``model_checkbox_list`` (overall/details pairs per game), so
    positions 1, 3, 5, 7, 9 and 11 are the "details" toggles.

    Returns a dict keyed by ``model_detailed_visualization`` and
    ``model_overall_visualizations`` whose values are ``gr.update`` calls:
    the detailed container is made visible when any details toggle is
    truthy, and the overall container is made visible otherwise.
    """
    # Collect every details toggle up front (odd positions of the 12 inputs).
    details_flags = [checkbox_states[idx] for idx in range(1, 12, 2)]
    show_details = any(details_flags)

    visibility_updates = {}
    visibility_updates[model_detailed_visualization] = gr.update(visible=show_details)
    visibility_updates[model_overall_visualizations] = gr.update(visible=not show_details)
    return visibility_updates
|
|
|
|
|
|
for checkbox in model_checkbox_list:
|
|
checkbox.change(
|
|
update_model_visualizations,
|
|
inputs=model_checkbox_list,
|
|
outputs=[model_detailed_visualization, model_overall_visualizations]
|
|
)
|
|
|
|
|
|
for checkbox in model_checkbox_list:
|
|
checkbox.change(
|
|
lambda *args: update_leaderboard(*args, data_source=model_rank_data),
|
|
inputs=model_checkbox_list + [model_top_n_slider],
|
|
outputs=[
|
|
model_leaderboard_df,
|
|
model_detailed_visualization,
|
|
model_radar_visualization,
|
|
model_group_bar_visualization
|
|
] + model_checkbox_list
|
|
)
|
|
|
|
|
|
model_top_n_slider.change(
|
|
lambda *args: update_leaderboard(*args, data_source=model_rank_data),
|
|
inputs=model_checkbox_list + [model_top_n_slider],
|
|
outputs=[
|
|
model_leaderboard_df,
|
|
model_detailed_visualization,
|
|
model_radar_visualization,
|
|
model_group_bar_visualization
|
|
] + model_checkbox_list
|
|
)
|
|
|
|
|
|
model_clear_btn.click(
|
|
lambda *args: clear_filters(*args, data_source=model_rank_data),
|
|
inputs=[model_top_n_slider],
|
|
outputs=[
|
|
model_leaderboard_df,
|
|
model_detailed_visualization,
|
|
model_radar_visualization,
|
|
model_group_bar_visualization
|
|
] + model_checkbox_list
|
|
)
|
|
|
|
|
|
demo.load(
|
|
lambda: clear_filters(data_source=model_rank_data),
|
|
inputs=[],
|
|
outputs=[
|
|
model_leaderboard_df,
|
|
model_detailed_visualization,
|
|
model_radar_visualization,
|
|
model_group_bar_visualization
|
|
] + model_checkbox_list
|
|
)
|
|
|
|
with gr.Tab("🎥 Gallery"):
|
|
video_gallery = create_video_gallery()
|
|
|
|
return demo
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the app, then launch it with fixed options.
    demo_app = build_app()

    # Keyword arguments forwarded unchanged to ``launch``.
    launch_options = {
        "debug": True,
        "show_error": True,
        "share": True,
        "height": "100%",
        "width": "100%",
    }
    demo_app.launch(**launch_options)