Spaces:

galileo-ai
/

agent-leaderboard

Running on CPU Upgrade

App Files Community

Pratik Bhavsar committed on Jul 2

Commit

c411387

1 Parent(s): 5f94245

cleaned up v1

Browse files

Files changed (5) hide show

app.py +0 -26
data_loader.py +2 -2
results.csv → results_v1.csv +0 -0
tabs/data_exploration.py +0 -810
tabs/leaderboard.py +1 -1

app.py CHANGED Viewed

@@ -14,8 +14,6 @@ from data_loader import (
     SCORES,
 )
 from tabs.leaderboard import create_leaderboard_tab, filter_leaderboard
-from tabs.model_comparison import create_model_comparison_tab, compare_models
-from tabs.data_exploration import create_exploration_tab, filter_and_display
 def create_app():
@@ -32,10 +30,6 @@ def create_app():
                 df, CATEGORIES, METHODOLOGY, HEADER_CONTENT, CARDS
             )
-            mc_info, mc_plot = create_model_comparison_tab(df, HEADER_CONTENT)
-            exp_outputs = create_exploration_tab(df)
         # Initial loads
         app.load(
             fn=lambda: filter_leaderboard(
@@ -44,26 +38,6 @@ def create_app():
             outputs=[lb_output, lb_plot1, lb_plot2],
         )
-        app.load(
-            fn=lambda: compare_models(
-                df, [df.sort_values("Model Avg", ascending=False).iloc[0]["Model"]]
-            ),
-            outputs=[mc_info, mc_plot],
-        )
-        app.load(
-            fn=lambda: filter_and_display(
-                MODELS[0],
-                DATASETS[0],
-                min(SCORES),
-                max(SCORES),
-                0,
-                0,
-                0,
-            ),
-            outputs=exp_outputs[:-1],
-        )
     return app

     SCORES,
 )
 from tabs.leaderboard import create_leaderboard_tab, filter_leaderboard
 def create_app():
                 df, CATEGORIES, METHODOLOGY, HEADER_CONTENT, CARDS
             )
         # Initial loads
         app.load(
             fn=lambda: filter_leaderboard(
             outputs=[lb_output, lb_plot1, lb_plot2],
         )
     return app

data_loader.py CHANGED Viewed

@@ -23,7 +23,7 @@ SCORES = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
 def load_data():
     """Load and preprocess the data."""
-    df = pd.read_csv("results.csv").dropna()
     # Add combined I/O cost column with 3:1 ratio
     df["IO Cost"] = (
@@ -596,7 +596,7 @@ HEADER_CONTENT = (
     <div class="header-content">
         <div class="title-section">
-            <div class="title-gradient">Agent Leaderboard</div>
             <div class="description">
                 GenAI is evolving rapidly with developers building high ROI agents. <br>

 def load_data():
     """Load and preprocess the data."""
+    df = pd.read_csv("results_v1.csv").dropna()
     # Add combined I/O cost column with 3:1 ratio
     df["IO Cost"] = (
     <div class="header-content">
         <div class="title-section">
+            <div class="title-gradient">Agent Leaderboard v1</div>
             <div class="description">
                 GenAI is evolving rapidly with developers building high ROI agents. <br>

results.csv → results_v1.csv RENAMED Viewed

File without changes

tabs/data_exploration.py DELETED Viewed

@@ -1,810 +0,0 @@
-import gradio as gr
-import pandas as pd
-import numpy as np
-from data_loader import MODELS, DATASETS, SCORES, HEADER_CONTENT
-from chat import (
-    format_chat_display,
-    format_metrics_display,
-    format_tool_info,
-)
-def get_updated_df(df, df_output):
-    df = df.iloc[: len(df_output)].copy()
-    df["response"] = df_output["response"].tolist()
-    df["rationale"] = df_output["rationale"].tolist()
-    df["explanation"] = df_output["explanation"].tolist()
-    df["score"] = df_output["score"].tolist()
-    cols = [
-        "conversation",
-        "tools_langchain",
-        "n_turns",
-        "len_query",
-        "n_tools",
-        "response",
-        "rationale",
-        "explanation",
-        "score",
-    ]
-    return df[cols]
-def get_chat_and_score_df(model, dataset):
-    df_output = pd.read_parquet(f"output/{model}/{dataset}.parquet")
-    df = pd.read_parquet(f"datasets/{dataset}.parquet")
-    df = get_updated_df(df, df_output)
-    return df
-def on_filter_change(
-    model,
-    dataset,
-    min_score,
-    max_score,
-    min_n_turns,
-    min_len_query,
-    min_n_tools,
-):
-    try:
-        # Call filter_and_display with index 0 and unpack 4 values
-        chat_html, metrics_html, tool_html, index_html = filter_and_display(
-            model,
-            dataset,
-            min_score,
-            max_score,
-            min_n_turns,
-            min_len_query,
-            min_n_tools,
-            0,
-        )
-        # Return exactly 4 values
-        return chat_html, metrics_html, tool_html, index_html
-    except Exception as e:
-        error_html = f"""
-        <div style="padding: 1.5rem; color: var(--score-low);">
-            <div style="font-weight: 600;">Filter Error</div>
-            <div style="font-family: monospace; background-color: var(--surface-color-alt); padding: 0.5rem; margin-top: 0.5rem;">
-                {str(e)}
-            </div>
-        </div>
-        """
-        return (
-            error_html,
-            "<div style='text-align: center;'>No metrics available</div>",
-            "<div style='text-align: center;'>No tool information available</div>",
-            "<div style='text-align: center;'>0/0</div>",
-        )
-def navigate_prev(
-    current_idx,
-    model,
-    dataset,
-    min_score,
-    max_score,
-    min_n_turns,
-    min_len_query,
-    min_n_tools,
-):
-    try:
-        # Handle current_idx as dictionary
-        if isinstance(current_idx, dict) and "value" in current_idx:
-            idx_val = int(current_idx["value"])
-        else:
-            idx_val = int(current_idx) if current_idx is not None else 0
-        new_index = max(0, idx_val - 1)
-        chat_html, metrics_html, tool_html, index_html = filter_and_display(
-            model,
-            dataset,
-            min_score,
-            max_score,
-            min_n_turns,
-            min_len_query,
-            min_n_tools,
-            new_index,
-        )
-        return chat_html, metrics_html, tool_html, index_html, new_index
-    except Exception as e:
-        error_html = f"""
-        <div style="padding: 1.5rem; color: var(--score-low);">
-            <div style="font-weight: 600;">Navigation Error</div>
-            <div style="font-family: monospace; background-color: var(--surface-color-alt); padding: 0.5rem; margin-top: 0.5rem;">
-                {str(e)}
-            </div>
-        </div>
-        """
-        return (
-            error_html,
-            "<div style='text-align: center;'>No metrics available</div>",
-            "<div style='text-align: center;'>No tool information available</div>",
-            "<div style='text-align: center;'>0/0</div>",
-            current_idx or 0,
-        )
-def navigate_next(
-    current_idx,
-    model,
-    dataset,
-    min_score,
-    max_score,
-    min_n_turns,
-    min_len_query,
-    min_n_tools,
-):
-    try:
-        # Handle current_idx as dictionary
-        if isinstance(current_idx, dict) and "value" in current_idx:
-            idx_val = int(current_idx["value"])
-        else:
-            idx_val = int(current_idx) if current_idx is not None else 0
-        new_index = idx_val + 1
-        chat_html, metrics_html, tool_html, index_html = filter_and_display(
-            model,
-            dataset,
-            min_score,
-            max_score,
-            min_n_turns,
-            min_len_query,
-            min_n_tools,
-            new_index,
-        )
-        return chat_html, metrics_html, tool_html, index_html, new_index
-    except Exception as e:
-        error_html = f"""
-        <div style="padding: 1.5rem; color: var(--score-low);">
-            <div style="font-weight: 600;">Navigation Error</div>
-            <div style="font-family: monospace; background-color: var(--surface-color-alt); padding: 0.5rem; margin-top: 0.5rem;">
-                {str(e)}
-            </div>
-        </div>
-        """
-        return (
-            error_html,
-            "<div style='text-align: center;'>No metrics available</div>",
-            "<div style='text-align: center;'>No tool information available</div>",
-            "<div style='text-align: center;'>0/0</div>",
-            current_idx or 0,
-        )
-def filter_and_display(
-    model,
-    dataset,
-    min_score,
-    max_score,
-    min_n_turns,
-    min_len_query,
-    min_n_tools,
-    index=0,
-):
-    """Combined function to filter data and update display"""
-    try:
-        # Extract model
-        if isinstance(model, dict):
-            if "value" in model:
-                model_str = str(model["value"])
-            else:
-                model_str = MODELS[0]
-        else:
-            model_str = str(model) if model is not None else MODELS[0]
-        # Extract dataset
-        if isinstance(dataset, dict):
-            if "value" in dataset:
-                dataset_str = str(dataset["value"])
-            else:
-                dataset_str = DATASETS[0]
-        else:
-            dataset_str = str(dataset) if dataset is not None else DATASETS[0]
-        # Extract min_score
-        if isinstance(min_score, dict):
-            if "value" in min_score:
-                min_score_val = float(min_score["value"])
-            else:
-                min_score_val = float(min(SCORES))
-        else:
-            min_score_val = (
-                float(min_score) if min_score is not None else float(min(SCORES))
-            )
-        # Extract max_score
-        if isinstance(max_score, dict):
-            if "value" in max_score:
-                max_score_val = float(max_score["value"])
-            else:
-                max_score_val = float(max(SCORES))
-        else:
-            max_score_val = (
-                float(max_score) if max_score is not None else float(max(SCORES))
-            )
-        # Extract min_n_turns
-        if isinstance(min_n_turns, dict):
-            if "value" in min_n_turns:
-                min_n_turns_val = int(min_n_turns["value"])
-            else:
-                min_n_turns_val = 0
-        else:
-            min_n_turns_val = int(min_n_turns) if min_n_turns is not None else 0
-        # Extract min_len_query
-        if isinstance(min_len_query, dict):
-            if "value" in min_len_query:
-                min_len_query_val = int(min_len_query["value"])
-            else:
-                min_len_query_val = 0
-        else:
-            min_len_query_val = int(min_len_query) if min_len_query is not None else 0
-        # Extract min_n_tools
-        if isinstance(min_n_tools, dict):
-            if "value" in min_n_tools:
-                min_n_tools_val = int(min_n_tools["value"])
-            else:
-                min_n_tools_val = 0
-        else:
-            min_n_tools_val = int(min_n_tools) if min_n_tools is not None else 0
-        # Extract index
-        if isinstance(index, dict):
-            if "value" in index:
-                try:
-                    index_val = int(index["value"])
-                except (ValueError, TypeError):
-                    index_val = 0
-            else:
-                index_val = 0
-        else:
-            try:
-                index_val = int(index) if index is not None else 0
-            except (ValueError, TypeError):
-                index_val = 0
-        # Get the data
-        df_chat = get_chat_and_score_df(model_str, dataset_str)
-        # Ensure filter columns exist
-        for col, default in [
-            ("score", 0.0),
-            ("n_turns", 0),
-            ("len_query", 0),
-            ("n_tools", 0),
-        ]:
-            if col not in df_chat.columns:
-                df_chat[col] = default
-            else:
-                df_chat[col] = pd.to_numeric(df_chat[col], errors="coerce").fillna(
-                    default
-                )
-        # Apply all filters
-        df_filtered = df_chat[
-            (df_chat["score"] >= min_score_val)
-            & (df_chat["score"] <= max_score_val)
-            & (df_chat["n_turns"] >= min_n_turns_val)
-            & (df_chat["len_query"] >= min_len_query_val)
-            & (df_chat["n_tools"] >= min_n_tools_val)
-        ].copy()
-        # Check if dataframe is empty
-        if len(df_filtered) == 0:
-            empty_message = """
-            <div style="
-                padding: 1.5rem;
-                text-align: center;
-                color: var(--text-muted);
-                background-color: var(--surface-color-alt);
-                border-radius: 8px;
-                border: 1px dashed var(--border-color);
-                margin: 1rem 0;">
-                <div style="font-size: 2rem; margin-bottom: 1rem;">📭</div>
-                <div style="font-weight: 500; margin-bottom: 0.5rem;">No Results Found</div>
-                <div style="font-style: italic; font-size: 0.9rem;">Try adjusting your filters to see more data</div>
-            </div>
-            """
-            return (
-                empty_message,
-                empty_message,
-                empty_message,
-                "<div style='text-align: center; color: var(--text-muted);'>0/0</div>",
-            )
-        # Ensure index is valid
-        max_index = len(df_filtered) - 1
-        valid_index = max(0, min(index_val, max_index))
-        # Get the row
-        row = df_filtered.iloc[valid_index]
-        # Format displays
-        chat_html = format_chat_display(row)
-        metrics_html = format_metrics_display(row)
-        # Get tools info with error handling
-        try:
-            tool_html = format_tool_info(row["tools_langchain"])
-        except Exception as e:
-            tool_html = f"""
-            <div style="padding: 1rem; background-color: var(--surface-color-alt); border-radius: 8px; color: var(--text-muted);">
-                <div style="font-weight: 500; margin-bottom: 0.5rem;">Tool Information Unavailable</div>
-                <div style="font-size: 0.9rem;">Error: {str(e)}</div>
-            </div>
-            """
-        # Index display
-        index_html = f"""
-        <div style="
-            display: flex;
-            align-items: center;
-            justify-content: center;
-            font-weight: 500;
-            color: var(--primary-text);
-            background-color: var(--surface-color-alt);
-            padding: 0.5rem 1rem;
-            border-radius: 20px;
-            font-size: 0.9rem;
-            width: fit-content;
-            margin: 0 auto;">
-            <span style="margin-right: 0.5rem;">📄</span>{valid_index + 1}/{len(df_filtered)}
-        </div>
-        """
-        return chat_html, metrics_html, tool_html, index_html
-    except Exception as e:
-        error_html = f"""
-        <div style="
-            padding: 1.5rem;
-            color: var(--score-low);
-            background-color: var(--surface-color);
-            border: 1px solid var(--score-low);
-            border-radius: 8px;
-            margin: 1rem 0;
-            display: flex;
-            align-items: flex-start;">
-            <div style="flex-shrink: 0; margin-right: 1rem; font-size: 1.5rem;">⚠️</div>
-            <div>
-                <div style="font-weight: 600; margin-bottom: 0.5rem;">Error Occurred</div>
-                <div style="
-                    font-family: monospace;
-                    background-color: var(--surface-color-alt);
-                    padding: 1rem;
-                    border-radius: 4px;
-                    white-space: pre-wrap;
-                    font-size: 0.9rem;">
-                    {str(e)}
-                </div>
-            </div>
-        </div>
-        """
-        return (
-            error_html,
-            "<div style='padding: 1.5rem; color: var(--text-muted); text-align: center;'>No metrics available</div>",
-            "<div style='padding: 1.5rem; color: var(--text-muted); text-align: center;'>No tool information available</div>",
-            "<div style='text-align: center; color: var(--text-muted);'>0/0</div>",
-        )
-def create_exploration_tab(df):
-    """Create an enhanced data exploration tab with better UI and functionality."""
-    # Main UI setup
-    with gr.Tab("Data Exploration"):
-        # CSS styling (unchanged)
-        gr.HTML(
-            """
-        <style>
-            /* Custom styling for the exploration tab */
-            :root[data-theme="light"] {
-                --surface-color: #f8f9fa;
-                --surface-color-alt: #ffffff;
-                --text-color: #202124;
-                --text-muted: #666666;
-                --primary-text: #1a73e8;
-                --primary-text-light: rgba(26, 115, 232, 0.3);
-                --border-color: #e9ecef;
-                --border-color-light: #f1f3f5;
-                --shadow-color: rgba(0,0,0,0.05);
-                --message-bg-user: #E5F6FD;
-                --message-bg-assistant: #F7F7F8;
-                --message-bg-system: #FFF3E0;
-                --response-bg: #F0F7FF;
-                --score-high: #1a73e8;
-                --score-med: #f4b400;
-                --score-low: #ea4335;
-            }
-            :root[data-theme="dark"] {
-                --surface-color: #1e1e1e;
-                --surface-color-alt: #2d2d2d;
-                --text-color: #ffffff;
-                --text-muted: #a0a0a0;
-                --primary-text: #60a5fa;
-                --primary-text-light: rgba(96, 165, 250, 0.3);
-                --border-color: #404040;
-                --border-color-light: #333333;
-                --shadow-color: rgba(0,0,0,0.2);
-                --message-bg-user: #2d3748;
-                --message-bg-assistant: #1a1a1a;
-                --message-bg-system: #2c2516;
-                --response-bg: #1e2a3a;
-                --score-high: #60a5fa;
-                --score-med: #fbbf24;
-                --score-low: #ef4444;
-            }
-            #exploration-header {
-                margin-bottom: 1.5rem;
-                padding-bottom: 1rem;
-                border-bottom: 1px solid var(--border-color);
-            }
-            .filter-container {
-                background-color: var(--surface-color);
-                border-radius: 10px;
-                padding: 1rem;
-                margin-bottom: 1.5rem;
-                border: 1px solid var(--border-color);
-                box-shadow: 0 2px 6px var(--shadow-color);
-            }
-            .navigation-buttons button {
-                min-width: 120px;
-                font-weight: 500;
-            }
-            .content-panel {
-                margin-top: 1.5rem;
-            }
-            @media (max-width: 768px) {
-                .filter-row {
-                    flex-direction: column;
-                }
-            }
-        </style>
-        """
-        )
-        # Header
-        with gr.Row(elem_id="exploration-header"):
-            gr.HTML(HEADER_CONTENT)
-        # Filters section
-        with gr.Column(elem_classes="filter-container"):
-            gr.Markdown("### 🔍 Filter Options")
-            with gr.Row(equal_height=True, elem_classes="filter-row"):
-                explore_model = gr.Dropdown(
-                    choices=MODELS,
-                    value=MODELS[0],
-                    label="Model",
-                    container=True,
-                    scale=1,
-                    info="Select AI model",
-                )
-                explore_dataset = gr.Dropdown(
-                    choices=DATASETS,
-                    value=DATASETS[0],
-                    label="Dataset",
-                    container=True,
-                    scale=1,
-                    info="Select evaluation dataset",
-                )
-            with gr.Row(equal_height=True, elem_classes="filter-row"):
-                min_score = gr.Slider(
-                    minimum=float(min(SCORES)),
-                    maximum=float(max(SCORES)),
-                    value=float(min(SCORES)),
-                    step=0.1,
-                    label="Minimum TSQ Score",
-                    container=True,
-                    scale=1,
-                    info="Filter responses with scores above this threshold",
-                )
-                max_score = gr.Slider(
-                    minimum=float(min(SCORES)),
-                    maximum=float(max(SCORES)),
-                    value=float(max(SCORES)),
-                    step=0.1,
-                    label="Maximum TSQ Score",
-                    container=True,
-                    scale=1,
-                    info="Filter responses with scores below this threshold",
-                )
-            # Get the data for initial ranges
-            df_chat = get_chat_and_score_df(explore_model.value, explore_dataset.value)
-            # Ensure columns exist and get ranges
-            n_turns_max = int(df_chat["n_turns"].max())
-            len_query_max = int(df_chat["len_query"].max())
-            n_tools_max = int(df_chat["n_tools"].max())
-            with gr.Row(equal_height=True, elem_classes="filter-row"):
-                n_turns_filter = gr.Slider(
-                    minimum=0,
-                    maximum=n_turns_max,
-                    value=0,
-                    step=1,
-                    label="Minimum Turn Count",
-                    container=True,
-                    scale=1,
-                    info="Filter by minimum number of conversation turns",
-                )
-                len_query_filter = gr.Slider(
-                    minimum=0,
-                    maximum=len_query_max,
-                    value=0,
-                    step=10,
-                    label="Minimum Query Length",
-                    container=True,
-                    scale=1,
-                    info="Filter by minimum length of query in characters",
-                )
-                n_tools_filter = gr.Slider(
-                    minimum=0,
-                    maximum=n_tools_max,
-                    value=0,
-                    step=1,
-                    label="Minimum Tool Count",
-                    container=True,
-                    scale=1,
-                    info="Filter by minimum number of tools used",
-                )
-            with gr.Row():
-                reset_btn = gr.Button("Reset Filters", size="sm", variant="secondary")
-        # Navigation row
-        with gr.Row(variant="panel"):
-            with gr.Column(scale=1):
-                prev_btn = gr.Button(
-                    "← Previous",
-                    size="lg",
-                    variant="secondary",
-                    elem_classes="navigation-buttons",
-                )
-            with gr.Column(scale=1, min_width=100):
-                # Get initial count from default data
-                df_initial = get_chat_and_score_df(MODELS[0], DATASETS[0])
-                initial_count = len(df_initial)
-                index_display = gr.HTML(
-                    value=f"""<div style="
-                        display: flex;
-                        align-items: center;
-                        justify-content: center;
-                        font-weight: 500;
-                        color: var(--primary-text);
-                        background-color: var(--surface-color-alt);
-                        padding: 0.5rem 1rem;
-                        border-radius: 20px;
-                        font-size: 0.9rem;
-                        width: fit-content;
-                        margin: 0 auto;">
-                        <span style="margin-right: 0.5rem;">📄</span>1/{initial_count}
-                    </div>""",
-                    elem_id="index-display",
-                )
-            with gr.Column(scale=1):
-                next_btn = gr.Button(
-                    "Next →",
-                    size="lg",
-                    variant="secondary",
-                    elem_classes="navigation-buttons",
-                )
-        # Content areas
-        with gr.Row(equal_height=True):
-            with gr.Column(scale=1):
-                chat_display = gr.HTML()
-            with gr.Column(scale=1):
-                metrics_display = gr.HTML()
-        with gr.Row():
-            tool_info_display = gr.HTML()
-        # State for tracking current index (simple integer state)
-        current_index = gr.State(value=0)
-        def reset_index():
-            """Reset the current index to 0"""
-            return 0
-        # Add these explicit event handlers for model and dataset changes
-        explore_model.change(
-            reset_index,
-            inputs=[],
-            outputs=[current_index],
-        )
-        explore_dataset.change(
-            reset_index,
-            inputs=[],
-            outputs=[current_index],
-        )
-        min_score.change(
-            reset_index,
-            inputs=[],
-            outputs=[current_index],
-        )
-        max_score.change(
-            reset_index,
-            inputs=[],
-            outputs=[current_index],
-        )
-        n_turns_filter.change(
-            reset_index,
-            inputs=[],
-            outputs=[current_index],
-        )
-        len_query_filter.change(
-            reset_index,
-            inputs=[],
-            outputs=[current_index],
-        )
-        n_tools_filter.change(
-            reset_index,
-            inputs=[],
-            outputs=[current_index],
-        )
-        # Reset filters
-        def reset_filters():
-            return (
-                MODELS[0],
-                DATASETS[0],
-                float(min(SCORES)),
-                float(max(SCORES)),
-                0,  # n_turns
-                0,  # len_query
-                0,  # n_tools
-            )
-        reset_btn.click(
-            reset_filters,
-            outputs=[
-                explore_model,
-                explore_dataset,
-                min_score,
-                max_score,
-                n_turns_filter,
-                len_query_filter,
-                n_tools_filter,
-            ],
-        )
-        # Connect filter changes
-        # Replace the existing filter connections with this:
-        for control in [
-            explore_model,
-            explore_dataset,
-            min_score,
-            max_score,
-            n_turns_filter,
-            len_query_filter,
-            n_tools_filter,
-        ]:
-            control.change(
-                on_filter_change,
-                inputs=[
-                    explore_model,
-                    explore_dataset,
-                    min_score,
-                    max_score,
-                    n_turns_filter,
-                    len_query_filter,
-                    n_tools_filter,
-                ],
-                outputs=[
-                    chat_display,
-                    metrics_display,
-                    tool_info_display,
-                    index_display,
-                ],
-            )
-        # Connect navigation buttons with necessary filter parameters
-        prev_btn.click(
-            navigate_prev,
-            inputs=[
-                current_index,
-                explore_model,
-                explore_dataset,
-                min_score,
-                max_score,
-                n_turns_filter,
-                len_query_filter,
-                n_tools_filter,
-            ],
-            outputs=[
-                chat_display,
-                metrics_display,
-                tool_info_display,
-                index_display,
-                current_index,
-            ],
-        )
-        next_btn.click(
-            navigate_next,
-            inputs=[
-                current_index,
-                explore_model,
-                explore_dataset,
-                min_score,
-                max_score,
-                n_turns_filter,
-                len_query_filter,
-                n_tools_filter,
-            ],
-            outputs=[
-                chat_display,
-                metrics_display,
-                tool_info_display,
-                index_display,
-                current_index,
-            ],
-        )
-        def update_slider_ranges(model, dataset):
-            df_chat = get_chat_and_score_df(model, dataset)
-            # Make sure columns are numeric first
-            df_chat["n_turns"] = pd.to_numeric(
-                df_chat["n_turns"], errors="coerce"
-            ).fillna(0)
-            df_chat["len_query"] = pd.to_numeric(
-                df_chat["len_query"], errors="coerce"
-            ).fillna(0)
-            df_chat["n_tools"] = pd.to_numeric(
-                df_chat["n_tools"], errors="coerce"
-            ).fillna(0)
-            # Calculate maximums with safety buffers
-            n_turns_max = max(1, int(df_chat["n_turns"].max()))
-            len_query_max = max(10, int(df_chat["len_query"].max()))
-            n_tools_max = max(1, int(df_chat["n_tools"].max()))
-            # Return updated sliders using gr.update()
-            return (
-                gr.update(maximum=n_turns_max, value=0),
-                gr.update(maximum=len_query_max, value=0),
-                gr.update(maximum=n_tools_max, value=0),
-            )
-        # Connect model and dataset changes to slider range updates
-        explore_model.change(
-            update_slider_ranges,
-            inputs=[explore_model, explore_dataset],
-            outputs=[n_turns_filter, len_query_filter, n_tools_filter],
-        )
-        explore_dataset.change(
-            update_slider_ranges,
-            inputs=[explore_model, explore_dataset],
-            outputs=[n_turns_filter, len_query_filter, n_tools_filter],
-        )
-        return [
-            chat_display,
-            metrics_display,
-            tool_info_display,
-            index_display,
-        ]

tabs/leaderboard.py CHANGED Viewed

@@ -156,7 +156,7 @@ def filter_leaderboard(df, model_type, category, sort_by):
 def create_leaderboard_tab(df, CATEGORIES, METHODOLOGY, HEADER_CONTENT, CARDS):
-    with gr.Tab("Leaderboard"):
         gr.HTML(HEADER_CONTENT + CARDS)
         gr.HTML(DESCRIPTION_HTML)

 def create_leaderboard_tab(df, CATEGORIES, METHODOLOGY, HEADER_CONTENT, CARDS):
+    with gr.Tab("Leaderboard v1"):
         gr.HTML(HEADER_CONTENT + CARDS)
         gr.HTML(DESCRIPTION_HTML)