File size: 3,356 Bytes
ee1b999
 
 
1971175
ee1b999
 
 
 
 
 
1971175
ee1b999
 
 
 
 
 
11de2f8
a0bf2a2
 
 
 
 
 
 
 
 
 
 
7830648
a0bf2a2
 
b077021
ee1b999
 
 
b077021
ee1b999
 
dcfd58f
 
 
02a4349
dcfd58f
 
 
 
 
 
 
 
 
ee1b999
 
 
 
02a4349
ee1b999
 
 
 
 
 
 
 
 
 
8bd1c00
 
 
ee1b999
 
9181029
dcfd58f
9181029
 
 
dcfd58f
9181029
ee1b999
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import matplotlib
matplotlib.use('Agg')
import gradio as gr


from ui_components import create_leaderboard_display, get_full_leaderboard_data

from content import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    INTRO_PARAGRAPH
)

# --- Module-level caches for viewers and tag maps ---
# Both start out as separate empty dicts; nothing else in this file reads or
# writes them.
CACHED_VIEWERS: dict = {}
CACHED_TAG_MAPS: dict = {}

def _render_split_tab(tab_label, split_name, blurb, category_name):
    """Build one results tab for a leaderboard split and return the tab.

    Loads the split's data with ``get_full_leaderboard_data(split_name)``.
    If the returned frame is not empty, ``blurb`` is rendered as Markdown
    followed by the leaderboard produced by ``create_leaderboard_display``;
    otherwise a "No data available" notice is rendered instead.

    Args:
        tab_label: Text shown on the tab itself.
        split_name: Split identifier passed to the data loader and to the
            leaderboard display (this file uses "test" and "validation").
        blurb: Markdown shown above the leaderboard when data is present.
        category_name: Category label forwarded to the leaderboard display.

    Returns:
        The ``gr.Tab`` that was created, so callers can attach event
        listeners to it.
    """
    with gr.Tab(tab_label) as tab:
        # Load everything for this split once, then hand it to the display.
        full_df, tag_map = get_full_leaderboard_data(split_name)
        if full_df.empty:
            gr.Markdown(f"No data available for {split_name} split.")
        else:
            gr.Markdown(blurb)
            create_leaderboard_display(
                full_df=full_df,
                tag_map=tag_map,
                category_name=category_name,
                split_name=split_name,
            )
    return tab


def build_page():
    """Lay out the AstaBench "Overall" leaderboard page.

    Creates, in order: an intro row (intro HTML next to the overview
    diagram), a horizontal rule, the page header, and a tab group with one
    tab for the test split and one for the validation split.

    The function only creates Gradio components and returns nothing.
    NOTE(review): it presumably has to be called inside an active
    ``gr.Blocks()`` context owned by the caller -- confirm against the app
    entry point.
    """
    with gr.Row(elem_id="intro-row"):
        with gr.Column(scale=1):
            gr.HTML(INTRO_PARAGRAPH, elem_id="intro-paragraph")

        with gr.Column(scale=1):
            gr.Image(
                value="assets/overall.svg",
                show_label=False,
                interactive=False,
                show_download_button=False,
                show_fullscreen_button=False,
                show_share_button=False,
                elem_id="diagram-image"
            )

    # --- Leaderboard Display Section ---
    gr.Markdown("---")
    CATEGORY_NAME = "Overall"
    gr.HTML(f'<h2>AstaBench {CATEGORY_NAME} Leaderboard <span style="font-weight: normal; color: inherit;">(Aggregate)</span></h2>', elem_id="main-header")

    with gr.Tabs():
        # The test tab is created first; no listener is attached to it.
        _render_split_tab(
            "Results: Test Set",
            "test",
            "**Test Set** results are reserved for final assessment. This helps ensure that the agent generalizes well to unseen problems.",
            CATEGORY_NAME,
        )
        validation_tab = _render_split_tab(
            "Results: Validation Set",
            "validation",
            "**Validation Set** results are used during development to tune and compare agents before final testing.",
            CATEGORY_NAME,
        )

    # hiding this for now till we have the real paper data
    # with gr.Accordion("📙 Citation", open=False):
    #     gr.Textbox(value=CITATION_BUTTON_TEXT, label=CITATION_BUTTON_LABEL, elem_id="citation-button-main", interactive=False)

    # JavaScript run when the validation tab is selected: it schedules a
    # window 'resize' event on the next tick. Presumably this makes plots in
    # the tab re-measure themselves (the original note called it "fix the
    # plots"). It does not show or hide any navigation.
    resize_on_select_js = """
        () => {setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);}
        """
    # Pure client-side handler: no Python `fn`, inputs, or outputs are needed.
    validation_tab.select(fn=None, inputs=None, outputs=None, js=resize_on_select_js)

if __name__ == "__main__":
    # `demo` was never defined in this module, so launching it raised
    # NameError. Build a standalone Blocks app around the page so the module
    # can be run directly.
    with gr.Blocks() as demo:
        build_page()
    demo.launch()