Spaces:
Running
Running
File size: 3,356 Bytes
ee1b999 1971175 ee1b999 1971175 ee1b999 11de2f8 a0bf2a2 7830648 a0bf2a2 b077021 ee1b999 b077021 ee1b999 dcfd58f 02a4349 dcfd58f ee1b999 02a4349 ee1b999 8bd1c00 ee1b999 9181029 dcfd58f 9181029 dcfd58f 9181029 ee1b999 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import matplotlib
matplotlib.use('Agg')
import gradio as gr
from ui_components import create_leaderboard_display, get_full_leaderboard_data
from content import (
CITATION_BUTTON_LABEL,
CITATION_BUTTON_TEXT,
INTRO_PARAGRAPH
)
# --- Module-level caches ---
# Two independent, initially empty dicts kept at import time. Nothing in this
# module reads or writes them; they are retained because other modules may
# import them by name.
CACHED_VIEWERS: dict = {}
CACHED_TAG_MAPS: dict = {}
def _build_split_tab(tab_label, split_name, description, category_name):
    """Render one leaderboard tab for a single data split.

    Loads the leaderboard data for ``split_name`` once, then either renders
    the description plus the leaderboard display, or a "no data" notice when
    the loaded frame is empty.

    Args:
        tab_label: Text shown on the tab itself.
        split_name: Split identifier passed to ``get_full_leaderboard_data``
            and ``create_leaderboard_display`` (e.g. ``"test"``).
        description: Markdown blurb shown above the leaderboard.
        category_name: Category forwarded to ``create_leaderboard_display``.

    Returns:
        The ``gr.Tab`` that was created, so callers can attach event listeners.
    """
    with gr.Tab(tab_label) as tab:
        full_df, tag_map = get_full_leaderboard_data(split_name)
        if not full_df.empty:
            gr.Markdown(description)
            create_leaderboard_display(
                full_df=full_df,
                tag_map=tag_map,
                category_name=category_name,
                split_name=split_name,
            )
        else:
            gr.Markdown(f"No data available for {split_name} split.")
    return tab


def build_page():
    """Lay out the "Overall" leaderboard page.

    Creates, in order: an intro row (intro HTML next to the overview diagram),
    a horizontal rule, the page header, and a tab group with one tab per split
    (test first, then validation).

    This function does not create a ``gr.Blocks`` itself; it is intended to be
    called from inside an active Blocks context owned by the caller.
    """
    with gr.Row(elem_id="intro-row"):
        with gr.Column(scale=1):
            gr.HTML(INTRO_PARAGRAPH, elem_id="intro-paragraph")
        with gr.Column(scale=1):
            # Static diagram: every interactive affordance is switched off.
            gr.Image(
                value="assets/overall.svg",
                show_label=False,
                interactive=False,
                show_download_button=False,
                show_fullscreen_button=False,
                show_share_button=False,
                elem_id="diagram-image",
            )

    # --- Leaderboard Display Section ---
    gr.Markdown("---")
    category_name = "Overall"
    gr.HTML(
        f'<h2>AstaBench {category_name} Leaderboard <span style="font-weight: normal; color: inherit;">(Aggregate)</span></h2>',
        elem_id="main-header",
    )

    with gr.Tabs():
        _build_split_tab(
            "Results: Test Set",
            "test",
            "**Test Set** results are reserved for final assessment. This helps ensure that the agent generalizes well to unseen problems.",
            category_name,
        )
        validation_tab = _build_split_tab(
            "Results: Validation Set",
            "validation",
            "**Validation Set** results are used during development to tune and compare agents before final testing.",
            category_name,
        )

    # hiding this for now till we have the real paper data
    # with gr.Accordion("📙 Citation", open=False):
    #     gr.Textbox(value=CITATION_BUTTON_TEXT, label=CITATION_BUTTON_LABEL, elem_id="citation-button-main", interactive=False)

    # Client-side only: when the validation tab is selected, fire a window
    # 'resize' event on the next tick. Presumably this makes plots that were
    # created while the tab was hidden re-measure their container -- confirm.
    resize_on_select_js = (
        "\n"
        "() => {setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);}\n"
    )
    # Pure JS handler; no Python `fn` is needed.
    validation_tab.select(fn=None, inputs=None, outputs=None, js=resize_on_select_js)
if __name__ == "__main__":
    # This module only defines `build_page()`; it has no module-level app
    # object. Wrap the page in a Blocks app here so there is something to
    # launch when the file is run directly.
    with gr.Blocks() as demo:
        build_page()
    demo.launch()