# MEGA-Bench leaderboard: Gradio app entry point.
# NOTE: file-viewer residue (size banner, commit-hash gutter, line-number
# gutter) that preceded the code was not part of the program and was removed.
import gradio as gr
from utils import MEGABenchEvalDataLoader
import os
from constants import *

# Resolve every bundled asset relative to this script rather than the current
# working directory, so the app loads the same files no matter where it is
# launched from.
current_dir = os.path.dirname(os.path.abspath(__file__))
_static_dir = os.path.join(current_dir, "static")

# Construct paths to CSS files
base_css_file = os.path.join(_static_dir, "css", "style.css")
table_css_file = os.path.join(_static_dir, "css", "table.css")

# Read CSS files; the encoding is explicit so the result does not depend on
# the platform's default locale.
with open(base_css_file, "r", encoding="utf-8") as f:
    base_css = f.read()
with open(table_css_file, "r", encoding="utf-8") as f:
    table_css = f.read()

# Initialize data loaders (anchored to the script directory like the CSS paths)
default_loader = MEGABenchEvalDataLoader(
    os.path.join(_static_dir, "eval_results", "Default")
)
# Initialize single image loader only if enabled; otherwise it stays None and
# the callbacks must never reach it.
si_loader = (
    MEGABenchEvalDataLoader(os.path.join(_static_dir, "eval_results", "SI"))
    if ENABLE_SINGLE_IMAGE_TABLE
    else None
)

# Top-level UI definition: intro text, the leaderboard tab with its selectors,
# caption and results table.
with gr.Blocks() as block:
    # Hidden HTML element carrying the combined stylesheets. The update
    # callbacks list it as an output and re-send the same markup.
    css_style = gr.HTML(
        f"<style>{base_css}\n{table_css}</style>",
        visible=False
    )

    gr.Markdown(
        LEADERBOARD_INTRODUCTION
    )
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("📊 MEGA-Bench", elem_id="qa-tab-table1", id=1):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                        lines=10,
                    )
            gr.Markdown(
                TABLE_INTRODUCTION
            )

            # Define different captions for each table
            default_caption = "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> $\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ <br> * indicates self-reported results from the model authors."

            single_image_caption = "**Table 2: MEGA-Bench Single-image setting results.** The number in the parentheses is the number of tasks in each keyword. <br> This subset contains 273 single-image tasks from the Core set and 42 single-image tasks from the Open-ended set. For open-source models, we drop the image input in the 1-shot demonstration example so that the entire query contains a single image only. <br> Compared to the default table, some models with only single-image support are added."

            with gr.Row():
                # Only show table selector if single image table is enabled.
                # When the flag is off, `table_selector` is never defined.
                if ENABLE_SINGLE_IMAGE_TABLE:
                    table_selector = gr.Radio(
                        choices=["Default", "Single Image"],
                        label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
                        value="Default"
                    )

            # Caption shown with the table; `$...$` spans are rendered as
            # inline LaTeX.
            caption_component = gr.Markdown(
                value=default_caption,
                elem_classes="table-caption",
                latex_delimiters=[{"left": "$", "right": "$", "display": False}],
            )

            # Breakdown dimensions offered by the default loader; computed once
            # and reused for the selector choices, its initial value and the
            # initial table query.
            default_super_groups = list(default_loader.SUPER_GROUPS.keys())

            with gr.Row():
                super_group_selector = gr.Radio(
                    choices=default_super_groups,
                    label="Select a dimension to display breakdown results. We use different column colors to distinguish the overall benchmark scores and breakdown results.",
                    value=default_super_groups[0]
                )
                model_group_selector = gr.Radio(
                    choices=list(BASE_MODEL_GROUPS.keys()),
                    label="Select a model group",
                    value="All"
                )

            # Initial table contents: first breakdown dimension, all models.
            initial_headers, initial_data = default_loader.get_leaderboard_data(default_super_groups[0], "All")
            data_component = gr.Dataframe(
                value=initial_data,
                headers=initial_headers,
                # First column numeric, second column HTML, the rest numeric.
                datatype=["number", "html"] + ["number"] * (len(initial_headers) - 2),
                interactive=False,
                elem_classes="custom-dataframe",
                max_height=2400,
                # Two leading columns, three 160px columns, then 210px for
                # every remaining column.
                column_widths=["100px", "240px"] + ["160px"] * 3 + ["210px"] * (len(initial_headers) - 5),
            )

            def update_table_and_caption(table_type, super_group, model_group):
                """Build the refreshed table, caption and stylesheet markup.

                The single-image loader and caption are used only when the
                single-image table is enabled and `table_type` is anything other
                than "Default"; otherwise the default loader and caption are used.

                Returns a three-item list: a `gr.Dataframe` holding the selected
                leaderboard data, the caption string, and the `<style>` markup
                built from the base and table CSS.
                """
                wants_single_image = ENABLE_SINGLE_IMAGE_TABLE and table_type != "Default"
                if wants_single_image:
                    active_loader, caption_text = si_loader, single_image_caption
                else:
                    active_loader, caption_text = default_loader, default_caption

                column_names, rows = active_loader.get_leaderboard_data(super_group, model_group)
                n_columns = len(column_names)

                table_update = gr.Dataframe(
                    value=rows,
                    headers=column_names,
                    # First column numeric, second HTML, all remaining numeric.
                    datatype=["number", "html"] + ["number"] * (n_columns - 2),
                    interactive=False,
                    column_widths=["100px", "240px"] + ["160px"] * 3 + ["210px"] * (n_columns - 5),
                )
                style_markup = f"<style>{base_css}\n{table_css}</style>"
                return [table_update, caption_text, style_markup]

            def update_selectors(table_type):
                """Return Radio updates whose choices match the selected table.

                The single-image loader supplies the choices only when the
                single-image table is enabled and `table_type` is not "Default";
                in every other case the default loader is used.

                Returns a two-item list: a Radio carrying the loader's
                SUPER_GROUPS keys, then a Radio carrying its MODEL_GROUPS keys.
                """
                if ENABLE_SINGLE_IMAGE_TABLE and table_type != "Default":
                    loader = si_loader
                else:
                    loader = default_loader

                super_group_radio = gr.Radio(choices=list(loader.SUPER_GROUPS.keys()))
                model_group_radio = gr.Radio(choices=list(loader.MODEL_GROUPS.keys()))
                return [super_group_radio, model_group_radio]

            # Button that re-runs the table update on demand.
            refresh_button = gr.Button("Refresh")
            
            # Set up different handlers based on whether single image table is enabled
            if ENABLE_SINGLE_IMAGE_TABLE:
                # Refresh rebuilds the table from the current value of all
                # three selectors.
                refresh_button.click(
                    fn=update_table_and_caption, 
                    inputs=[table_selector, super_group_selector, model_group_selector], 
                    outputs=[data_component, caption_component, css_style]
                )
                
                # Switching tables is a two-step chain: first replace the
                # choices of the two group selectors, then (`.then`) rebuild the
                # table, caption and style markup.
                table_selector.change(
                    fn=update_selectors,
                    inputs=[table_selector],
                    outputs=[super_group_selector, model_group_selector]
                ).then(
                    fn=update_table_and_caption,
                    inputs=[table_selector, super_group_selector, model_group_selector],
                    outputs=[data_component, caption_component, css_style]
                )
            else:
                # Simplified handlers when single image is disabled.
                # `table_selector` is only created when the flag is on, so the
                # lambda hard-codes "Default" as the table type.
                refresh_button.click(
                    fn=lambda super_group, model_group: update_table_and_caption("Default", super_group, model_group), 
                    inputs=[super_group_selector, model_group_selector], 
                    outputs=[data_component, caption_component, css_style]
                )
            
            # These handlers are needed in both cases.
            # With the flag off, a fresh gr.State("Default") stands in for the
            # missing table selector so the callback still receives three inputs.
            super_group_selector.change(
                fn=update_table_and_caption, 
                inputs=[table_selector if ENABLE_SINGLE_IMAGE_TABLE else gr.State("Default"), super_group_selector, model_group_selector], 
                outputs=[data_component, caption_component, css_style]
            )
            
            model_group_selector.change(
                fn=update_table_and_caption, 
                inputs=[table_selector if ENABLE_SINGLE_IMAGE_TABLE else gr.State("Default"), super_group_selector, model_group_selector], 
                outputs=[data_component, caption_component, css_style]
            )

        # Static tab that renders the DATA_INFO markdown.
        with gr.TabItem("📝 Data Information", elem_id="qa-tab-table2", id=2):
            gr.Markdown(DATA_INFO, elem_classes="markdown-text")

        # Static tab that renders the SUBMIT_INTRODUCTION markdown.
        with gr.TabItem("🚀 Submit", elem_id="submit-tab", id=3):
            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")



# Launch the app only when this file is executed as a script (not on import).
if __name__ == "__main__":
    # share=True asks Gradio to also create a public share link.
    block.launch(share=True)