Spaces:

mib-bench
/

leaderboard

Running

App Files Files Community

jasonshaoshun committed on Apr 23

Commit

a2e0e8f

1 Parent(s): 2ba536b

Update causal graph page to display separate views for average and highest values

Browse files

Files changed (2) hide show

app.py +35 -7
src/leaderboard/read_evals.py +1 -1

app.py CHANGED Viewed

@@ -681,12 +681,12 @@ with demo:
         # Then modify the Causal Graph tab section
         with gr.TabItem("Causal Variable Localization", elem_id="causalgraph", id=1):
             with gr.Tabs() as causalgraph_tabs:
-                with gr.TabItem("Detailed View", id=0):
-                    leaderboard_detailed, data = init_leaderboard_mib_causalgraph(
-                        LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED,
-                        "Causal Graph"
-                    )
-                with gr.TabItem("Aggregated View", id=1):
                     gr.Markdown("""
                     ### Filtering Options
                     Use the dropdown menus below to filter results by specific tasks or models.
@@ -718,11 +718,39 @@ with demo:
                         inputs=[original_leaderboard, task_substring_checkbox, model_substring_checkbox],
                         outputs=leaderboard_aggregated
                     )
-                with gr.TabItem("Intervention Averaged", id=2):
                     leaderboard_averaged, data = init_leaderboard_mib_causalgraph(
                         LEADERBOARD_DF_MIB_CAUSALGRAPH_AVERAGED,
                         "Causal Graph"
                     )
         with gr.TabItem("Submit", elem_id="llm-benchmark-tab-table", id=2):
             # Track selection

         # Then modify the Causal Graph tab section
         with gr.TabItem("Causal Variable Localization", elem_id="causalgraph", id=1):
             with gr.Tabs() as causalgraph_tabs:
+                # with gr.TabItem("Detailed View", id=0):
+                #     leaderboard_detailed, data = init_leaderboard_mib_causalgraph(
+                #         LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED,
+                #         "Causal Graph"
+                #     )
+                with gr.TabItem("Highest View", id=0):
                     gr.Markdown("""
                     ### Filtering Options
                     Use the dropdown menus below to filter results by specific tasks or models.
                         inputs=[original_leaderboard, task_substring_checkbox, model_substring_checkbox],
                         outputs=leaderboard_aggregated
                     )
+                with gr.TabItem("Averaged View", id=1):
+                    task_substring_checkbox = gr.CheckboxGroup(
+                        choices=TASK_SUBSTRINGS,
+                        label="View tasks:",
+                        value=TASK_SUBSTRINGS,  # Default to all substrings selected
+                    )
+                    model_substring_checkbox = gr.CheckboxGroup(
+                        choices = MODEL_SUBSTRINGS,
+                        label = "View models:",
+                        value = MODEL_SUBSTRINGS
+                    )
                     leaderboard_averaged, data = init_leaderboard_mib_causalgraph(
                         LEADERBOARD_DF_MIB_CAUSALGRAPH_AVERAGED,
                         "Causal Graph"
                     )
+                    original_leaderboard = gr.State(value=data)
+                    task_substring_checkbox.change(
+                        fn=update_leaderboard,
+                        inputs=[original_leaderboard, task_substring_checkbox, model_substring_checkbox],
+                        outputs=leaderboard_averaged
+                    )
+                    model_substring_checkbox.change(
+                        fn=update_leaderboard,
+                        inputs=[original_leaderboard, task_substring_checkbox, model_substring_checkbox],
+                        outputs=leaderboard_averaged
+                    )
+                    # leaderboard_averaged, data = init_leaderboard_mib_causalgraph(
+                    #     LEADERBOARD_DF_MIB_CAUSALGRAPH_AVERAGED,
+                    #     "Causal Graph"
+                    # )
         with gr.TabItem("Submit", elem_id="llm-benchmark-tab-table", id=2):
             # Track selection

src/leaderboard/read_evals.py CHANGED Viewed

@@ -508,7 +508,7 @@ def get_raw_eval_results_mib_causalgraph(results_path: str) -> Tuple[pd.DataFram
     # intervention_averaged_mean_df = create_intervention_averaged_df(detailed_df_mean)
     # return detailed_df_highest, detailed_df_mean, intervention_averaged_highest_df
-    return detailed_df_highest, detailed_df_mean, detailed_df_mean

     # intervention_averaged_mean_df = create_intervention_averaged_df(detailed_df_mean)
     # return detailed_df_highest, detailed_df_mean, intervention_averaged_highest_df
+    return detailed_df_highest, detailed_df_highest, detailed_df_mean