Spaces:
Running
Running
jasonshaoshun
committed on
Commit
·
f585ea0
1
Parent(s):
5bcfeb8
debug
Browse files- app.py +27 -3
- src/populate.py +11 -14
app.py
CHANGED
|
@@ -74,7 +74,15 @@ except Exception:
|
|
| 74 |
|
| 75 |
LEADERBOARD_DF_MIB_SUBGRAPH = get_leaderboard_df_mib_subgraph(EVAL_RESULTS_MIB_SUBGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB_SUBGRAPH, BENCHMARK_COLS_MIB_SUBGRAPH)
|
| 76 |
|
| 77 |
-
LEADERBOARD_DF_MIB_CAUSALGRAPH = get_leaderboard_df_mib_causalgraph(EVAL_RESULTS_MIB_CAUSALGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB_CAUSALGRAPH, BENCHMARK_COLS_MIB_CAUSALGRAPH)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
# LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
|
| 80 |
# LEADERBOARD_DF_MULTIMODAL = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS_MULTIMODAL, BENCHMARK_COLS_MULTIMODAL)
|
|
@@ -202,9 +210,25 @@ with demo:
|
|
| 202 |
|
| 203 |
with gr.TabItem("Subgraph", elem_id="subgraph", id=0):
|
| 204 |
leaderboard = init_leaderboard_mib_subgraph(LEADERBOARD_DF_MIB_SUBGRAPH, "Subgraph")
|
| 205 |
-
|
|
|
|
| 206 |
with gr.TabItem("Causal Graph", elem_id="causalgraph", id=1):
|
| 207 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
# with gr.Row():
|
| 210 |
# with gr.Accordion("📙 Citation", open=False):
|
|
|
|
| 74 |
|
| 75 |
LEADERBOARD_DF_MIB_SUBGRAPH = get_leaderboard_df_mib_subgraph(EVAL_RESULTS_MIB_SUBGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB_SUBGRAPH, BENCHMARK_COLS_MIB_SUBGRAPH)
|
| 76 |
|
| 77 |
+
# LEADERBOARD_DF_MIB_CAUSALGRAPH = get_leaderboard_df_mib_causalgraph(EVAL_RESULTS_MIB_CAUSALGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB_CAUSALGRAPH, BENCHMARK_COLS_MIB_CAUSALGRAPH)
|
| 78 |
+
|
| 79 |
+
# In app.py, modify the LEADERBOARD initialization
|
| 80 |
+
LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED, LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGATED, LEADERBOARD_DF_MIB_CAUSALGRAPH_AVERAGED = get_leaderboard_df_mib_causalgraph(
|
| 81 |
+
EVAL_RESULTS_MIB_CAUSALGRAPH_PATH,
|
| 82 |
+
EVAL_REQUESTS_PATH,
|
| 83 |
+
COLS_MIB_CAUSALGRAPH,
|
| 84 |
+
BENCHMARK_COLS_MIB_CAUSALGRAPH
|
| 85 |
+
)
|
| 86 |
|
| 87 |
# LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
|
| 88 |
# LEADERBOARD_DF_MULTIMODAL = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS_MULTIMODAL, BENCHMARK_COLS_MULTIMODAL)
|
|
|
|
| 210 |
|
| 211 |
with gr.TabItem("Subgraph", elem_id="subgraph", id=0):
|
| 212 |
leaderboard = init_leaderboard_mib_subgraph(LEADERBOARD_DF_MIB_SUBGRAPH, "Subgraph")
|
| 213 |
+
|
| 214 |
+
# Then modify the Causal Graph tab section
|
| 215 |
with gr.TabItem("Causal Graph", elem_id="causalgraph", id=1):
|
| 216 |
+
with gr.Tabs() as causalgraph_tabs:
|
| 217 |
+
with gr.TabItem("Detailed View", id=0):
|
| 218 |
+
leaderboard_detailed = init_leaderboard_mib_causalgraph(
|
| 219 |
+
LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED,
|
| 220 |
+
"Causal Graph"
|
| 221 |
+
)
|
| 222 |
+
with gr.TabItem("Aggregated View", id=1):
|
| 223 |
+
leaderboard_aggregated = init_leaderboard_mib_causalgraph(
|
| 224 |
+
LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGATED,
|
| 225 |
+
"Causal Graph"
|
| 226 |
+
)
|
| 227 |
+
with gr.TabItem("Intervention Averaged", id=2):
|
| 228 |
+
leaderboard_averaged = init_leaderboard_mib_causalgraph(
|
| 229 |
+
LEADERBOARD_DF_MIB_CAUSALGRAPH_AVERAGED,
|
| 230 |
+
"Causal Graph"
|
| 231 |
+
)
|
| 232 |
|
| 233 |
# with gr.Row():
|
| 234 |
# with gr.Accordion("📙 Citation", open=False):
|
src/populate.py
CHANGED
|
@@ -110,26 +110,23 @@ def create_intervention_averaged_df(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 110 |
return averaged_df
|
| 111 |
|
| 112 |
|
| 113 |
-
def get_leaderboard_df_mib_causalgraph(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
|
| 114 |
-
"""Creates
|
| 115 |
-
print(f"
|
| 116 |
raw_data = get_raw_eval_results_mib_causalgraph(results_path, requests_path)
|
| 117 |
-
print(f"
|
| 118 |
-
|
| 119 |
-
if not raw_data:
|
| 120 |
-
print("Warning: raw_data is empty")
|
| 121 |
-
return pd.DataFrame()
|
| 122 |
|
| 123 |
# Convert each result to dict format for detailed df
|
| 124 |
all_data_json = [v.to_dict() for v in raw_data]
|
| 125 |
-
print(f"Length of all_data_json: {len(all_data_json)}")
|
| 126 |
-
print(f"First entry of all_data_json: {all_data_json[0] if all_data_json else None}")
|
| 127 |
-
|
| 128 |
detailed_df = pd.DataFrame.from_records(all_data_json)
|
| 129 |
-
print(f"Shape of detailed_df: {detailed_df.shape}")
|
| 130 |
-
print(f"Columns in detailed_df: {detailed_df.columns.tolist()}")
|
| 131 |
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
|
| 135 |
def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|
|
|
|
| 110 |
return averaged_df
|
| 111 |
|
| 112 |
|
| 113 |
+
def get_leaderboard_df_mib_causalgraph(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
|
| 114 |
+
"""Creates three dataframes from all the MIB causal graph experiment results"""
|
| 115 |
+
print(f"results_path is {results_path}, requests_path is {requests_path}")
|
| 116 |
raw_data = get_raw_eval_results_mib_causalgraph(results_path, requests_path)
|
| 117 |
+
print(f"raw_data is {raw_data}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
# Convert each result to dict format for detailed df
|
| 120 |
all_data_json = [v.to_dict() for v in raw_data]
|
|
|
|
|
|
|
|
|
|
| 121 |
detailed_df = pd.DataFrame.from_records(all_data_json)
|
|
|
|
|
|
|
| 122 |
|
| 123 |
+
# Create aggregated df
|
| 124 |
+
aggregated_df = aggregate_methods(detailed_df)
|
| 125 |
+
|
| 126 |
+
# Create intervention-averaged df
|
| 127 |
+
intervention_averaged_df = create_intervention_averaged_df(aggregated_df)
|
| 128 |
+
|
| 129 |
+
return detailed_df, aggregated_df, intervention_averaged_df
|
| 130 |
|
| 131 |
|
| 132 |
def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|