Commit 1895436 by Taha Aksu
Parent(s): b6bb7c3

Change column name to replication code
Files changed:
- app.py (+1, -1)
- src/about.py (+1, -1)
- src/display/utils.py (+1, -1)
app.py CHANGED
@@ -171,7 +171,7 @@ def init_leaderboard(ori_dataframe, model_info_df, sort_val: str | list | None =
         filter_columns=[
             ColumnFilter(ModelInfoColumn.model_type.name, type="checkboxgroup", label="Model types"),
             ColumnFilter(ModelInfoColumn.testdata_leakage.name, type="checkboxgroup", label="Test Leak."),
-            ColumnFilter(ModelInfoColumn.replication_code_available.name, type="checkboxgroup", label="
+            ColumnFilter(ModelInfoColumn.replication_code_available.name, type="checkboxgroup", label="Replication Code"),
         ],
         # bool_checkboxgroup_label="",
         column_widths=[30, 180] + [130 for _ in range(len(merged_df.columns)-2)],
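For readers unfamiliar with the component, filter_columns and ColumnFilter come from the gradio_leaderboard package this Space builds on. A minimal, self-contained sketch of how the new "Replication Code" checkbox-group filter plugs into a Leaderboard follows; the toy DataFrame and its column values are illustrative assumptions, not data from the Space:

import gradio as gr
import pandas as pd
from gradio_leaderboard import ColumnFilter, Leaderboard

# Toy stand-in for the merged leaderboard DataFrame (values are made up).
df = pd.DataFrame({
    "Model": ["model-a", "model-b"],
    "Model types": ["zero-shot", "fine-tuned"],
    "Test Leak.": ["No", "Yes"],
    "Replication Code": ["Yes", "No"],
})

with gr.Blocks() as demo:
    Leaderboard(
        value=df,
        filter_columns=[
            ColumnFilter("Model types", type="checkboxgroup", label="Model types"),
            ColumnFilter("Test Leak.", type="checkboxgroup", label="Test Leak."),
            # The filter added by this commit, keyed by the column's display name:
            ColumnFilter("Replication Code", type="checkboxgroup", label="Replication Code"),
        ],
    )

demo.launch()

In the real app.py the column keys are resolved via ModelInfoColumn.<field>.name rather than hard-coded strings, as the diff above shows.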
src/about.py CHANGED
@@ -45,7 +45,7 @@ LLM_BENCHMARKS_TEXT = f"""
 ## Update Log
 
 ### 2025-10-17
-- Added new column:
+- Added new column: Replication Code, a binary indicator of whether the submission author has made the model's evaluation code publicly available. The preferred way to share the evaluation code is a notebook in the GIFT-Eval GitHub repository (as many previous submissions have done), but a standalone repo is also acceptable as long as it is publicly accessible and the link is provided in the config.json file.
 
 ### 2025-08-25
 - Added new model type: Zero-shot, to distinguish foundation model submissions that don't use the GIFT-Eval training data. Models tagged as zero-shot are not trained on the GIFT-Eval training data; test data leakage is still tracked separately in the TestData Leakage column. For a model to be tagged `zero-shot`, it must both have no test data leakage and use no training split from GIFT-Eval.
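The update-log entry above requires a public evaluation-code link in the submission's config.json, but this commit does not show that file's schema. Purely as an illustration of the requirement, a submission might record the link like this (the key name replication_code_url and all values are hypothetical, not GIFT-Eval's actual schema):

import json

# Hypothetical submission config: only the requirement that config.json carry a
# public link to the evaluation code comes from the update log; the key name
# "replication_code_url" and all values here are illustrative assumptions.
config = {
    "model": "my-org/my-forecaster",
    "model_type": "zero-shot",
    "replication_code_url": "https://github.com/my-org/my-forecaster-eval",
}

with open("config.json", "w") as f:
    json.dump(config, f, indent=2)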
src/display/utils.py CHANGED
@@ -36,7 +36,7 @@ model_info_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "num
 model_info_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
 model_info_dict.append(["org", ColumnContent, ColumnContent("Organization", "str", True, hidden=False)])
 model_info_dict.append(["testdata_leakage", ColumnContent, ColumnContent("Test Leak.", "str", True, hidden=False)])
-model_info_dict.append(["replication_code_available", ColumnContent, ColumnContent("
+model_info_dict.append(["replication_code_available", ColumnContent, ColumnContent("Replication Code", "str", True, hidden=False)])
 # model_info_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 
 # We use make dataclass to dynamically fill the scores from Tasks
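The appended entries are (field_name, type, default) triples in exactly the shape dataclasses.make_dataclass accepts, which is presumably how the ModelInfoColumn class referenced in app.py is built (per the "We use make dataclass" comment above). A self-contained sketch of that pattern, with a minimal frozen ColumnContent inferred from the constructor calls in the diff (the real class may carry more fields):

from dataclasses import dataclass, make_dataclass

@dataclass(frozen=True)  # frozen => hashable, so instances can serve as field defaults
class ColumnContent:
    name: str                   # display name shown as the leaderboard column header
    type: str                   # column datatype, e.g. "str" or "bool"
    displayed_by_default: bool
    hidden: bool = False

model_info_dict = [
    ["testdata_leakage", ColumnContent, ColumnContent("Test Leak.", "str", True, hidden=False)],
    ["replication_code_available", ColumnContent, ColumnContent("Replication Code", "str", True, hidden=False)],
]

# make_dataclass accepts (name, type, default) triples, so each ColumnContent
# instance becomes the default value of its field on the generated class.
ModelInfoColumn = make_dataclass("ModelInfoColumn", model_info_dict, frozen=True)

# This is how app.py resolves a column's display name:
print(ModelInfoColumn.replication_code_available.name)  # -> "Replication Code"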