Commit 1895436 by Taha Aksu
Parent(s): b6bb7c3

Change column name to replication code
Files changed:
- app.py (+1, -1)
- src/about.py (+1, -1)
- src/display/utils.py (+1, -1)
app.py CHANGED
@@ -171,7 +171,7 @@ def init_leaderboard(ori_dataframe, model_info_df, sort_val: str | list | None =
         filter_columns=[
             ColumnFilter(ModelInfoColumn.model_type.name, type="checkboxgroup", label="Model types"),
             ColumnFilter(ModelInfoColumn.testdata_leakage.name, type="checkboxgroup", label="Test Leak."),
-            ColumnFilter(ModelInfoColumn.replication_code_available.name, type="checkboxgroup", label="
+            ColumnFilter(ModelInfoColumn.replication_code_available.name, type="checkboxgroup", label="Replication Code"),
         ],
         # bool_checkboxgroup_label="",
         column_widths=[30, 180] + [130 for _ in range(len(merged_df.columns)-2)],
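For readers unfamiliar with the component, filter_columns and ColumnFilter come from the gradio_leaderboard package this Space builds on. A minimal, self-contained sketch of how the new "Replication Code" checkbox-group filter plugs into a Leaderboard follows; the toy DataFrame and its column values are illustrative assumptions, not data from the Space:

import gradio as gr
import pandas as pd
from gradio_leaderboard import ColumnFilter, Leaderboard

# Toy stand-in for the merged leaderboard DataFrame (values are made up).
df = pd.DataFrame({
    "Model": ["model-a", "model-b"],
    "Model types": ["zero-shot", "fine-tuned"],
    "Test Leak.": ["No", "Yes"],
    "Replication Code": ["Yes", "No"],
})

with gr.Blocks() as demo:
    Leaderboard(
        value=df,
        filter_columns=[
            ColumnFilter("Model types", type="checkboxgroup", label="Model types"),
            ColumnFilter("Test Leak.", type="checkboxgroup", label="Test Leak."),
            # The filter added by this commit, keyed by the column's display name:
            ColumnFilter("Replication Code", type="checkboxgroup", label="Replication Code"),
        ],
    )

demo.launch()

In the real app.py the column keys are resolved via ModelInfoColumn.<field>.name rather than hard-coded strings, as the diff above shows.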
src/about.py CHANGED
@@ -45,7 +45,7 @@ LLM_BENCHMARKS_TEXT = f"""
 ## Update Log
 
 ### 2025-10-17
-- Added new column:
+- Added new column: Replication Code, a binary indicator of whether the submission author has made the model's evaluation code publicly available. The preferred way to share the evaluation code is a notebook in the GIFT-Eval GitHub repository (as many previous submissions have done), but a standalone repo is also acceptable as long as it is publicly accessible and the link is provided in the config.json file.
 
 ### 2025-08-25
 - Added new model type: Zero-shot, to distinguish foundation model submissions that don't use the GIFT-Eval training data. Models tagged as zero-shot are not trained on the GIFT-Eval training data; test data leakage is still tracked separately in the TestData Leakage column. For a model to be tagged `zero-shot`, it must both have no test data leakage and use no training split from GIFT-Eval.
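The update-log entry above requires a public evaluation-code link in the submission's config.json, but this commit does not show that file's schema. Purely as an illustration of the requirement, a submission might record the link like this (the key name replication_code_url and all values are hypothetical, not GIFT-Eval's actual schema):

import json

# Hypothetical submission config: only the requirement that config.json carry a
# public link to the evaluation code comes from the update log; the key name
# "replication_code_url" and all values here are illustrative assumptions.
config = {
    "model": "my-org/my-forecaster",
    "model_type": "zero-shot",
    "replication_code_url": "https://github.com/my-org/my-forecaster-eval",
}

with open("config.json", "w") as f:
    json.dump(config, f, indent=2)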
src/display/utils.py CHANGED
@@ -36,7 +36,7 @@ model_info_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "num
 model_info_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
 model_info_dict.append(["org", ColumnContent, ColumnContent("Organization", "str", True, hidden=False)])
 model_info_dict.append(["testdata_leakage", ColumnContent, ColumnContent("Test Leak.", "str", True, hidden=False)])
-model_info_dict.append(["replication_code_available", ColumnContent, ColumnContent("
+model_info_dict.append(["replication_code_available", ColumnContent, ColumnContent("Replication Code", "str", True, hidden=False)])
 # model_info_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 
 # We use make dataclass to dynamically fill the scores from Tasks
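The appended entries are (field_name, type, default) triples in exactly the shape dataclasses.make_dataclass accepts, which is presumably how the ModelInfoColumn class referenced in app.py is built (per the "We use make dataclass" comment above). A self-contained sketch of that pattern, with a minimal frozen ColumnContent inferred from the constructor calls in the diff (the real class may carry more fields):

from dataclasses import dataclass, make_dataclass

@dataclass(frozen=True)  # frozen => hashable, so instances can serve as field defaults
class ColumnContent:
    name: str                   # display name shown as the leaderboard column header
    type: str                   # column datatype, e.g. "str" or "bool"
    displayed_by_default: bool
    hidden: bool = False

model_info_dict = [
    ["testdata_leakage", ColumnContent, ColumnContent("Test Leak.", "str", True, hidden=False)],
    ["replication_code_available", ColumnContent, ColumnContent("Replication Code", "str", True, hidden=False)],
]

# make_dataclass accepts (name, type, default) triples, so each ColumnContent
# instance becomes the default value of its field on the generated class.
ModelInfoColumn = make_dataclass("ModelInfoColumn", model_info_dict, frozen=True)

# This is how app.py resolves a column's display name:
print(ModelInfoColumn.replication_code_available.name)  # -> "Replication Code"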