Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
t0-0
committed on
Commit
·
0109b82
1
Parent(s):
5b66510
Display llm-jp-eval version and backend library
Browse files
- app.py +36 -0
- src/display/utils.py +26 -0
- src/leaderboard/read_evals.py +10 -1
app.py
CHANGED
|
@@ -26,9 +26,11 @@ from src.display.utils import (
|
|
| 26 |
TYPES,
|
| 27 |
AddSpecialTokens,
|
| 28 |
AutoEvalColumn,
|
|
|
|
| 29 |
ModelType,
|
| 30 |
NumFewShots,
|
| 31 |
Precision,
|
|
|
|
| 32 |
fields,
|
| 33 |
)
|
| 34 |
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO
|
|
@@ -75,6 +77,8 @@ def filter_models(
|
|
| 75 |
precision_query: list,
|
| 76 |
add_special_tokens_query: list,
|
| 77 |
num_few_shots_query: list,
|
|
|
|
|
|
|
| 78 |
) -> pd.DataFrame:
|
| 79 |
print(f"Initial df shape: {df.shape}")
|
| 80 |
print(f"Initial df content:\n{df}")
|
|
@@ -110,6 +114,14 @@ def filter_models(
|
|
| 110 |
]
|
| 111 |
print(f"After num_few_shots filter: {filtered_df.shape}")
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
print("Filtered dataframe head:")
|
| 114 |
print(filtered_df.head())
|
| 115 |
return filtered_df
|
|
@@ -177,6 +189,8 @@ def update_table(
|
|
| 177 |
size_query: list,
|
| 178 |
add_special_tokens_query: list,
|
| 179 |
num_few_shots_query: list,
|
|
|
|
|
|
|
| 180 |
query: str,
|
| 181 |
):
|
| 182 |
print(
|
|
@@ -191,6 +205,8 @@ def update_table(
|
|
| 191 |
precision_query,
|
| 192 |
add_special_tokens_query,
|
| 193 |
num_few_shots_query,
|
|
|
|
|
|
|
| 194 |
)
|
| 195 |
print(f"filtered_df shape after filter_models: {filtered_df.shape}")
|
| 196 |
|
|
@@ -236,6 +252,8 @@ leaderboard_df = filter_models(
|
|
| 236 |
[i.value.name for i in Precision],
|
| 237 |
[i.value.name for i in AddSpecialTokens],
|
| 238 |
[i.value.name for i in NumFewShots],
|
|
|
|
|
|
|
| 239 |
)
|
| 240 |
|
| 241 |
leaderboard_df_filtered = filter_models(
|
|
@@ -245,6 +263,8 @@ leaderboard_df_filtered = filter_models(
|
|
| 245 |
[i.value.name for i in Precision],
|
| 246 |
[i.value.name for i in AddSpecialTokens],
|
| 247 |
[i.value.name for i in NumFewShots],
|
|
|
|
|
|
|
| 248 |
)
|
| 249 |
|
| 250 |
# DataFrameの初期化部分のみを修正
|
|
@@ -309,6 +329,18 @@ with gr.Blocks() as demo_leaderboard:
|
|
| 309 |
value=[i.value.name for i in NumFewShots],
|
| 310 |
elem_id="filter-columns-num-few-shots",
|
| 311 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
|
| 313 |
# DataFrameコンポーネントの初期化
|
| 314 |
leaderboard_table = gr.Dataframe(
|
|
@@ -340,6 +372,8 @@ with gr.Blocks() as demo_leaderboard:
|
|
| 340 |
filter_columns_size.change,
|
| 341 |
filter_columns_add_special_tokens.change,
|
| 342 |
filter_columns_num_few_shots.change,
|
|
|
|
|
|
|
| 343 |
search_bar.submit,
|
| 344 |
],
|
| 345 |
fn=update_table,
|
|
@@ -351,6 +385,8 @@ with gr.Blocks() as demo_leaderboard:
|
|
| 351 |
filter_columns_size,
|
| 352 |
filter_columns_add_special_tokens,
|
| 353 |
filter_columns_num_few_shots,
|
|
|
|
|
|
|
| 354 |
search_bar,
|
| 355 |
],
|
| 356 |
outputs=leaderboard_table,
|
|
|
|
| 26 |
TYPES,
|
| 27 |
AddSpecialTokens,
|
| 28 |
AutoEvalColumn,
|
| 29 |
+
Backend,
|
| 30 |
ModelType,
|
| 31 |
NumFewShots,
|
| 32 |
Precision,
|
| 33 |
+
Version,
|
| 34 |
fields,
|
| 35 |
)
|
| 36 |
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO
|
|
|
|
| 77 |
precision_query: list,
|
| 78 |
add_special_tokens_query: list,
|
| 79 |
num_few_shots_query: list,
|
| 80 |
+
version_query: list,
|
| 81 |
+
backend_query: list,
|
| 82 |
) -> pd.DataFrame:
|
| 83 |
print(f"Initial df shape: {df.shape}")
|
| 84 |
print(f"Initial df content:\n{df}")
|
|
|
|
| 114 |
]
|
| 115 |
print(f"After num_few_shots filter: {filtered_df.shape}")
|
| 116 |
|
| 117 |
+
# Version フィルタリング
|
| 118 |
+
filtered_df = filtered_df[filtered_df["llm-jp-eval version"].isin(version_query)]
|
| 119 |
+
print(f"After version filter: {filtered_df.shape}")
|
| 120 |
+
|
| 121 |
+
# Backend フィルタリング
|
| 122 |
+
filtered_df = filtered_df[filtered_df["Backend Library"].isin(backend_query)]
|
| 123 |
+
print(f"After backend filter: {filtered_df.shape}")
|
| 124 |
+
|
| 125 |
print("Filtered dataframe head:")
|
| 126 |
print(filtered_df.head())
|
| 127 |
return filtered_df
|
|
|
|
| 189 |
size_query: list,
|
| 190 |
add_special_tokens_query: list,
|
| 191 |
num_few_shots_query: list,
|
| 192 |
+
version_query: list,
|
| 193 |
+
backend_query: list,
|
| 194 |
query: str,
|
| 195 |
):
|
| 196 |
print(
|
|
|
|
| 205 |
precision_query,
|
| 206 |
add_special_tokens_query,
|
| 207 |
num_few_shots_query,
|
| 208 |
+
version_query,
|
| 209 |
+
backend_query,
|
| 210 |
)
|
| 211 |
print(f"filtered_df shape after filter_models: {filtered_df.shape}")
|
| 212 |
|
|
|
|
| 252 |
[i.value.name for i in Precision],
|
| 253 |
[i.value.name for i in AddSpecialTokens],
|
| 254 |
[i.value.name for i in NumFewShots],
|
| 255 |
+
[i.value.name for i in Version],
|
| 256 |
+
[i.value.name for i in Backend],
|
| 257 |
)
|
| 258 |
|
| 259 |
leaderboard_df_filtered = filter_models(
|
|
|
|
| 263 |
[i.value.name for i in Precision],
|
| 264 |
[i.value.name for i in AddSpecialTokens],
|
| 265 |
[i.value.name for i in NumFewShots],
|
| 266 |
+
[i.value.name for i in Version],
|
| 267 |
+
[i.value.name for i in Backend],
|
| 268 |
)
|
| 269 |
|
| 270 |
# DataFrameの初期化部分のみを修正
|
|
|
|
| 329 |
value=[i.value.name for i in NumFewShots],
|
| 330 |
elem_id="filter-columns-num-few-shots",
|
| 331 |
)
|
| 332 |
+
filter_columns_version = gr.CheckboxGroup(
|
| 333 |
+
label="Version",
|
| 334 |
+
choices=[i.value.name for i in Version],
|
| 335 |
+
value=[i.value.name for i in Version],
|
| 336 |
+
elem_id="filter-columns-version",
|
| 337 |
+
)
|
| 338 |
+
filter_columns_backend = gr.CheckboxGroup(
|
| 339 |
+
label="Backend",
|
| 340 |
+
choices=[i.value.name for i in Backend],
|
| 341 |
+
value=[i.value.name for i in Backend],
|
| 342 |
+
elem_id="filter-columns-backend",
|
| 343 |
+
)
|
| 344 |
|
| 345 |
# DataFrameコンポーネントの初期化
|
| 346 |
leaderboard_table = gr.Dataframe(
|
|
|
|
| 372 |
filter_columns_size.change,
|
| 373 |
filter_columns_add_special_tokens.change,
|
| 374 |
filter_columns_num_few_shots.change,
|
| 375 |
+
filter_columns_version.change,
|
| 376 |
+
filter_columns_backend.change,
|
| 377 |
search_bar.submit,
|
| 378 |
],
|
| 379 |
fn=update_table,
|
|
|
|
| 385 |
filter_columns_size,
|
| 386 |
filter_columns_add_special_tokens,
|
| 387 |
filter_columns_num_few_shots,
|
| 388 |
+
filter_columns_version,
|
| 389 |
+
filter_columns_backend,
|
| 390 |
search_bar,
|
| 391 |
],
|
| 392 |
outputs=leaderboard_table,
|
src/display/utils.py
CHANGED
|
@@ -44,6 +44,10 @@ auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Avai
|
|
| 44 |
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
| 45 |
auto_eval_column_dict.append(["num_few_shots", ColumnContent, ColumnContent("Few-shot", "str", False)])
|
| 46 |
auto_eval_column_dict.append(["add_special_tokens", ColumnContent, ColumnContent("Add Special Tokens", "bool", False)])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
|
| 48 |
|
| 49 |
# We use make dataclass to dynamically fill the scores from Tasks
|
|
@@ -130,6 +134,28 @@ class NumFewShots(Enum):
|
|
| 130 |
return NumFewShots.Unknown
|
| 131 |
|
| 132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
# Column selection
|
| 134 |
COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
|
| 135 |
TYPES = [c.type for c in fields(AutoEvalColumn)]
|
|
|
|
| 44 |
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
| 45 |
auto_eval_column_dict.append(["num_few_shots", ColumnContent, ColumnContent("Few-shot", "str", False)])
|
| 46 |
auto_eval_column_dict.append(["add_special_tokens", ColumnContent, ColumnContent("Add Special Tokens", "bool", False)])
|
| 47 |
+
auto_eval_column_dict.append(
|
| 48 |
+
["llm_jp_eval_version", ColumnContent, ColumnContent("llm-jp-eval version", "str", False)]
|
| 49 |
+
)
|
| 50 |
+
auto_eval_column_dict.append(["backend", ColumnContent, ColumnContent("Backend Library", "str", False)])
|
| 51 |
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
|
| 52 |
|
| 53 |
# We use make dataclass to dynamically fill the scores from Tasks
|
|
|
|
| 134 |
return NumFewShots.Unknown
|
| 135 |
|
| 136 |
|
| 137 |
+
class Version(Enum):
|
| 138 |
+
v1_4_1 = ModelDetails("v1.4.1")
|
| 139 |
+
Unknown = ModelDetails("?")
|
| 140 |
+
|
| 141 |
+
def from_str(version):
|
| 142 |
+
if version == "1.4.1":
|
| 143 |
+
return Version.v1_4_1
|
| 144 |
+
else:
|
| 145 |
+
return Version.Unknown
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
class Backend(Enum):
|
| 149 |
+
vllm = ModelDetails("vllm")
|
| 150 |
+
Unknown = ModelDetails("?")
|
| 151 |
+
|
| 152 |
+
def from_str(backend):
|
| 153 |
+
if backend == "vllm":
|
| 154 |
+
return Backend.vllm
|
| 155 |
+
else:
|
| 156 |
+
return Backend.Unknown
|
| 157 |
+
|
| 158 |
+
|
| 159 |
# Column selection
|
| 160 |
COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
|
| 161 |
TYPES = [c.type for c in fields(AutoEvalColumn)]
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -7,7 +7,7 @@ from decimal import Decimal
|
|
| 7 |
import dateutil
|
| 8 |
|
| 9 |
from src.display.formatting import make_clickable_model
|
| 10 |
-
from src.display.utils import AutoEvalColumn, ModelType, Tasks, WeightType
|
| 11 |
from src.submission.check_validity import is_model_on_hub
|
| 12 |
|
| 13 |
|
|
@@ -34,6 +34,8 @@ class EvalResult:
|
|
| 34 |
still_on_hub: bool = False
|
| 35 |
num_few_shots: str = "0"
|
| 36 |
add_special_tokens: str = ""
|
|
|
|
|
|
|
| 37 |
|
| 38 |
@classmethod
|
| 39 |
def init_from_json_file(self, json_filepath):
|
|
@@ -62,6 +64,9 @@ class EvalResult:
|
|
| 62 |
config.get("pipeline_kwargs", {"add_special_tokens": "Unknown"}).get("add_special_tokens")
|
| 63 |
)
|
| 64 |
|
|
|
|
|
|
|
|
|
|
| 65 |
# Get model and org
|
| 66 |
# org_and_model = config.get("model_name", config.get("offline_inference").get("model_name", None))
|
| 67 |
org_and_model = config.get("model_name", config.get("offline_inference", {}).get("model_name", "Unknown"))
|
|
@@ -116,6 +121,8 @@ class EvalResult:
|
|
| 116 |
architecture=architecture,
|
| 117 |
num_few_shots=num_few_shots,
|
| 118 |
add_special_tokens=add_special_tokens,
|
|
|
|
|
|
|
| 119 |
)
|
| 120 |
|
| 121 |
def update_with_request_file(self, requests_path):
|
|
@@ -153,6 +160,8 @@ class EvalResult:
|
|
| 153 |
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
| 154 |
AutoEvalColumn.num_few_shots.name: self.num_few_shots,
|
| 155 |
AutoEvalColumn.add_special_tokens.name: self.add_special_tokens,
|
|
|
|
|
|
|
| 156 |
}
|
| 157 |
|
| 158 |
# for task in Tasks:
|
|
|
|
| 7 |
import dateutil
|
| 8 |
|
| 9 |
from src.display.formatting import make_clickable_model
|
| 10 |
+
from src.display.utils import AutoEvalColumn, Backend, ModelType, Tasks, Version, WeightType
|
| 11 |
from src.submission.check_validity import is_model_on_hub
|
| 12 |
|
| 13 |
|
|
|
|
| 34 |
still_on_hub: bool = False
|
| 35 |
num_few_shots: str = "0"
|
| 36 |
add_special_tokens: str = ""
|
| 37 |
+
llm_jp_eval_version: str = ""
|
| 38 |
+
backend: str = ""
|
| 39 |
|
| 40 |
@classmethod
|
| 41 |
def init_from_json_file(self, json_filepath):
|
|
|
|
| 64 |
config.get("pipeline_kwargs", {"add_special_tokens": "Unknown"}).get("add_special_tokens")
|
| 65 |
)
|
| 66 |
|
| 67 |
+
version = Version.from_str(metainfo.get("version", "?")).value.name
|
| 68 |
+
backend = Backend.from_str(model_config.get("_target_", "?").split(".")[0]).value.name
|
| 69 |
+
|
| 70 |
# Get model and org
|
| 71 |
# org_and_model = config.get("model_name", config.get("offline_inference").get("model_name", None))
|
| 72 |
org_and_model = config.get("model_name", config.get("offline_inference", {}).get("model_name", "Unknown"))
|
|
|
|
| 121 |
architecture=architecture,
|
| 122 |
num_few_shots=num_few_shots,
|
| 123 |
add_special_tokens=add_special_tokens,
|
| 124 |
+
llm_jp_eval_version=version,
|
| 125 |
+
backend=backend,
|
| 126 |
)
|
| 127 |
|
| 128 |
def update_with_request_file(self, requests_path):
|
|
|
|
| 160 |
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
| 161 |
AutoEvalColumn.num_few_shots.name: self.num_few_shots,
|
| 162 |
AutoEvalColumn.add_special_tokens.name: self.add_special_tokens,
|
| 163 |
+
AutoEvalColumn.llm_jp_eval_version.name: self.llm_jp_eval_version,
|
| 164 |
+
AutoEvalColumn.backend.name: self.backend,
|
| 165 |
}
|
| 166 |
|
| 167 |
# for task in Tasks:
|