Spaces:
Running
Running
Commit
·
fb1f20c
1
Parent(s):
486ddb5
update UI
Browse files- app.py +262 -156
- data/2024-10/7b.xlsx +0 -0
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -13,6 +13,7 @@ from huggingface_hub import HfApi
|
|
| 13 |
from huggingface_hub.hf_api import HTTPError
|
| 14 |
from huggingface_hub.utils._errors import GatedRepoError
|
| 15 |
from gradio_rangeslider import RangeSlider
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
load_dotenv()
|
|
@@ -139,7 +140,7 @@ def get_unique_column_names(all_data):
|
|
| 139 |
"arxiv_\u200bphysics",
|
| 140 |
"github_\u200bcpp",
|
| 141 |
"github_\u200bpython",
|
| 142 |
-
"ao3_\u200bchinese",
|
| 143 |
]
|
| 144 |
|
| 145 |
|
|
@@ -149,74 +150,98 @@ def color_cell(value):
|
|
| 149 |
|
| 150 |
def update_table(
|
| 151 |
period: str,
|
| 152 |
-
|
| 153 |
metric: str,
|
| 154 |
visible_columns: list,
|
| 155 |
color_columns: list,
|
| 156 |
size_range: list,
|
| 157 |
-
sort_by: str = "Average (
|
| 158 |
ascending: bool = True,
|
| 159 |
):
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
|
| 222 |
def create_world_languages_gdp_chart():
|
|
@@ -292,95 +317,172 @@ def submit_model(name):
|
|
| 292 |
return "ERROR: Unexpected error. Please try again later."
|
| 293 |
|
| 294 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
def create_scaling_plot(all_data, period):
|
| 296 |
selected_columns = ["Name", "Parameters Count (B)", "Average (The lower the better)"]
|
| 297 |
target_data = all_data[period]
|
| 298 |
new_df = pd.DataFrame()
|
| 299 |
|
| 300 |
for size in target_data.keys():
|
| 301 |
-
new_df = pd.concat([new_df, target_data[size]["cr"].loc[:, selected_columns]], axis=0)
|
| 302 |
-
|
| 303 |
-
new_df
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
)
|
| 316 |
-
|
| 317 |
-
fig.
|
| 318 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
)
|
| 320 |
|
| 321 |
-
|
| 322 |
-
"
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
"
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
"
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
y = connection_points["Log Compression Rate (%)"].values
|
| 346 |
-
model = LinearRegression().fit(X, y)
|
| 347 |
-
|
| 348 |
-
x_min = connection_points["Log Params(B)"].min()
|
| 349 |
-
x_max = connection_points["Log Params(B)"].max()
|
| 350 |
-
extended_x = np.linspace(x_min, x_max * 1.5, 100)
|
| 351 |
-
extended_x_original = np.exp(extended_x)
|
| 352 |
-
trend_line_y = model.predict(extended_x.reshape(-1, 1))
|
| 353 |
-
trend_line_y_original = np.exp(trend_line_y)
|
| 354 |
-
|
| 355 |
-
trend_line = go.Scatter(
|
| 356 |
-
x=extended_x,
|
| 357 |
-
y=trend_line_y,
|
| 358 |
-
mode="lines",
|
| 359 |
-
line=dict(color="skyblue", dash="dash"),
|
| 360 |
-
name="Trend Line",
|
| 361 |
-
hovertemplate="<b>Params(B):</b> %{customdata[0]:.2f}<br>" + "<b>Compression Rate (%):</b> %{customdata[1]:.2f}<extra></extra>",
|
| 362 |
-
customdata=np.stack((extended_x_original, trend_line_y_original), axis=-1),
|
| 363 |
)
|
| 364 |
|
| 365 |
-
fig.add_trace(trend_line)
|
| 366 |
-
|
| 367 |
-
x_min = new_df["Params(B)"].min()
|
| 368 |
-
x_max = new_df["Params(B)"].max()
|
| 369 |
-
x_tick_vals = np.geomspace(x_min, x_max, num=5)
|
| 370 |
-
x_tick_text = [f"{val:.1f}" for val in x_tick_vals]
|
| 371 |
-
|
| 372 |
-
y_min = new_df["Compression Rate (%)"].min()
|
| 373 |
-
y_max = new_df["Compression Rate (%)"].max()
|
| 374 |
-
y_tick_vals = np.geomspace(y_min, y_max, num=5)
|
| 375 |
-
y_tick_text = [f"{val:.1f}" for val in y_tick_vals]
|
| 376 |
-
|
| 377 |
-
fig.update_xaxes(tickvals=np.log(x_tick_vals), ticktext=x_tick_text, title="Params(B)")
|
| 378 |
-
fig.update_yaxes(tickvals=np.log(y_tick_vals), ticktext=y_tick_text, title="Compression Rate (%)", autorange="reversed")
|
| 379 |
-
|
| 380 |
-
fig.update_layout(xaxis=dict(showgrid=True, zeroline=False), yaxis=dict(showgrid=True, zeroline=False))
|
| 381 |
-
|
| 382 |
-
fig.update_traces(marker=dict(size=12))
|
| 383 |
-
|
| 384 |
return fig
|
| 385 |
|
| 386 |
|
|
@@ -423,19 +525,17 @@ time_list.sort()
|
|
| 423 |
last_period = time_list[-1]
|
| 424 |
|
| 425 |
initial_fig = create_scaling_plot(all_data, last_period)
|
| 426 |
-
initial_period = last_period
|
| 427 |
-
initial_models = model_size_list
|
| 428 |
initial_metric = metric_list[0]
|
| 429 |
initial_columns = get_unique_column_names(all_data)
|
| 430 |
-
initial_columns = initial_columns[:-1]
|
| 431 |
-
# initial_colors = ["Average"]
|
| 432 |
initial_colors = ["Average", "Individual Tests"]
|
| 433 |
initial_size_range = [0, 15]
|
| 434 |
-
initial_data = update_table(
|
| 435 |
|
| 436 |
css = """
|
| 437 |
.gradio-container {
|
| 438 |
max-width: 95% !important;
|
|
|
|
| 439 |
}
|
| 440 |
.tab-buttons button {
|
| 441 |
font-size: 1.3em;
|
|
@@ -444,7 +544,11 @@ css = """
|
|
| 444 |
white-space: normal;
|
| 445 |
word-break: break-word;
|
| 446 |
}
|
| 447 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 448 |
"""
|
| 449 |
|
| 450 |
TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">🏆 LLM Compression Leaderboard</span></h1>'
|
|
@@ -460,17 +564,18 @@ with gr.Blocks(css=css) as demo:
|
|
| 460 |
period_selector = gr.Dropdown(label="Period", choices=time_list, value=last_period)
|
| 461 |
model_selector = gr.CheckboxGroup(label="Model Size", choices=model_size_list, value=model_size_list)
|
| 462 |
size_range_slider = RangeSlider(minimum=0, maximum=15, value=[0, 15], step=0.1, label="Model Size Range")
|
| 463 |
-
metric_selector = gr.Dropdown(label="Metric", choices=metric_list, value=
|
| 464 |
with gr.Column():
|
| 465 |
color_selector = gr.CheckboxGroup(label="Colored Columns", choices=["Average", "Individual Tests"], value=initial_colors)
|
| 466 |
colfilter = gr.CheckboxGroup(label="Data Source", choices=get_unique_column_names(all_data), value=initial_columns)
|
| 467 |
|
| 468 |
-
table = gr.Dataframe(
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
)
|
|
|
|
| 474 |
|
| 475 |
period_selector.change(
|
| 476 |
update_table, inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider], outputs=table
|
|
@@ -494,6 +599,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 494 |
with gr.Tab("🌍 MultiLang"):
|
| 495 |
gr.Markdown("## Coming soon...")
|
| 496 |
world_languages_plot = gr.Plot(create_world_languages_gdp_chart())
|
|
|
|
| 497 |
with gr.Tab("📈 Scaling Law"):
|
| 498 |
print(time_list)
|
| 499 |
period_selector_2 = gr.Dropdown(label="Period", choices=time_list, value=last_period)
|
|
@@ -507,13 +613,13 @@ with gr.Blocks(css=css) as demo:
|
|
| 507 |
|
| 508 |
with gr.Tab("ℹ️ About"):
|
| 509 |
gr.Markdown(about_md)
|
|
|
|
| 510 |
with gr.Tab("🚀 Submit"):
|
| 511 |
with gr.Group():
|
| 512 |
with gr.Row():
|
| 513 |
model_name = gr.Textbox(max_lines=1, placeholder="Enter model name...", show_label=False, scale=4)
|
| 514 |
submit = gr.Button("Submit", variant="primary", scale=0)
|
| 515 |
output = gr.Markdown("# Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")
|
| 516 |
-
|
| 517 |
submit.click(fn=submit_model, inputs=model_name, outputs=output)
|
| 518 |
|
| 519 |
demo.launch(share=False)
|
|
|
|
| 13 |
from huggingface_hub.hf_api import HTTPError
|
| 14 |
from huggingface_hub.utils._errors import GatedRepoError
|
| 15 |
from gradio_rangeslider import RangeSlider
|
| 16 |
+
import datetime
|
| 17 |
|
| 18 |
|
| 19 |
load_dotenv()
|
|
|
|
| 140 |
"arxiv_\u200bphysics",
|
| 141 |
"github_\u200bcpp",
|
| 142 |
"github_\u200bpython",
|
| 143 |
+
# "ao3_\u200bchinese",
|
| 144 |
]
|
| 145 |
|
| 146 |
|
|
|
|
| 150 |
|
| 151 |
def update_table(
    period: str,
    models_size: list,
    metric: str,
    visible_columns: list,
    color_columns: list,
    size_range: list,
    sort_by: str = "Average (lower=better)",
    ascending: bool = True,
) -> str:
    """Build the leaderboard table for the current filter selection as HTML.

    Args:
        period: Key into the module-level ``all_data`` mapping.
        models_size: Selected model-size labels; each is translated to a file
            name through ``model_size_to_file_name``.
        metric: Metric label, translated to a sheet name via ``metric_to_sheet``.
        visible_columns: Data-source columns to show and to average over.
        color_columns: Any of ``"Average"`` / ``"Individual Tests"``; selects
            which columns receive a background gradient.
        size_range: ``[low, high]`` bounds (inclusive) applied to the
            ``"Parameters Count (B)"`` column.
        sort_by: Column (post-rename name) used to order the rows.
        ascending: Sort direction.

    Returns:
        The styled table rendered with ``Styler.to_html()``, or a plain
        "no data" message when nothing matches the selection.
    """
    print(
        f"Updating - time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}, period: {period}, models: {models_size}, metric: {metric}, visible_columns: {visible_columns}, color_columns: {color_columns}, size_range: {size_range}, sort_by: {sort_by}, ascending: {ascending}\n"
    )

    no_data_message = "No data available for the selected models and period."
    if not models_size:
        return no_data_message

    target_period_data = all_data[period]
    sheet_name = metric_to_sheet[metric]
    sheets = [target_period_data[model_size_to_file_name[model]][sheet_name] for model in models_size]

    # Columns that are entirely NaN in a sheet are dropped before concatenating.
    combined_data = pd.concat([sheet.dropna(axis=1, how="all") for sheet in sheets], axis=0)
    if combined_data.empty:
        return no_data_message

    # Filter models based on the size range.
    in_range = combined_data["Parameters Count (B)"].between(size_range[0], size_range[1])
    combined_data = combined_data[in_range].reset_index(drop=True)
    if combined_data.empty:
        return no_data_message

    combined_data["Name"] = combined_data["Name"].str.replace(".pth", "", regex=False)

    # Only average/show data-source columns that actually exist in the combined
    # sheet; a selected column can be absent (e.g. dropped above as all-NaN),
    # and indexing it would raise KeyError.
    reserved = {"Name", "Parameters Count (B)", "Average (The lower the better)"}
    relevant_columns = [col for col in visible_columns if col not in reserved and col in combined_data.columns]

    # The average is recomputed over the currently selected columns only.
    combined_data["Average (The lower the better)"] = combined_data[relevant_columns].mean(axis=1).round(3)
    combined_data = combined_data.rename(
        columns={
            "Parameters Count (B)": "Params (B)",
            "Average (The lower the better)": "Average (lower=better)",
        }
    )

    sorted_data = combined_data.sort_values(by=sort_by, ascending=ascending)
    display_columns = ["Name", "Params (B)", "Average (lower=better)"] + relevant_columns
    filtered_data = sorted_data[display_columns].copy()
    filtered_data.columns = [col.replace("_", " ") for col in filtered_data.columns]

    formatter = {col: "{:.3f}" for col in filtered_data.columns if filtered_data[col].dtype in ["float64", "float32"]}

    # Colour gradient: green (low) -> white -> red (high).
    cmap = LinearSegmentedColormap.from_list("custom_cmap", ["#63be7b", "#ffffff", "#f8696b"])

    # Per-column gradient bounds. They are only recorded when there is more
    # than one row; a single row has no range to shade.
    vmin = {}
    vmax = {}
    if len(filtered_data) > 1:
        for column in filtered_data.columns:
            if column in ("Name", "Params (B)"):
                continue
            vmin[column] = filtered_data[column].min()
            vmax[column] = filtered_data[column].max()

    target_color_columns = []
    if "Average" in color_columns:
        target_color_columns.append("Average (lower=better)")
    if "Individual Tests" in color_columns:
        target_color_columns.extend(
            col for col in filtered_data.columns if col not in ("Name", "Params (B)", "Average (lower=better)")
        )

    styler = filtered_data.style.format(formatter).map(color_cell, subset=["Params (B)"])

    for column in target_color_columns:
        # Skip columns without recorded bounds instead of raising KeyError
        # (this happens whenever the filtered table has exactly one row).
        if column not in vmin:
            continue
        styler = styler.background_gradient(cmap=cmap, subset=[column], vmin=vmin[column], vmax=vmax[column])

    styler = styler.hide(axis="index")

    # Fixed pixel widths for the first columns; header and body cells are centred.
    widths = [300, 150, 150, 100, 100, 100, 100, 100, 100, 100, 100]
    table_styles = []
    for i, w in enumerate(widths):
        for tag in ("th", "td"):
            table_styles.append(
                {
                    "selector": f"{tag}.col{i}",
                    "props": [("min-width", f"{w}px"), ("max-width", f"{w}px"), ("text-align", "center")],
                }
            )

    styler = styler.set_table_styles(table_styles)

    return styler.to_html()
|
| 245 |
|
| 246 |
|
| 247 |
def create_world_languages_gdp_chart():
|
|
|
|
| 317 |
return "ERROR: Unexpected error. Please try again later."
|
| 318 |
|
| 319 |
|
| 320 |
+
# def create_scaling_plot(all_data, period):
|
| 321 |
+
# selected_columns = ["Name", "Parameters Count (B)", "Average (The lower the better)"]
|
| 322 |
+
# target_data = all_data[period]
|
| 323 |
+
# new_df = pd.DataFrame()
|
| 324 |
+
|
| 325 |
+
# for size in target_data.keys():
|
| 326 |
+
# new_df = pd.concat([new_df, target_data[size]["cr"].loc[:, selected_columns].dropna(axis=1, how="all")], axis=0)
|
| 327 |
+
|
| 328 |
+
# new_df.rename(columns={"Parameters Count (B)": "Params(B)", "Average (The lower the better)": "Compression Rate (%)"}, inplace=True)
|
| 329 |
+
|
| 330 |
+
# new_df["Log Params(B)"] = np.log(new_df["Params(B)"])
|
| 331 |
+
# new_df["Log Compression Rate (%)"] = np.log(new_df["Compression Rate (%)"])
|
| 332 |
+
|
| 333 |
+
# fig = px.scatter(
|
| 334 |
+
# new_df,
|
| 335 |
+
# x="Log Params(B)",
|
| 336 |
+
# y="Log Compression Rate (%)",
|
| 337 |
+
# title="Compression Rate Scaling Law",
|
| 338 |
+
# hover_name="Name",
|
| 339 |
+
# custom_data=["Params(B)", "Compression Rate (%)"],
|
| 340 |
+
# )
|
| 341 |
+
|
| 342 |
+
# fig.update_traces(
|
| 343 |
+
# hovertemplate="<b>%{hovertext}</b><br>Params(B): %{customdata[0]:.2f} B<br>Compression Rate (%): %{customdata[1]:.2f}<extra></extra>"
|
| 344 |
+
# )
|
| 345 |
+
# fig.update_layout(
|
| 346 |
+
# width=800, # 设置图像宽度
|
| 347 |
+
# height=600, # 设置图像高度
|
| 348 |
+
# title={"text": "Compression Rate Scaling Law", "x": 0.5, "xanchor": "center", "yanchor": "top"},
|
| 349 |
+
# showlegend=True,
|
| 350 |
+
# xaxis={"showgrid": True, "zeroline": False, "type": "linear", "title": "Params(B)"}, # 确保坐标轴类型正确
|
| 351 |
+
# yaxis={"showgrid": True, "zeroline": False, "type": "linear", "title": "Compression Rate (%)", "autorange": "reversed"},
|
| 352 |
+
# )
|
| 353 |
+
|
| 354 |
+
# names_to_connect_dict = {
|
| 355 |
+
# "2024-05": ["Meta-Llama-3-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
|
| 356 |
+
# "2024-06": ["Meta-Llama-3-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
|
| 357 |
+
# "2024-07": ["Meta-Llama-3.1-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
|
| 358 |
+
# "2024-08": [
|
| 359 |
+
# "Meta-Llama-3.1-8B",
|
| 360 |
+
# "Rene-v0.1-1.3b-pytorch",
|
| 361 |
+
# "stablelm-3b-4e1t",
|
| 362 |
+
# "Qwen2-1.5B",
|
| 363 |
+
# "TinyLlama-1.1B-intermediate-step-1431k-3T",
|
| 364 |
+
# "Mistral-Nemo-Base-2407",
|
| 365 |
+
# ],
|
| 366 |
+
# "2025-01": ["Qwen2.5-1.5B"],
|
| 367 |
+
# }
|
| 368 |
+
|
| 369 |
+
# names_to_connect = names_to_connect_dict.get(period, names_to_connect_dict["2024-08"])
|
| 370 |
+
|
| 371 |
+
# connection_points = new_df[new_df["Name"].isin(names_to_connect)]
|
| 372 |
+
# print(connection_points)
|
| 373 |
+
|
| 374 |
+
# new_df["Color"] = new_df["Name"].apply(lambda name: "#39C5BB" if name in names_to_connect else "#636efa")
|
| 375 |
+
|
| 376 |
+
# fig.update_traces(marker=dict(color=new_df["Color"]))
|
| 377 |
+
|
| 378 |
+
# X = connection_points["Log Params(B)"].values.reshape(-1, 1)
|
| 379 |
+
# y = connection_points["Log Compression Rate (%)"].values
|
| 380 |
+
# model = LinearRegression().fit(X, y)
|
| 381 |
+
|
| 382 |
+
# x_min = connection_points["Log Params(B)"].min()
|
| 383 |
+
# x_max = connection_points["Log Params(B)"].max()
|
| 384 |
+
# extended_x = np.linspace(x_min, x_max * 1.5, 100)
|
| 385 |
+
# extended_x_original = np.exp(extended_x)
|
| 386 |
+
# trend_line_y = model.predict(extended_x.reshape(-1, 1))
|
| 387 |
+
# trend_line_y_original = np.exp(trend_line_y)
|
| 388 |
+
|
| 389 |
+
# trend_line = go.Scatter(
|
| 390 |
+
# x=extended_x,
|
| 391 |
+
# y=trend_line_y,
|
| 392 |
+
# mode="lines",
|
| 393 |
+
# line=dict(color="skyblue", dash="dash"),
|
| 394 |
+
# name="Trend Line",
|
| 395 |
+
# hovertemplate="<b>Params(B):</b> %{customdata[0]:.2f}<br>" + "<b>Compression Rate (%):</b> %{customdata[1]:.2f}<extra></extra>",
|
| 396 |
+
# customdata=np.stack((extended_x_original, trend_line_y_original), axis=-1),
|
| 397 |
+
# )
|
| 398 |
+
|
| 399 |
+
# fig.add_trace(trend_line)
|
| 400 |
+
|
| 401 |
+
# x_min = new_df["Params(B)"].min()
|
| 402 |
+
# x_max = new_df["Params(B)"].max()
|
| 403 |
+
# x_tick_vals = np.geomspace(x_min, x_max, num=5)
|
| 404 |
+
# x_tick_text = [f"{val:.1f}" for val in x_tick_vals]
|
| 405 |
+
|
| 406 |
+
# y_min = new_df["Compression Rate (%)"].min()
|
| 407 |
+
# y_max = new_df["Compression Rate (%)"].max()
|
| 408 |
+
# y_tick_vals = np.geomspace(y_min, y_max, num=5)
|
| 409 |
+
# y_tick_text = [f"{val:.1f}" for val in y_tick_vals]
|
| 410 |
+
|
| 411 |
+
# fig.update_xaxes(tickvals=np.log(x_tick_vals), ticktext=x_tick_text, title="Params(B)")
|
| 412 |
+
# fig.update_yaxes(tickvals=np.log(y_tick_vals), ticktext=y_tick_text, title="Compression Rate (%)", autorange="reversed")
|
| 413 |
+
|
| 414 |
+
# fig.update_layout(xaxis=dict(showgrid=True, zeroline=False), yaxis=dict(showgrid=True, zeroline=False))
|
| 415 |
+
|
| 416 |
+
# fig.update_traces(marker=dict(size=12))
|
| 417 |
+
|
| 418 |
+
# print(fig.layout)
|
| 419 |
+
|
| 420 |
+
# return fig
|
| 421 |
+
|
| 422 |
+
|
| 423 |
def _log_axis_settings(values, tick_count=5, padding=0.1):
    """Return plotly ``range``/``dtick`` axis settings, in log10 units, for positive ``values``."""
    if not values:
        # Nothing to span: let plotly pick the axis range itself.
        return {}
    low, high = np.log10(min(values)), np.log10(max(values))
    settings = {"range": [low - padding, high + padding]}
    span = high - low
    if span > 0:
        # Split the span into ``tick_count`` ticks; a zero span would give dtick=0.
        settings["dtick"] = span / (tick_count - 1)
    return settings


def create_scaling_plot(all_data, period):
    """Build a log-log scatter figure of compression rate against parameter count.

    Args:
        all_data: Mapping ``period -> size -> sheet name -> DataFrame``; the
            ``"cr"`` sheet of every size in ``period`` is read.
        period: Key of the period to plot.

    Returns:
        A ``go.Figure`` with one marker per model whose parameter count and
        average are both positive numbers. Rows with missing or non-positive
        values are left out because they cannot be placed on a log axis and
        would otherwise corrupt the computed axis range.
    """
    selected_columns = ["Name", "Parameters Count (B)", "Average (The lower the better)"]
    target_data = all_data[period]

    # Collect every sheet first and concatenate once instead of growing a
    # DataFrame inside the loop.
    frames = [sheets["cr"].loc[:, selected_columns].dropna(axis=1, how="all") for sheets in target_data.values()]
    if frames:
        new_df = pd.concat(frames, axis=0, ignore_index=True)
    else:
        new_df = pd.DataFrame(columns=selected_columns)
    # A column dropped above as all-NaN in every sheet is restored as NaN here.
    new_df = new_df.reindex(columns=selected_columns)

    params = new_df["Parameters Count (B)"].astype(float)
    rates = new_df["Average (The lower the better)"].astype(float)
    plottable = (params > 0) & (rates > 0)  # comparisons with NaN are False

    x_values = params[plottable].tolist()
    y_values = rates[plottable].tolist()
    names = new_df.loc[plottable, "Name"].tolist()

    fig = go.Figure()

    fig.add_trace(
        go.Scatter(
            x=x_values,
            y=y_values,
            mode="markers",
            name="Models",
            marker=dict(size=12, color="#39C5BB", opacity=0.8),
            text=names,
            customdata=list(zip(x_values, y_values)),
            hovertemplate=(
                "<b>%{text}</b><br>" + "Params: %{customdata[0]:.2f}B<br>" + "Compression Rate: %{customdata[1]:.2f}%<br>" + "<extra></extra>"
            ),
        )
    )

    fig.update_layout(
        title={"text": "Compression Rate Scaling Law", "x": 0.5, "xanchor": "center", "yanchor": "top"},
        width=800,
        height=600,
        showlegend=True,
        xaxis=dict(
            title="Parameters (B)",
            showgrid=True,
            zeroline=False,
            type="log",
            tickformat=".2f",  # two decimal places
            **_log_axis_settings(x_values),
        ),
        yaxis=dict(
            title="Compression Rate (%)",
            showgrid=True,
            zeroline=False,
            type="log",
            tickformat=".2f",  # two decimal places
            # NOTE(review): an explicit autorange may take precedence over
            # `range` in plotly — confirm which of the two is intended here.
            autorange="reversed",
            **_log_axis_settings(y_values),
        ),
    )

    return fig
|
| 487 |
|
| 488 |
|
|
|
|
| 525 |
last_period = time_list[-1]
|
| 526 |
|
| 527 |
initial_fig = create_scaling_plot(all_data, last_period)
|
|
|
|
|
|
|
| 528 |
initial_metric = metric_list[0]
|
| 529 |
initial_columns = get_unique_column_names(all_data)
|
| 530 |
+
# initial_columns = initial_columns[:-1]
|
|
|
|
| 531 |
initial_colors = ["Average", "Individual Tests"]
|
| 532 |
initial_size_range = [0, 15]
|
| 533 |
+
initial_data = update_table(last_period, model_size_list, initial_metric, initial_columns, initial_colors, initial_size_range)
|
| 534 |
|
| 535 |
css = """
|
| 536 |
.gradio-container {
|
| 537 |
max-width: 95% !important;
|
| 538 |
+
margin: 0 auto;
|
| 539 |
}
|
| 540 |
.tab-buttons button {
|
| 541 |
font-size: 1.3em;
|
|
|
|
| 544 |
white-space: normal;
|
| 545 |
word-break: break-word;
|
| 546 |
}
|
| 547 |
+
table {
|
| 548 |
+
margin-left: auto !important;
|
| 549 |
+
margin-right: auto !important;
|
| 550 |
+
width: 100% !important;
|
| 551 |
+
}
|
| 552 |
"""
|
| 553 |
|
| 554 |
TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">🏆 LLM Compression Leaderboard</span></h1>'
|
|
|
|
| 564 |
period_selector = gr.Dropdown(label="Period", choices=time_list, value=last_period)
|
| 565 |
model_selector = gr.CheckboxGroup(label="Model Size", choices=model_size_list, value=model_size_list)
|
| 566 |
size_range_slider = RangeSlider(minimum=0, maximum=15, value=[0, 15], step=0.1, label="Model Size Range")
|
| 567 |
+
metric_selector = gr.Dropdown(label="Metric", choices=metric_list, value=initial_metric)
|
| 568 |
with gr.Column():
|
| 569 |
color_selector = gr.CheckboxGroup(label="Colored Columns", choices=["Average", "Individual Tests"], value=initial_colors)
|
| 570 |
colfilter = gr.CheckboxGroup(label="Data Source", choices=get_unique_column_names(all_data), value=initial_columns)
|
| 571 |
|
| 572 |
+
# table = gr.Dataframe(
|
| 573 |
+
# initial_data,
|
| 574 |
+
# column_widths=[130, 50, 50, 35, 35, 35, 35, 35, 35, 35, 35],
|
| 575 |
+
# wrap=True,
|
| 576 |
+
# max_height=800,
|
| 577 |
+
# )
|
| 578 |
+
table = gr.HTML(initial_data)
|
| 579 |
|
| 580 |
period_selector.change(
|
| 581 |
update_table, inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider], outputs=table
|
|
|
|
| 599 |
with gr.Tab("🌍 MultiLang"):
|
| 600 |
gr.Markdown("## Coming soon...")
|
| 601 |
world_languages_plot = gr.Plot(create_world_languages_gdp_chart())
|
| 602 |
+
|
| 603 |
with gr.Tab("📈 Scaling Law"):
|
| 604 |
print(time_list)
|
| 605 |
period_selector_2 = gr.Dropdown(label="Period", choices=time_list, value=last_period)
|
|
|
|
| 613 |
|
| 614 |
with gr.Tab("ℹ️ About"):
|
| 615 |
gr.Markdown(about_md)
|
| 616 |
+
|
| 617 |
with gr.Tab("🚀 Submit"):
|
| 618 |
with gr.Group():
|
| 619 |
with gr.Row():
|
| 620 |
model_name = gr.Textbox(max_lines=1, placeholder="Enter model name...", show_label=False, scale=4)
|
| 621 |
submit = gr.Button("Submit", variant="primary", scale=0)
|
| 622 |
output = gr.Markdown("# Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")
|
|
|
|
| 623 |
submit.click(fn=submit_model, inputs=model_name, outputs=output)
|
| 624 |
|
| 625 |
demo.launch(share=False)
|
data/2024-10/7b.xlsx
CHANGED
|
Binary files a/data/2024-10/7b.xlsx and b/data/2024-10/7b.xlsx differ
|
|
|
requirements.txt
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
plotly==5.22.0
|
| 2 |
scikit-learn==1.5.0
|
| 3 |
-
gradio==
|
| 4 |
huggingface_hub==0.23.1
|
| 5 |
pandas==2.2.2
|
| 6 |
openpyxl==3.1.2
|
|
|
|
| 1 |
plotly==5.22.0
|
| 2 |
scikit-learn==1.5.0
|
| 3 |
+
gradio==5.15.0
|
| 4 |
huggingface_hub==0.23.1
|
| 5 |
pandas==2.2.2
|
| 6 |
openpyxl==3.1.2
|