Commit 531390e · 1 parent: 483e3a1
remove memory for now because there are errors
app.py CHANGED
@@ -40,7 +40,7 @@ ALL_COLUMNS_MAPPING = {
     "optimizations": "Optimizations 🛠️",
     #
     "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
-    "forward.peak_memory(MB)": "Peak Memory (MB) ⬇️",
+    # "forward.peak_memory(MB)": "Peak Memory (MB) ⬇️",
     #
     "best_scored_model": "Best Scored Model 🏆",
     "best_score": "Best Score (%) ⬆️",
@@ -84,9 +84,9 @@ def get_benchmark_df(benchmark="1xA100-80GB"):
     )

     # convert peak memory to int
-    merged_df["forward.peak_memory(MB)"] = merged_df["forward.peak_memory(MB)"].apply(
-        lambda x: int(x)
-    )
+    # merged_df["forward.peak_memory(MB)"] = merged_df["forward.peak_memory(MB)"].apply(
+    #     lambda x: int(x)
+    # )

     # add optimizations
     merged_df["optimizations"] = merged_df[
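Note: the likeliest failure behind this commit is that `int(x)` raises `ValueError: cannot convert float NaN to integer` whenever a model is missing a memory measurement. A NaN-tolerant cast would let the column come back; the following is a sketch under that assumption, not the repo's actual fix, and the toy `merged_df` is a hypothetical stand-in for the frame built by `get_benchmark_df`.

import pandas as pd

# Hypothetical stand-in for the merged benchmark frame.
merged_df = pd.DataFrame({"forward.peak_memory(MB)": [4096.0, float("nan"), 1523.7]})

# int(x) raises ValueError on NaN; coercing to pandas' nullable Int64 dtype
# keeps rows with missing measurements as <NA> instead of crashing the app.
merged_df["forward.peak_memory(MB)"] = (
    pd.to_numeric(merged_df["forward.peak_memory(MB)"], errors="coerce")
    .round()
    .astype("Int64")
)
print(merged_df)  # 4096, <NA>, 1524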
@@ -149,13 +149,13 @@ def get_benchmark_plot(bench_df):
         x="generate.latency(s)",
         y="best_score",
         color="model_type",
-        size="forward.peak_memory(MB)",
+        # size="forward.peak_memory(MB)",
         custom_data=[
             "best_scored_model",
             "backend.name",
             "backend.torch_dtype",
             "optimizations",
-            "forward.peak_memory(MB)",
+            # "forward.peak_memory(MB)",
             "generate.throughput(tokens/s)",
         ],
         color_discrete_sequence=px.colors.qualitative.Light24,
@@ -163,7 +163,7 @@ def get_benchmark_plot(bench_df):
 
     fig.update_layout(
         title={
-            "text": "Model Score vs. Latency
+            "text": "Model Score vs. Latency",
             "y": 0.95,
             "x": 0.5,
             "xanchor": "center",
@@ -183,8 +183,8 @@ def get_benchmark_plot(bench_df):
                 "Backend: %{customdata[1]}",
                 "Load Datatype: %{customdata[2]}",
                 "Optimizations: %{customdata[3]}",
-                "Peak Memory (MB): %{customdata[4]}",
-                "Throughput (tokens/s): %{customdata[5]}",
+                # "Peak Memory (MB): %{customdata[4]}",
+                "Throughput (tokens/s): %{customdata[4]}",
                 "Per 1000 Tokens Latency (s): %{x}",
                 "Open LLM Score (%): %{y}",
             ]
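Note on the two plot hunks above: `custom_data` entries are addressed positionally in the hovertemplate, so dropping `"forward.peak_memory(MB)"` from the list renumbers everything after it, which is why throughput moves from `%{customdata[5]}` to `%{customdata[4]}`. A minimal standalone illustration (toy column names, not the leaderboard's schema):

import pandas as pd
import plotly.express as px

df = pd.DataFrame({"latency": [1.0, 2.0], "score": [60, 70], "tput": [100, 50]})
fig = px.scatter(df, x="latency", y="score", custom_data=["tput"])
# hovertemplate indexes custom_data by position, so every removal shifts
# the indices of all entries that follow it.
fig.update_traces(hovertemplate="Throughput: %{customdata[0]}<extra></extra>")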
@@ -200,7 +200,7 @@ def filter_query(
     datatypes,
     optimizations,
     score,
-    memory,
+    # memory,
     benchmark="1xA100-80GB",
 ):
     raw_df = get_benchmark_df(benchmark=benchmark)
@@ -221,7 +221,7 @@ def filter_query(
             else True
         )
         & (raw_df["best_score"] >= score)
-        & (raw_df["forward.peak_memory(MB)"] <= memory)
+        # & (raw_df["forward.peak_memory(MB)"] <= memory)
     ]
 
     filtered_table = get_benchmark_table(filtered_df)
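If the memory filter is restored later, a defensive mask would avoid reintroducing the same crash. A sketch, assuming a hypothetical helper (not part of app.py) called from `filter_query`:

import pandas as pd

def memory_mask(raw_df: pd.DataFrame, memory: float) -> pd.Series:
    # Hypothetical helper: returns a no-op mask when the column is absent,
    # and a NaN-tolerant comparison when it is present.
    col = "forward.peak_memory(MB)"
    if col not in raw_df.columns:
        return pd.Series(True, index=raw_df.index)
    values = pd.to_numeric(raw_df[col], errors="coerce")
    return values.le(memory) | values.isna()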
@@ -291,16 +291,16 @@ with demo:
                     value=0,
                     elem_id="threshold-slider",
                 )
-            with gr.Column(scale=1):
-                with gr.Box():
-                    memory_slider = gr.Slider(
-                        label="Peak Memory (MB) 📈",
-                        info="🎚️ Slide to maximum Peak Memory",
-                        minimum=0,
-                        maximum=80 * 1024,
-                        value=80 * 1024,
-                        elem_id="memory-slider",
-                    )
+            # with gr.Column(scale=1):
+            #     with gr.Box():
+            #         memory_slider = gr.Slider(
+            #             label="Peak Memory (MB) 📈",
+            #             info="🎚️ Slide to maximum Peak Memory",
+            #             minimum=0,
+            #             maximum=80 * 1024,
+            #             value=80 * 1024,
+            #             elem_id="memory-slider",
+            #         )
 
         with gr.Row():
             with gr.Column(scale=1):
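The block commented out above is the shelved control itself: an 80 GB ceiling expressed in MB (80 * 1024) to match the 1xA100-80GB benchmark. For reference, a minimal self-contained version of that slider, assuming a Gradio 3.x runtime where `gr.Box` still exists:

import gradio as gr

with gr.Blocks() as demo:
    with gr.Column(scale=1):
        with gr.Box():  # gr.Box was removed in Gradio 4.x
            memory_slider = gr.Slider(
                label="Peak Memory (MB)",
                minimum=0,
                maximum=80 * 1024,  # 80 GB of A100 VRAM, in MB
                value=80 * 1024,
                elem_id="memory-slider",
            )

if __name__ == "__main__":
    demo.launch()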
@@ -352,7 +352,7 @@ with demo:
             datatype_checkboxes,
             optimizations_checkboxes,
             score_slider,
-            memory_slider,
+            # memory_slider,
         ],
         [A100_leaderboard, A100_plotly],
     )