Spaces:
Running
Running
| import gradio as gr | |
| import pandas as pd | |
| import plotly.express as px | |
# Columns attached to every plotted point as plotly ``customdata`` and shown,
# in this order, as one line each in the hover tooltip. Every column is listed
# exactly once so the tooltip never repeats a line.
BETTERTRANSFORMER_DATA = [
    # open llm
    "Model π€",
    "DType π₯",
    "Backend π",
    "Params (B)",
    "Architecture ποΈ",
    "Open LLM Score (%)",
    # deployment settings (dtype and backend are already listed above)
    "Optimization π οΈ",
    "Quantization ποΈ",
    "Optimization π οΈ BetterTransformer",
    # primary measurements
    "Prefill (s)",
    "Prefill (s) BetterTransformer",
    "Decode (tokens/s)",
    "Decode (tokens/s) BetterTransformer",
    "End-to-End (tokens/s)",
    "End-to-End (tokens/s) BetterTransformer",
    # speedups
    "Prefill Speedup (%)",
    "Decode Speedup (%)",
]
def get_bt_df(llm_perf_df):
    """Pair each float16 baseline run with its BetterTransformer run and compute speedups.

    Args:
        llm_perf_df: DataFrame of benchmark rows. It must contain the model,
            quantization, optimization and dtype columns used below, plus
            "Prefill (s)" and "Decode (tokens/s)". It is not modified.

    Returns:
        A new DataFrame with one row per (model, quantization) match. Columns
        of the BetterTransformer run carry the " BetterTransformer" suffix,
        and "Prefill Speedup (%)" / "Decode Speedup (%)" are added, rounded to
        two decimals. Rows where either speedup is not strictly below 1000
        are dropped.
    """
    is_float16 = llm_perf_df["DType π₯"] == "float16"
    optimization = llm_perf_df["Optimization π οΈ"]

    # separate original model experiments from BetterTransformer experiments
    # (boolean-mask selection returns new frames, so the input stays untouched)
    original_df = llm_perf_df[(optimization == "None") & is_float16]
    optimized_df = llm_perf_df[(optimization == "BetterTransformer") & is_float16]

    # merge the two dataframes; baseline columns keep their names
    bt_df = pd.merge(
        original_df,
        optimized_df,
        on=["Model π€", "Quantization ποΈ"],
        suffixes=("", " BetterTransformer"),
    )

    # compute speedups; subtract before rounding so the stored value really
    # has two decimals (rounding first leaves noise like 12.340000000000003)
    # prefill is a latency: lower is better, so baseline / optimized
    bt_df["Prefill Speedup (%)"] = (
        (bt_df["Prefill (s)"] / bt_df["Prefill (s) BetterTransformer"]) * 100 - 100
    ).round(2)
    # decode is a throughput: higher is better, so optimized / baseline
    bt_df["Decode Speedup (%)"] = (
        (bt_df["Decode (tokens/s) BetterTransformer"] / bt_df["Decode (tokens/s)"]) * 100 - 100
    ).round(2)

    # filter speedups >= 1000%
    plausible = (bt_df["Prefill Speedup (%)"] < 1000) & (bt_df["Decode Speedup (%)"] < 1000)
    return bt_df[plausible]
def _get_bt_speedup_fig(llm_perf_df, phase):
    """Build the per-architecture box plot for one BetterTransformer speedup metric.

    Args:
        llm_perf_df: benchmark DataFrame, passed unchanged to ``get_bt_df``.
        phase: "Prefill" or "Decode"; selects the "<phase> Speedup (%)" column
            and is used in the figure title and y-axis label.

    Returns:
        The plotly figure produced by ``px.box``, with hover template and
        layout applied.
    """
    speedup_column = f"{phase} Speedup (%)"
    bt_df = get_bt_df(llm_perf_df)

    # plot: one box per architecture, coloured by quantization scheme
    fig = px.box(
        bt_df,
        x="Architecture ποΈ",
        y=speedup_column,
        color_discrete_sequence=px.colors.qualitative.Light24,
        custom_data=BETTERTRANSFORMER_DATA,
        color="Quantization ποΈ",
        points="all",
    )

    # add hover data: one "<b>column:</b> value" line per custom_data column;
    # the index i must match the column's position in custom_data
    fig.update_traces(
        hovertemplate="<br>".join(
            f"<b>{column}:</b> %{{customdata[{i}]}}" for i, column in enumerate(BETTERTRANSFORMER_DATA)
        )
    )

    # add layout
    fig.update_layout(
        title={
            "text": f"{phase} Speedup per Architecture, Compared To Non-Optimized Model",
            "y": 0.95,
            "x": 0.5,
            "xanchor": "center",
            "yanchor": "top",
        },
        xaxis_title="LLM Architecture",
        yaxis_title=speedup_column,
        legend_title="Quantization Scheme",
        width=1200,
        height=600,
    )
    return fig


def get_bt_prefill_fig(llm_perf_df):
    """Return the box plot of BetterTransformer prefill speedup (%) per architecture."""
    return _get_bt_speedup_fig(llm_perf_df, "Prefill")


def get_bt_decode_fig(llm_perf_df):
    """Return the box plot of BetterTransformer decode speedup (%) per architecture."""
    return _get_bt_speedup_fig(llm_perf_df, "Decode")
def create_bt_plots(llm_perf_df):
    """Create the hint text and the two BetterTransformer speedup plot components.

    Emits a ``gr.HTML`` hint, builds the prefill and decode figures from
    ``llm_perf_df``, wraps each figure in a ``gr.components.Plot``, and
    returns ``(prefill_plot, decode_plot)``.
    """
    # hint shown with the plots
    gr.HTML("π Hover over the points π for additional information.", elem_id="text")

    # build both figures before any plot component is created
    figures = [
        get_bt_prefill_fig(llm_perf_df),
        get_bt_decode_fig(llm_perf_df),
    ]

    # wrap the figures, prefill first then decode
    prefill_plot, decode_plot = [
        gr.components.Plot(value=figure, elem_id="plot", show_label=False) for figure in figures
    ]
    return prefill_plot, decode_plot