Spaces:
Running
Running
<div> <div id=e996eac5-dceb-42af-9cc6-d21f247621f5 class=plotly-graph-div style="height:400px; width:1000px;"></div> <script>window.PLOTLYENV=window.PLOTLYENV||{},document.getElementById("e996eac5-dceb-42af-9cc6-d21f247621f5")&&Plotly.newPlot("e996eac5-dceb-42af-9cc6-d21f247621f5",[{marker:{color:"#4ea5b7"},name:"Tokens/sec/GPU",width:.7,x:["2","4","8","16","32"],y:[14167.25,13460.16,10888.53,6159.3,3609.73],type:"bar",xaxis:"x",yaxis:"y"},{base:[13460.16],marker:{color:"#e889ab"},name:"Performance Drop",showlegend:!0,width:.0875,x:["4"],y:[707.0900000000001],type:"bar",xaxis:"x",yaxis:"y"},{base:[10888.53],marker:{color:"#e889ab"},showlegend:!1,width:.0875,x:["8"],y:[2571.629999999999],type:"bar",xaxis:"x",yaxis:"y"},{base:[6159.3],marker:{color:"#e889ab"},showlegend:!1,width:.0875,x:["16"],y:[4729.2300000000005],type:"bar",xaxis:"x",yaxis:"y"},{base:[3609.73],marker:{color:"#e889ab"},showlegend:!1,width:.0875,x:["32"],y:[2549.57],type:"bar",xaxis:"x",yaxis:"y"},{marker:{color:"#cec0fa"},name:"Max Batch Size",text:["4","10","20","40","100"],textposition:"inside",width:.7,x:["2","4","8","16","32"],y:[4,10,20,40,100],type:"bar",xaxis:"x2",yaxis:"y2"}],{template:{data:{histogram2dcontour:[{type:"histogram2dcontour",colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]]}],choropleth:[{type:"choropleth",colorbar:{outlinewidth:0,ticks:""}}],histogram2d:[{type:"histogram2d",colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]]}],heatmap:[{type:"heatmap",colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]]}],heatmapgl:[{type:"heatmapgl",colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]]}],contourcarpet:[{type:"contourcarpet",colorbar:{outlinewidth:0,ticks:""}}],contour:[{type:"contour",colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]]}],surface:[{type:"surface",colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]]}],mesh3d:[{type:"mesh3d",colorbar:{outlinewidth:0,ticks:""}}],scatter:[{fillpattern:{fillmode:"overlay",size:10,solidity:.2},type:"scatter"}],parcoords:[{type:"parcoords",line:{colorbar:{outlinewidth:0,ticks:""}}}],scatterpolargl:[{type:"scatterpolargl",marker:{colorbar:{outlinewidth:0,ticks:""}}}],bar:[{error_x:{color:"#2a3f5f"},error_y:{color:"#2a3f5f"},marker:{line:{color:"#E5ECF6",width:.5},pattern:{fillmode:"overlay",size:10,solidity:.2}},type:"bar"}],scattergeo:[{type:"scattergeo",marker:{colorbar:{outlinewidth:0,ticks:""}}}],scatterpolar:[{type:"scatterpolar",marker:{colorbar:{outlinewidth:0,ticks:""}}}],histogram:[{marker:{pattern:{fillmode:"overlay",size:10,solidity:.2}},type:"histogram"}],scattergl:[{type:"scattergl",marker:{colorbar:{outlinewidth:0,ticks:""}}}],scatter3d:[{type:"scatter3d",line:{colorbar:{outlinewidth:0,ticks:""}},marker:{colorbar:{outlinewidth:0,ticks:""}}}],scattermapbox:[{type:"scattermapbox",marker:{colorbar:{outlinewidth:0,ticks:""}}}],scatterternary:[{type:"scatterternary",marker:{colorbar:{outlinewidth:0,ticks:""}}}],scattercarpet:[{type:"scattercarpet",marker:{colorbar:{outlinewidth:0,ticks:""}}}],carpet:[{aaxis:{endlinecolor:"#2a3f5f",gridcolor:"white",linecolor:"white",minorgridcolor:"white",startlinecolor:"#2a3f5f"},baxis:{endlinecolor:"#2a3f5f",gridcolor:"white",linecolor:"white",minorgridcolor:"white",startlinecolor:"#2a3f5f"},type:"carpet"}],table:[{cells:{fill:{color:"#EBF0F8"},line:{color:"white"}},header:{fill:{color:"#C8D4E3"},line:{color:"white"}},type:"table"}],barpolar:[{marker:{line:{color:"#E5ECF6",width:.5},pattern:{fillmode:"overlay",size:10,solidity:.2}},type:"barpolar"}],pie:[{automargin:!0,type:"pie"}]},layout:{autotypenumbers:"strict",colorway:["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],font:{color:"#2a3f5f"},hovermode:"closest",hoverlabel:{align:"left"},paper_bgcolor:"white",plot_bgcolor:"#E5ECF6",polar:{bgcolor:"#E5ECF6",angularaxis:{gridcolor:"white",linecolor:"white",ticks:""},radialaxis:{gridcolor:"white",linecolor:"white",ticks:""}},ternary:{bgcolor:"#E5ECF6",aaxis:{gridcolor:"white",linecolor:"white",ticks:""},baxis:{gridcolor:"white",linecolor:"white",ticks:""},caxis:{gridcolor:"white",linecolor:"white",ticks:""}},coloraxis:{colorbar:{outlinewidth:0,ticks:""}},colorscale:{sequential:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]],sequentialminus:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]],diverging:[[0,"#8e0152"],[.1,"#c51b7d"],[.2,"#de77ae"],[.3,"#f1b6da"],[.4,"#fde0ef"],[.5,"#f7f7f7"],[.6,"#e6f5d0"],[.7,"#b8e186"],[.8,"#7fbc41"],[.9,"#4d9221"],[1,"#276419"]]},xaxis:{gridcolor:"white",linecolor:"white",ticks:"",title:{standoff:15},zerolinecolor:"white",automargin:!0,zerolinewidth:2},yaxis:{gridcolor:"white",linecolor:"white",ticks:"",title:{standoff:15},zerolinecolor:"white",automargin:!0,zerolinewidth:2},scene:{xaxis:{backgroundcolor:"#E5ECF6",gridcolor:"white",linecolor:"white",showbackground:!0,ticks:"",zerolinecolor:"white",gridwidth:2},yaxis:{backgroundcolor:"#E5ECF6",gridcolor:"white",linecolor:"white",showbackground:!0,ticks:"",zerolinecolor:"white",gridwidth:2},zaxis:{backgroundcolor:"#E5ECF6",gridcolor:"white",linecolor:"white",showbackground:!0,ticks:"",zerolinecolor:"white",gridwidth:2}},shapedefaults:{line:{color:"#2a3f5f"}},annotationdefaults:{arrowcolor:"#2a3f5f",arrowhead:0,arrowwidth:1},geo:{bgcolor:"white",landcolor:"#E5ECF6",subunitcolor:"white",showland:!0,showlakes:!0,lakecolor:"white"},title:{x:.05},mapbox:{style:"light"}}},xaxis:{anchor:"y",domain:[0,.45],title:{text:"Tensor Parallelism (TP)"},showgrid:!0,gridcolor:"LightGray"},yaxis:{anchor:"x",domain:[0,1],title:{text:"Tokens/sec/GPU"},showgrid:!0,gridcolor:"LightGray"},xaxis2:{anchor:"y2",domain:[.55,1],title:{text:"Tensor Parallelism (TP)"},showgrid:!0,gridcolor:"LightGray"},yaxis2:{anchor:"x2",domain:[0,1],title:{text:"Maximum Batch Size"},showgrid:!0,gridcolor:"LightGray"},annotations:[{font:{size:16},showarrow:!1,text:"Throughput Scaling with TP/SP (3B Model)",x:.225,xanchor:"center",xref:"paper",y:1,yanchor:"bottom",yref:"paper"},{font:{size:16},showarrow:!1,text:"Maximum Batch Size per TP Value",x:.775,xanchor:"center",xref:"paper",y:1,yanchor:"bottom",yref:"paper"},{font:{color:"#e889ab"},showarrow:!1,text:"-5.0%",x:1,xanchor:"center",xref:"x",xshift:30,y:13813.705,yanchor:"middle",yref:"y"},{font:{color:"#e889ab"},showarrow:!1,text:"-19.1%",x:2,xanchor:"center",xref:"x",xshift:30,y:12174.345000000001,yanchor:"middle",yref:"y"},{font:{color:"#e889ab"},showarrow:!1,text:"-43.4%",x:3,xanchor:"center",xref:"x",xshift:30,y:8523.915,yanchor:"middle",yref:"y"},{font:{color:"#e889ab"},showarrow:!1,text:"-41.4%",x:4,xanchor:"center",xref:"x",xshift:30,y:4884.515,yanchor:"middle",yref:"y"}],legend:{x:.55,y:1},width:1e3,height:400,barmode:"stack"},{responsive:!0})</script> </div> |