Spaces:
Running
Running
svg work (#17)
Browse files- updating graphs (f38b87593f094b90e11f0a668d7fc08425248d33)
- assets/data/benchmarks/dp_ourjourney_memoryusage.html +2 -0
- assets/data/benchmarks/dp_scaling.html +2 -0
- assets/data/benchmarks/memusage_activations.html +1 -1
- assets/data/benchmarks/tp_sp_scaling.html +2 -0
- assets/data/benchmarks/zero3_memoryusage.html +2 -0
- dist/assets/data/benchmarks/dp_ourjourney_memoryusage.html +2 -0
- dist/assets/data/benchmarks/dp_scaling.html +2 -0
- dist/assets/data/benchmarks/memusage_activations.html +1 -1
- dist/assets/data/benchmarks/tp_sp_scaling.html +2 -0
- dist/assets/data/benchmarks/zero3_memoryusage.html +2 -0
- dist/index.html +40 -7
- src/index.html +40 -7
assets/data/benchmarks/dp_ourjourney_memoryusage.html
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
<div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
|
2 |
+
<script charset="utf-8" src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script> <div id="3646e093-fda5-4c7f-8f67-2b049c56786f" class="plotly-graph-div" style="height:410px; width:1000px;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("3646e093-fda5-4c7f-8f67-2b049c56786f")) { Plotly.newPlot( "3646e093-fda5-4c7f-8f67-2b049c56786f", [{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[2.3017578125,2.3017578125,2.3017578125,2.3017578125,2.3017578125],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[2.3017578125,2.3017578125,2.3017578125,2.3017578125,2.3017578125],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer states","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[9.20703125,9.20703125,9.20703125,9.20703125,9.20703125],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[1.0625,2.125,4.25,8.5,17.0],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[14.95703125,14.95703125,14.95703125,14.95703125,14.95703125],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[14.95703125,14.95703125,14.95703125,14.95703125,14.95703125],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer 
states","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[59.828125,59.828125,59.828125,59.828125,59.828125],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[4.25,8.5,17.0,34.0,68.0],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[131.4140625,131.4140625,131.4140625,131.4140625,131.4140625],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[131.4140625,131.4140625,131.4140625,131.4140625,131.4140625],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer states","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[525.65625,525.65625,525.65625,525.65625,525.65625],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[21.25,42.5,85.0,170.0,340.0],"type":"bar","xaxis":"x3","yaxis":"y3"}], 
{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"
],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinec
olor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":tr
ue,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,0.2888888888888889]},"yaxis":{"anchor":"x","domain":[0.0,1.0],"range":[0,150],"title":{"text":"GB memory"}},"xaxis2":{"anchor":"y2","domain":[0.35555555555555557,0.6444444444444445]},"yaxis2":{"anchor":"x2","domain":[0.0,1.0],"matches":"y","showticklabels":false,"range":[0,150]},"xaxis3":{"anchor":"y3","domain":[0.7111111111111111,1.0]},"yaxis3":{"anchor":"x3","domain":[0.0,1.0],"matches":"y","showticklabels":false,"range":[0,150]},"annotations":[{"font":{"size":16},"showarrow":false,"text":"1B model","x":0.14444444444444446,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"8B model","x":0.5,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"70B model","x":0.8555555555555556,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"}],"shapes":[{"line":{"color":"red","dash":"dash"},"type":"line","x0":0,"x1":1,"xref":"x 
domain","y0":80,"y1":80,"yref":"y"},{"line":{"color":"red","dash":"dash"},"type":"line","x0":0,"x1":1,"xref":"x2 domain","y0":80,"y1":80,"yref":"y2"},{"line":{"color":"red","dash":"dash"},"type":"line","x0":0,"x1":1,"xref":"x3 domain","y0":80,"y1":80,"yref":"y3"}],"title":{"text":"Memory Usage vs Sequence Length for Different Model Sizes"},"legend":{"orientation":"v","x":1.02,"y":0.5},"margin":{"r":150},"barmode":"stack","width":1000,"height":410}, {"responsive": true, "scrollZoom": false} ) }; </script> </div>
|
assets/data/benchmarks/dp_scaling.html
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
<div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
|
2 |
+
<script charset="utf-8" src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script> <div id="da8bff0b-93b4-4b93-9e14-f4777891ef0f" class="plotly-graph-div" style="height:400px; width:1000px;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("da8bff0b-93b4-4b93-9e14-f4777891ef0f")) { Plotly.newPlot( "da8bff0b-93b4-4b93-9e14-f4777891ef0f", [{"marker":{"color":"#4ea5b7"},"name":"Throughput (tokens\u002fsec\u002fGPU)","width":0.7,"x":["8","16","32","64","128","256"],"y":[40149.94,37609.69,35367.61,31112.23,26446.44,15700.38],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[37609.69],"marker":{"color":"#e889ab"},"name":"Performance Drop","showlegend":true,"width":0.0875,"x":["16"],"y":[2540.25],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[35367.61],"marker":{"color":"#e889ab"},"showlegend":false,"width":0.0875,"x":["32"],"y":[2242.0800000000017],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[31112.23],"marker":{"color":"#e889ab"},"showlegend":false,"width":0.0875,"x":["64"],"y":[4255.380000000001],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[26446.44],"marker":{"color":"#e889ab"},"showlegend":false,"width":0.0875,"x":["128"],"y":[4665.790000000001],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[15700.38],"marker":{"color":"#e889ab"},"showlegend":false,"width":0.0875,"x":["256"],"y":[10746.06],"type":"bar","xaxis":"x","yaxis":"y"},{"line":{"color":"#e889ab"},"marker":{"color":"#e889ab"},"mode":"lines+markers","name":"Memory Usage (GB)","x":["8","16","32","64","128","256"],"y":[36.66,36.66,36.66,36.66,36.66,36.66],"type":"scatter","xaxis":"x2","yaxis":"y2"}], 
{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"
],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinec
olor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":tr
ue,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,0.45],"title":{"text":"Data Parallelism (DP)"},"showgrid":true,"gridcolor":"LightGray"},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"Throughput (tokens\u002fsec\u002fGPU)"},"showgrid":true,"gridcolor":"LightGray"},"xaxis2":{"anchor":"y2","domain":[0.55,1.0],"title":{"text":"Data Parallelism (DP)"},"showgrid":true,"gridcolor":"LightGray"},"yaxis2":{"anchor":"x2","domain":[0.0,1.0],"title":{"text":"Memory Usage (GB)"},"showgrid":true,"gridcolor":"LightGray"},"annotations":[{"font":{"size":16},"showarrow":false,"text":"Throughput Scaling with Data Parallelism","x":0.225,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"Memory Usage Scaling with Data 
Parallelism","x":0.775,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-6.3%","x":1,"xanchor":"center","xref":"x","xshift":30,"y":38879.815,"yanchor":"middle","yref":"y"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-6.0%","x":2,"xanchor":"center","xref":"x","xshift":30,"y":36488.65,"yanchor":"middle","yref":"y"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-12.0%","x":3,"xanchor":"center","xref":"x","xshift":30,"y":33239.92,"yanchor":"middle","yref":"y"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-15.0%","x":4,"xanchor":"center","xref":"x","xshift":30,"y":28779.335,"yanchor":"middle","yref":"y"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-40.6%","x":5,"xanchor":"center","xref":"x","xshift":30,"y":21073.41,"yanchor":"middle","yref":"y"}],"legend":{"x":0.55,"y":1.0},"width":1000,"height":400,"barmode":"stack"}, {"responsive": true, "scrollZoom": false} ) }; </script> </div>
|
assets/data/benchmarks/memusage_activations.html
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
<div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
|
2 |
-
<script charset="utf-8" src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script> <div id="a841d1b3-f0b4-43f7-90f9-bbb31dc90094" class="plotly-graph-div" style="height:400px; width:1200px;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("a841d1b3-f0b4-43f7-90f9-bbb31dc90094")) { Plotly.newPlot( "a841d1b3-f0b4-43f7-90f9-bbb31dc90094", [{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[25.979034423828125,25.994659423828125,26.025909423828125,26.088409423828125,26.213409423828125],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[25.979034423828125,25.994659423828125,26.025909423828125,26.088409423828125,26.213409423828125],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer 
states","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[51.95806884765625,51.98931884765625,52.05181884765625,52.17681884765625,52.42681884765625],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[9.25390625,28.5078125,97.015625,354.03125,1348.0625],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[243.97711181640625,244.00836181640625,244.07086181640625,244.19586181640625,244.44586181640625],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[243.97711181640625,244.00836181640625,244.07086181640625,244.19586181640625,244.44586181640625],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer 
states","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[487.9542236328125,488.0167236328125,488.1417236328125,488.3917236328125,488.8917236328125],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[46.2578125,142.515625,485.03125,1770.0625,6740.125],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[1519.99072265625,1520.05322265625,1520.17822265625,1520.42822265625,1520.92822265625],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[1519.99072265625,1520.05322265625,1520.17822265625,1520.42822265625,1520.92822265625],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer states","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[3039.9814453125,3040.1064453125,3040.3564453125,3040.8564453125,3041.8564453125],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[145.703125,448.90625,1527.8125,5575.625,21231.25],"type":"bar","xaxis":"x3","yaxis":"y3"}], 
{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"
],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinec
olor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":tr
ue,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,0.2888888888888889],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"GB 
memory"},"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"xaxis2":{"anchor":"y2","domain":[0.35555555555555557,0.6444444444444445],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"yaxis2":{"anchor":"x2","domain":[0.0,1.0],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"xaxis3":{"anchor":"y3","domain":[0.7111111111111111,1.0],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"yaxis3":{"anchor":"x3","domain":[0.0,1.0],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"annotations":[{"font":{"size":16},"showarrow":false,"text":"Meta-Llama-3.1-8B","x":0.14444444444444446,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"Meta-Llama-3.1-70B","x":0.5,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"Meta-Llama-3.1-405B","x":0.8555555555555556,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"}],"barmode":"stack","width":1200,"height":400,"legend":{"title":{}}}, {"responsive": true, "scrollZoom": false} ) }; </script> </div>
|
|
|
1 |
<div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
|
2 |
+
<script charset="utf-8" src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script> <div id="e21a1ffe-d43b-465a-b9a3-a65ced4d70af" class="plotly-graph-div" style="height:400px; width:1000px;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("e21a1ffe-d43b-465a-b9a3-a65ced4d70af")) { Plotly.newPlot( "e21a1ffe-d43b-465a-b9a3-a65ced4d70af", [{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[25.979034423828125,25.994659423828125,26.025909423828125,26.088409423828125,26.213409423828125],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[25.979034423828125,25.994659423828125,26.025909423828125,26.088409423828125,26.213409423828125],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer 
states","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[51.95806884765625,51.98931884765625,52.05181884765625,52.17681884765625,52.42681884765625],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[9.25390625,28.5078125,97.015625,354.03125,1348.0625],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[243.97711181640625,244.00836181640625,244.07086181640625,244.19586181640625,244.44586181640625],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[243.97711181640625,244.00836181640625,244.07086181640625,244.19586181640625,244.44586181640625],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer 
states","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[487.9542236328125,488.0167236328125,488.1417236328125,488.3917236328125,488.8917236328125],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[46.2578125,142.515625,485.03125,1770.0625,6740.125],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[1519.99072265625,1520.05322265625,1520.17822265625,1520.42822265625,1520.92822265625],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[1519.99072265625,1520.05322265625,1520.17822265625,1520.42822265625,1520.92822265625],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer states","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[3039.9814453125,3040.1064453125,3040.3564453125,3040.8564453125,3041.8564453125],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[145.703125,448.90625,1527.8125,5575.625,21231.25],"type":"bar","xaxis":"x3","yaxis":"y3"}], 
{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"
],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinec
olor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":tr
ue,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,0.2888888888888889],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"GB 
memory"},"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"xaxis2":{"anchor":"y2","domain":[0.35555555555555557,0.6444444444444445],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"yaxis2":{"anchor":"x2","domain":[0.0,1.0],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"xaxis3":{"anchor":"y3","domain":[0.7111111111111111,1.0],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"yaxis3":{"anchor":"x3","domain":[0.0,1.0],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"annotations":[{"font":{"size":16},"showarrow":false,"text":"Meta-Llama-3.1-8B","x":0.14444444444444446,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"Meta-Llama-3.1-70B","x":0.5,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"Meta-Llama-3.1-405B","x":0.8555555555555556,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"}],"barmode":"stack","width":1000,"height":400,"legend":{"title":{}}}, {"responsive": true, "scrollZoom": false} ) }; </script> </div>
|
assets/data/benchmarks/tp_sp_scaling.html
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
<div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
|
2 |
+
<script charset="utf-8" src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script> <div id="bcb1feb0-360d-4e1b-b204-6bb1855e7a29" class="plotly-graph-div" style="height:400px; width:1000px;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("bcb1feb0-360d-4e1b-b204-6bb1855e7a29")) { Plotly.newPlot( "bcb1feb0-360d-4e1b-b204-6bb1855e7a29", [{"marker":{"color":"#4ea5b7"},"name":"Tokens\u002fsec\u002fGPU","width":0.7,"x":["2","4","8","16","32"],"y":[14167.25,13460.16,10888.53,6159.3,3609.73],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[13460.16],"marker":{"color":"#e889ab"},"name":"Performance Drop","showlegend":true,"width":0.0875,"x":["4"],"y":[707.0900000000001],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[10888.53],"marker":{"color":"#e889ab"},"showlegend":false,"width":0.0875,"x":["8"],"y":[2571.629999999999],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[6159.3],"marker":{"color":"#e889ab"},"showlegend":false,"width":0.0875,"x":["16"],"y":[4729.2300000000005],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[3609.73],"marker":{"color":"#e889ab"},"showlegend":false,"width":0.0875,"x":["32"],"y":[2549.57],"type":"bar","xaxis":"x","yaxis":"y"},{"marker":{"color":"#cec0fa"},"name":"Max Batch Size","text":["4","10","20","40","100"],"textposition":"inside","width":0.7,"x":["2","4","8","16","32"],"y":[4,10,20,40,100],"type":"bar","xaxis":"x2","yaxis":"y2"}], 
{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"
],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinec
olor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":tr
ue,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,0.45],"title":{"text":"Tensor Parallelism (TP)"},"showgrid":true,"gridcolor":"LightGray"},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"Tokens\u002fsec\u002fGPU"},"showgrid":true,"gridcolor":"LightGray"},"xaxis2":{"anchor":"y2","domain":[0.55,1.0],"title":{"text":"Tensor Parallelism (TP)"},"showgrid":true,"gridcolor":"LightGray"},"yaxis2":{"anchor":"x2","domain":[0.0,1.0],"title":{"text":"Maximum Batch Size"},"showgrid":true,"gridcolor":"LightGray"},"annotations":[{"font":{"size":16},"showarrow":false,"text":"Throughput Scaling with TP\u002fSP (3B Model)","x":0.225,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"Maximum Batch Size per TP 
Value","x":0.775,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-5.0%","x":1,"xanchor":"center","xref":"x","xshift":30,"y":13813.705,"yanchor":"middle","yref":"y"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-19.1%","x":2,"xanchor":"center","xref":"x","xshift":30,"y":12174.345000000001,"yanchor":"middle","yref":"y"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-43.4%","x":3,"xanchor":"center","xref":"x","xshift":30,"y":8523.915,"yanchor":"middle","yref":"y"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-41.4%","x":4,"xanchor":"center","xref":"x","xshift":30,"y":4884.515,"yanchor":"middle","yref":"y"}],"legend":{"x":0.55,"y":1.0},"width":1000,"height":400,"barmode":"stack"}, {"responsive": true, "scrollZoom": false} ) }; </script> </div>
|
assets/data/benchmarks/zero3_memoryusage.html
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
<div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
|
2 |
+
<script charset="utf-8" src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script> <div id="6d74d9d7-30ee-487c-b86d-00833a466164" class="plotly-graph-div" style="height:400px; width:1000px;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("6d74d9d7-30ee-487c-b86d-00833a466164")) { Plotly.newPlot( "6d74d9d7-30ee-487c-b86d-00833a466164", [{"legendgroup":"Model Parameters","marker":{"color":"#4ea5b7"},"name":"Model Parameters","showlegend":true,"x":["1024","4096","16384"],"y":[15.0,15.0,15.0],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"Gradients","marker":{"color":"#e889ab"},"name":"Gradients","showlegend":true,"x":["1024","4096","16384"],"y":[15.0,15.0,15.0],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"Optimizer States","marker":{"color":"#cec0fa"},"name":"Optimizer States","showlegend":true,"x":["1024","4096","16384"],"y":[60.0,60.0,60.0],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"Activations","marker":{"color":"#e38a42"},"name":"Activations","showlegend":true,"x":["1024","4096","16384"],"y":[4.25,17.0,68.0],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"Model Parameters","marker":{"color":"#4ea5b7"},"name":"Model Parameters","showlegend":false,"x":["1024","4096","16384"],"y":[15.0,15.0,15.0],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"Gradients","marker":{"color":"#e889ab"},"name":"Gradients","showlegend":false,"x":["1024","4096","16384"],"y":[15.0,15.0,15.0],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"Optimizer States","marker":{"color":"#cec0fa"},"name":"Optimizer States","showlegend":false,"x":["1024","4096","16384"],"y":[7.5,7.5,7.5],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"Activations","marker":{"color":"#e38a42"},"name":"Activations","showlegend":false,"x":["1024","4096","16384"],"y":[4.25,17.0,68.0],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"Model Parameters","marker":{"color":"#4ea5b7"},"name":"Model 
Parameters","showlegend":false,"x":["1024","4096","16384"],"y":[15.0,15.0,15.0],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"Gradients","marker":{"color":"#e889ab"},"name":"Gradients","showlegend":false,"x":["1024","4096","16384"],"y":[1.875,1.875,1.875],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"Optimizer States","marker":{"color":"#cec0fa"},"name":"Optimizer States","showlegend":false,"x":["1024","4096","16384"],"y":[7.5,7.5,7.5],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"Activations","marker":{"color":"#e38a42"},"name":"Activations","showlegend":false,"x":["1024","4096","16384"],"y":[4.25,17.0,68.0],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"Model Parameters","marker":{"color":"#4ea5b7"},"name":"Model Parameters","showlegend":false,"x":["1024","4096","16384"],"y":[1.875,1.875,1.875],"type":"bar","xaxis":"x4","yaxis":"y4"},{"legendgroup":"Gradients","marker":{"color":"#e889ab"},"name":"Gradients","showlegend":false,"x":["1024","4096","16384"],"y":[1.875,1.875,1.875],"type":"bar","xaxis":"x4","yaxis":"y4"},{"legendgroup":"Optimizer States","marker":{"color":"#cec0fa"},"name":"Optimizer States","showlegend":false,"x":["1024","4096","16384"],"y":[7.5,7.5,7.5],"type":"bar","xaxis":"x4","yaxis":"y4"},{"legendgroup":"Activations","marker":{"color":"#e38a42"},"name":"Activations","showlegend":false,"x":["1024","4096","16384"],"y":[4.25,17.0,68.0],"type":"bar","xaxis":"x4","yaxis":"y4"}], 
{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"
],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinec
olor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":tr
ue,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,0.2125],"title":{"text":"Sequence Length"},"showgrid":true,"gridcolor":"LightGray"},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"Memory Usage (GB)"},"dtick":20,"showgrid":true,"gridcolor":"LightGray"},"xaxis2":{"anchor":"y2","domain":[0.2625,0.475],"title":{"text":"Sequence Length"},"showgrid":true,"gridcolor":"LightGray"},"yaxis2":{"anchor":"x2","domain":[0.0,1.0],"matches":"y","showticklabels":false,"showgrid":true,"gridcolor":"LightGray"},"xaxis3":{"anchor":"y3","domain":[0.525,0.7375],"title":{"text":"Sequence Length"},"showgrid":true,"gridcolor":"LightGray"},"yaxis3":{"anchor":"x3","domain":[0.0,1.0],"matches":"y","showticklabels":false,"showgrid":true,"gridcolor":"LightGray"},"xaxis4":{"anchor":"y4","domain":[0.7875,1.0],"title":{"text":"Sequence 
Length"},"showgrid":true,"gridcolor":"LightGray"},"yaxis4":{"anchor":"x4","domain":[0.0,1.0],"matches":"y","showticklabels":false,"showgrid":true,"gridcolor":"LightGray"},"annotations":[{"font":{"size":16},"showarrow":false,"text":"DP=8","x":0.10625,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"DP=8 Zero-1","x":0.36875,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"DP=8 Zero-2","x":0.6312500000000001,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"DP=8 Zero-3","x":0.89375,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"}],"shapes":[{"line":{"color":"red","dash":"dash"},"type":"line","x0":0,"x1":1,"xref":"x domain","y0":80,"y1":80,"yref":"y"},{"line":{"color":"red","dash":"dash"},"type":"line","x0":0,"x1":1,"xref":"x2 domain","y0":80,"y1":80,"yref":"y2"},{"line":{"color":"red","dash":"dash"},"type":"line","x0":0,"x1":1,"xref":"x3 domain","y0":80,"y1":80,"yref":"y3"},{"line":{"color":"red","dash":"dash"},"type":"line","x0":0,"x1":1,"xref":"x4 domain","y0":80,"y1":80,"yref":"y4"}],"title":{"text":"Memory Usage for 8B Model"},"legend":{"orientation":"v","x":1.02,"y":0.5},"margin":{"r":150},"barmode":"stack","width":1000,"height":400}, {"responsive": true, "scrollZoom": false} ) }; </script> </div>
|
dist/assets/data/benchmarks/dp_ourjourney_memoryusage.html
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
<div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
|
2 |
+
<script charset="utf-8" src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script> <div id="3646e093-fda5-4c7f-8f67-2b049c56786f" class="plotly-graph-div" style="height:410px; width:1000px;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("3646e093-fda5-4c7f-8f67-2b049c56786f")) { Plotly.newPlot( "3646e093-fda5-4c7f-8f67-2b049c56786f", [{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[2.3017578125,2.3017578125,2.3017578125,2.3017578125,2.3017578125],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[2.3017578125,2.3017578125,2.3017578125,2.3017578125,2.3017578125],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer states","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[9.20703125,9.20703125,9.20703125,9.20703125,9.20703125],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[1.0625,2.125,4.25,8.5,17.0],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[14.95703125,14.95703125,14.95703125,14.95703125,14.95703125],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[14.95703125,14.95703125,14.95703125,14.95703125,14.95703125],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer 
states","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[59.828125,59.828125,59.828125,59.828125,59.828125],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[4.25,8.5,17.0,34.0,68.0],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[131.4140625,131.4140625,131.4140625,131.4140625,131.4140625],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[131.4140625,131.4140625,131.4140625,131.4140625,131.4140625],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer states","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[525.65625,525.65625,525.65625,525.65625,525.65625],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[21.25,42.5,85.0,170.0,340.0],"type":"bar","xaxis":"x3","yaxis":"y3"}], 
{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"
],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinec
olor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":tr
ue,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,0.2888888888888889]},"yaxis":{"anchor":"x","domain":[0.0,1.0],"range":[0,150],"title":{"text":"GB memory"}},"xaxis2":{"anchor":"y2","domain":[0.35555555555555557,0.6444444444444445]},"yaxis2":{"anchor":"x2","domain":[0.0,1.0],"matches":"y","showticklabels":false,"range":[0,150]},"xaxis3":{"anchor":"y3","domain":[0.7111111111111111,1.0]},"yaxis3":{"anchor":"x3","domain":[0.0,1.0],"matches":"y","showticklabels":false,"range":[0,150]},"annotations":[{"font":{"size":16},"showarrow":false,"text":"1B model","x":0.14444444444444446,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"8B model","x":0.5,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"70B model","x":0.8555555555555556,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"}],"shapes":[{"line":{"color":"red","dash":"dash"},"type":"line","x0":0,"x1":1,"xref":"x 
domain","y0":80,"y1":80,"yref":"y"},{"line":{"color":"red","dash":"dash"},"type":"line","x0":0,"x1":1,"xref":"x2 domain","y0":80,"y1":80,"yref":"y2"},{"line":{"color":"red","dash":"dash"},"type":"line","x0":0,"x1":1,"xref":"x3 domain","y0":80,"y1":80,"yref":"y3"}],"title":{"text":"Memory Usage vs Sequence Length for Different Model Sizes"},"legend":{"orientation":"v","x":1.02,"y":0.5},"margin":{"r":150},"barmode":"stack","width":1000,"height":410}, {"responsive": true, "scrollZoom": false} ) }; </script> </div>
|
dist/assets/data/benchmarks/dp_scaling.html
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
<div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
|
2 |
+
<script charset="utf-8" src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script> <div id="da8bff0b-93b4-4b93-9e14-f4777891ef0f" class="plotly-graph-div" style="height:400px; width:1000px;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("da8bff0b-93b4-4b93-9e14-f4777891ef0f")) { Plotly.newPlot( "da8bff0b-93b4-4b93-9e14-f4777891ef0f", [{"marker":{"color":"#4ea5b7"},"name":"Throughput (tokens\u002fsec\u002fGPU)","width":0.7,"x":["8","16","32","64","128","256"],"y":[40149.94,37609.69,35367.61,31112.23,26446.44,15700.38],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[37609.69],"marker":{"color":"#e889ab"},"name":"Performance Drop","showlegend":true,"width":0.0875,"x":["16"],"y":[2540.25],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[35367.61],"marker":{"color":"#e889ab"},"showlegend":false,"width":0.0875,"x":["32"],"y":[2242.0800000000017],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[31112.23],"marker":{"color":"#e889ab"},"showlegend":false,"width":0.0875,"x":["64"],"y":[4255.380000000001],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[26446.44],"marker":{"color":"#e889ab"},"showlegend":false,"width":0.0875,"x":["128"],"y":[4665.790000000001],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[15700.38],"marker":{"color":"#e889ab"},"showlegend":false,"width":0.0875,"x":["256"],"y":[10746.06],"type":"bar","xaxis":"x","yaxis":"y"},{"line":{"color":"#e889ab"},"marker":{"color":"#e889ab"},"mode":"lines+markers","name":"Memory Usage (GB)","x":["8","16","32","64","128","256"],"y":[36.66,36.66,36.66,36.66,36.66,36.66],"type":"scatter","xaxis":"x2","yaxis":"y2"}], 
{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"
],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinec
olor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":tr
ue,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,0.45],"title":{"text":"Data Parallelism (DP)"},"showgrid":true,"gridcolor":"LightGray"},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"Throughput (tokens\u002fsec\u002fGPU)"},"showgrid":true,"gridcolor":"LightGray"},"xaxis2":{"anchor":"y2","domain":[0.55,1.0],"title":{"text":"Data Parallelism (DP)"},"showgrid":true,"gridcolor":"LightGray"},"yaxis2":{"anchor":"x2","domain":[0.0,1.0],"title":{"text":"Memory Usage (GB)"},"showgrid":true,"gridcolor":"LightGray"},"annotations":[{"font":{"size":16},"showarrow":false,"text":"Throughput Scaling with Data Parallelism","x":0.225,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"Memory Usage Scaling with Data 
Parallelism","x":0.775,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-6.3%","x":1,"xanchor":"center","xref":"x","xshift":30,"y":38879.815,"yanchor":"middle","yref":"y"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-6.0%","x":2,"xanchor":"center","xref":"x","xshift":30,"y":36488.65,"yanchor":"middle","yref":"y"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-12.0%","x":3,"xanchor":"center","xref":"x","xshift":30,"y":33239.92,"yanchor":"middle","yref":"y"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-15.0%","x":4,"xanchor":"center","xref":"x","xshift":30,"y":28779.335,"yanchor":"middle","yref":"y"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-40.6%","x":5,"xanchor":"center","xref":"x","xshift":30,"y":21073.41,"yanchor":"middle","yref":"y"}],"legend":{"x":0.55,"y":1.0},"width":1000,"height":400,"barmode":"stack"}, {"responsive": true, "scrollZoom": false} ) }; </script> </div>
|
dist/assets/data/benchmarks/memusage_activations.html
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
<div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
|
2 |
-
<script charset="utf-8" src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script> <div id="a841d1b3-f0b4-43f7-90f9-bbb31dc90094" class="plotly-graph-div" style="height:400px; width:1200px;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("a841d1b3-f0b4-43f7-90f9-bbb31dc90094")) { Plotly.newPlot( "a841d1b3-f0b4-43f7-90f9-bbb31dc90094", [{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[25.979034423828125,25.994659423828125,26.025909423828125,26.088409423828125,26.213409423828125],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[25.979034423828125,25.994659423828125,26.025909423828125,26.088409423828125,26.213409423828125],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer 
states","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[51.95806884765625,51.98931884765625,52.05181884765625,52.17681884765625,52.42681884765625],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[9.25390625,28.5078125,97.015625,354.03125,1348.0625],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[243.97711181640625,244.00836181640625,244.07086181640625,244.19586181640625,244.44586181640625],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[243.97711181640625,244.00836181640625,244.07086181640625,244.19586181640625,244.44586181640625],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer 
states","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[487.9542236328125,488.0167236328125,488.1417236328125,488.3917236328125,488.8917236328125],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[46.2578125,142.515625,485.03125,1770.0625,6740.125],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[1519.99072265625,1520.05322265625,1520.17822265625,1520.42822265625,1520.92822265625],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[1519.99072265625,1520.05322265625,1520.17822265625,1520.42822265625,1520.92822265625],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer states","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[3039.9814453125,3040.1064453125,3040.3564453125,3040.8564453125,3041.8564453125],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[145.703125,448.90625,1527.8125,5575.625,21231.25],"type":"bar","xaxis":"x3","yaxis":"y3"}], 
{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"
],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinec
olor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":tr
ue,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,0.2888888888888889],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"GB 
memory"},"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"xaxis2":{"anchor":"y2","domain":[0.35555555555555557,0.6444444444444445],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"yaxis2":{"anchor":"x2","domain":[0.0,1.0],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"xaxis3":{"anchor":"y3","domain":[0.7111111111111111,1.0],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"yaxis3":{"anchor":"x3","domain":[0.0,1.0],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"annotations":[{"font":{"size":16},"showarrow":false,"text":"Meta-Llama-3.1-8B","x":0.14444444444444446,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"Meta-Llama-3.1-70B","x":0.5,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"Meta-Llama-3.1-405B","x":0.8555555555555556,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"}],"barmode":"stack","width":1200,"height":400,"legend":{"title":{}}}, {"responsive": true, "scrollZoom": false} ) }; </script> </div>
|
|
|
1 |
<div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
|
2 |
+
<script charset="utf-8" src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script> <div id="e21a1ffe-d43b-465a-b9a3-a65ced4d70af" class="plotly-graph-div" style="height:400px; width:1000px;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("e21a1ffe-d43b-465a-b9a3-a65ced4d70af")) { Plotly.newPlot( "e21a1ffe-d43b-465a-b9a3-a65ced4d70af", [{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[25.979034423828125,25.994659423828125,26.025909423828125,26.088409423828125,26.213409423828125],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[25.979034423828125,25.994659423828125,26.025909423828125,26.088409423828125,26.213409423828125],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer 
states","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[51.95806884765625,51.98931884765625,52.05181884765625,52.17681884765625,52.42681884765625],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":true,"x":["1024","2048","4096","8192","16384"],"y":[9.25390625,28.5078125,97.015625,354.03125,1348.0625],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[243.97711181640625,244.00836181640625,244.07086181640625,244.19586181640625,244.44586181640625],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[243.97711181640625,244.00836181640625,244.07086181640625,244.19586181640625,244.44586181640625],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer 
states","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[487.9542236328125,488.0167236328125,488.1417236328125,488.3917236328125,488.8917236328125],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[46.2578125,142.515625,485.03125,1770.0625,6740.125],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"parameters","marker":{"color":"#4ea5b7"},"name":"parameters","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[1519.99072265625,1520.05322265625,1520.17822265625,1520.42822265625,1520.92822265625],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"gradients","marker":{"color":"#e889ab"},"name":"gradients","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[1519.99072265625,1520.05322265625,1520.17822265625,1520.42822265625,1520.92822265625],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"optimizer states","marker":{"color":"#cec0fa"},"name":"optimizer states","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[3039.9814453125,3040.1064453125,3040.3564453125,3040.8564453125,3041.8564453125],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"activations","marker":{"color":"#e38a42"},"name":"activations","showlegend":false,"x":["1024","2048","4096","8192","16384"],"y":[145.703125,448.90625,1527.8125,5575.625,21231.25],"type":"bar","xaxis":"x3","yaxis":"y3"}], 
{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"
],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinec
olor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":tr
ue,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,0.2888888888888889],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"GB 
memory"},"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"xaxis2":{"anchor":"y2","domain":[0.35555555555555557,0.6444444444444445],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"yaxis2":{"anchor":"x2","domain":[0.0,1.0],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"xaxis3":{"anchor":"y3","domain":[0.7111111111111111,1.0],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"yaxis3":{"anchor":"x3","domain":[0.0,1.0],"showgrid":true,"gridwidth":1,"gridcolor":"LightGray"},"annotations":[{"font":{"size":16},"showarrow":false,"text":"Meta-Llama-3.1-8B","x":0.14444444444444446,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"Meta-Llama-3.1-70B","x":0.5,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"Meta-Llama-3.1-405B","x":0.8555555555555556,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"}],"barmode":"stack","width":1000,"height":400,"legend":{"title":{}}}, {"responsive": true, "scrollZoom": false} ) }; </script> </div>
|
dist/assets/data/benchmarks/tp_sp_scaling.html
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
<div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
|
2 |
+
<script charset="utf-8" src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script> <div id="bcb1feb0-360d-4e1b-b204-6bb1855e7a29" class="plotly-graph-div" style="height:400px; width:1000px;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("bcb1feb0-360d-4e1b-b204-6bb1855e7a29")) { Plotly.newPlot( "bcb1feb0-360d-4e1b-b204-6bb1855e7a29", [{"marker":{"color":"#4ea5b7"},"name":"Tokens\u002fsec\u002fGPU","width":0.7,"x":["2","4","8","16","32"],"y":[14167.25,13460.16,10888.53,6159.3,3609.73],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[13460.16],"marker":{"color":"#e889ab"},"name":"Performance Drop","showlegend":true,"width":0.0875,"x":["4"],"y":[707.0900000000001],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[10888.53],"marker":{"color":"#e889ab"},"showlegend":false,"width":0.0875,"x":["8"],"y":[2571.629999999999],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[6159.3],"marker":{"color":"#e889ab"},"showlegend":false,"width":0.0875,"x":["16"],"y":[4729.2300000000005],"type":"bar","xaxis":"x","yaxis":"y"},{"base":[3609.73],"marker":{"color":"#e889ab"},"showlegend":false,"width":0.0875,"x":["32"],"y":[2549.57],"type":"bar","xaxis":"x","yaxis":"y"},{"marker":{"color":"#cec0fa"},"name":"Max Batch Size","text":["4","10","20","40","100"],"textposition":"inside","width":0.7,"x":["2","4","8","16","32"],"y":[4,10,20,40,100],"type":"bar","xaxis":"x2","yaxis":"y2"}], 
{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"
],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinec
olor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":tr
ue,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,0.45],"title":{"text":"Tensor Parallelism (TP)"},"showgrid":true,"gridcolor":"LightGray"},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"Tokens\u002fsec\u002fGPU"},"showgrid":true,"gridcolor":"LightGray"},"xaxis2":{"anchor":"y2","domain":[0.55,1.0],"title":{"text":"Tensor Parallelism (TP)"},"showgrid":true,"gridcolor":"LightGray"},"yaxis2":{"anchor":"x2","domain":[0.0,1.0],"title":{"text":"Maximum Batch Size"},"showgrid":true,"gridcolor":"LightGray"},"annotations":[{"font":{"size":16},"showarrow":false,"text":"Throughput Scaling with TP\u002fSP (3B Model)","x":0.225,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"Maximum Batch Size per TP 
Value","x":0.775,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-5.0%","x":1,"xanchor":"center","xref":"x","xshift":30,"y":13813.705,"yanchor":"middle","yref":"y"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-19.1%","x":2,"xanchor":"center","xref":"x","xshift":30,"y":12174.345000000001,"yanchor":"middle","yref":"y"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-43.4%","x":3,"xanchor":"center","xref":"x","xshift":30,"y":8523.915,"yanchor":"middle","yref":"y"},{"font":{"color":"#e889ab"},"showarrow":false,"text":"-41.4%","x":4,"xanchor":"center","xref":"x","xshift":30,"y":4884.515,"yanchor":"middle","yref":"y"}],"legend":{"x":0.55,"y":1.0},"width":1000,"height":400,"barmode":"stack"}, {"responsive": true, "scrollZoom": false} ) }; </script> </div>
|
dist/assets/data/benchmarks/zero3_memoryusage.html
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
<div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
|
2 |
+
<script charset="utf-8" src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script> <div id="6d74d9d7-30ee-487c-b86d-00833a466164" class="plotly-graph-div" style="height:400px; width:1000px;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("6d74d9d7-30ee-487c-b86d-00833a466164")) { Plotly.newPlot( "6d74d9d7-30ee-487c-b86d-00833a466164", [{"legendgroup":"Model Parameters","marker":{"color":"#4ea5b7"},"name":"Model Parameters","showlegend":true,"x":["1024","4096","16384"],"y":[15.0,15.0,15.0],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"Gradients","marker":{"color":"#e889ab"},"name":"Gradients","showlegend":true,"x":["1024","4096","16384"],"y":[15.0,15.0,15.0],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"Optimizer States","marker":{"color":"#cec0fa"},"name":"Optimizer States","showlegend":true,"x":["1024","4096","16384"],"y":[60.0,60.0,60.0],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"Activations","marker":{"color":"#e38a42"},"name":"Activations","showlegend":true,"x":["1024","4096","16384"],"y":[4.25,17.0,68.0],"type":"bar","xaxis":"x","yaxis":"y"},{"legendgroup":"Model Parameters","marker":{"color":"#4ea5b7"},"name":"Model Parameters","showlegend":false,"x":["1024","4096","16384"],"y":[15.0,15.0,15.0],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"Gradients","marker":{"color":"#e889ab"},"name":"Gradients","showlegend":false,"x":["1024","4096","16384"],"y":[15.0,15.0,15.0],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"Optimizer States","marker":{"color":"#cec0fa"},"name":"Optimizer States","showlegend":false,"x":["1024","4096","16384"],"y":[7.5,7.5,7.5],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"Activations","marker":{"color":"#e38a42"},"name":"Activations","showlegend":false,"x":["1024","4096","16384"],"y":[4.25,17.0,68.0],"type":"bar","xaxis":"x2","yaxis":"y2"},{"legendgroup":"Model Parameters","marker":{"color":"#4ea5b7"},"name":"Model 
Parameters","showlegend":false,"x":["1024","4096","16384"],"y":[15.0,15.0,15.0],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"Gradients","marker":{"color":"#e889ab"},"name":"Gradients","showlegend":false,"x":["1024","4096","16384"],"y":[1.875,1.875,1.875],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"Optimizer States","marker":{"color":"#cec0fa"},"name":"Optimizer States","showlegend":false,"x":["1024","4096","16384"],"y":[7.5,7.5,7.5],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"Activations","marker":{"color":"#e38a42"},"name":"Activations","showlegend":false,"x":["1024","4096","16384"],"y":[4.25,17.0,68.0],"type":"bar","xaxis":"x3","yaxis":"y3"},{"legendgroup":"Model Parameters","marker":{"color":"#4ea5b7"},"name":"Model Parameters","showlegend":false,"x":["1024","4096","16384"],"y":[1.875,1.875,1.875],"type":"bar","xaxis":"x4","yaxis":"y4"},{"legendgroup":"Gradients","marker":{"color":"#e889ab"},"name":"Gradients","showlegend":false,"x":["1024","4096","16384"],"y":[1.875,1.875,1.875],"type":"bar","xaxis":"x4","yaxis":"y4"},{"legendgroup":"Optimizer States","marker":{"color":"#cec0fa"},"name":"Optimizer States","showlegend":false,"x":["1024","4096","16384"],"y":[7.5,7.5,7.5],"type":"bar","xaxis":"x4","yaxis":"y4"},{"legendgroup":"Activations","marker":{"color":"#e38a42"},"name":"Activations","showlegend":false,"x":["1024","4096","16384"],"y":[4.25,17.0,68.0],"type":"bar","xaxis":"x4","yaxis":"y4"}], 
{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"
],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinec
olor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":tr
ue,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,0.2125],"title":{"text":"Sequence Length"},"showgrid":true,"gridcolor":"LightGray"},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"Memory Usage (GB)"},"dtick":20,"showgrid":true,"gridcolor":"LightGray"},"xaxis2":{"anchor":"y2","domain":[0.2625,0.475],"title":{"text":"Sequence Length"},"showgrid":true,"gridcolor":"LightGray"},"yaxis2":{"anchor":"x2","domain":[0.0,1.0],"matches":"y","showticklabels":false,"showgrid":true,"gridcolor":"LightGray"},"xaxis3":{"anchor":"y3","domain":[0.525,0.7375],"title":{"text":"Sequence Length"},"showgrid":true,"gridcolor":"LightGray"},"yaxis3":{"anchor":"x3","domain":[0.0,1.0],"matches":"y","showticklabels":false,"showgrid":true,"gridcolor":"LightGray"},"xaxis4":{"anchor":"y4","domain":[0.7875,1.0],"title":{"text":"Sequence 
Length"},"showgrid":true,"gridcolor":"LightGray"},"yaxis4":{"anchor":"x4","domain":[0.0,1.0],"matches":"y","showticklabels":false,"showgrid":true,"gridcolor":"LightGray"},"annotations":[{"font":{"size":16},"showarrow":false,"text":"DP=8","x":0.10625,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"DP=8 Zero-1","x":0.36875,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"DP=8 Zero-2","x":0.6312500000000001,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"DP=8 Zero-3","x":0.89375,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"}],"shapes":[{"line":{"color":"red","dash":"dash"},"type":"line","x0":0,"x1":1,"xref":"x domain","y0":80,"y1":80,"yref":"y"},{"line":{"color":"red","dash":"dash"},"type":"line","x0":0,"x1":1,"xref":"x2 domain","y0":80,"y1":80,"yref":"y2"},{"line":{"color":"red","dash":"dash"},"type":"line","x0":0,"x1":1,"xref":"x3 domain","y0":80,"y1":80,"yref":"y3"},{"line":{"color":"red","dash":"dash"},"type":"line","x0":0,"x1":1,"xref":"x4 domain","y0":80,"y1":80,"yref":"y4"}],"title":{"text":"Memory Usage for 8B Model"},"legend":{"orientation":"v","x":1.02,"y":0.5},"margin":{"r":150},"barmode":"stack","width":1000,"height":400}, {"responsive": true, "scrollZoom": false} ) }; </script> </div>
|
dist/index.html
CHANGED
@@ -416,10 +416,10 @@
|
|
416 |
|
417 |
<p>An interesting observation here is that the memory is not static for a given model: it scales linearly with both the sequence length and the batch size. This means the activation memory is the part that will blow up when we increase our batch size or train with longer sequences. We can use this equation to look at how memory usage changes with sequence length, for example for Llama models (<code>bs=1</code>):</p>
|
418 |
|
419 |
-
<iframe class="l-body-outset" id="
|
420 |
<script>
|
421 |
window.addEventListener('load', function() {
|
422 |
-
const frame = document.getElementById('
|
423 |
frame.style.height = frame.contentWindow.document.documentElement.scrollHeight + 'px';
|
424 |
frame.style.width = frame.contentWindow.document.documentElement.scrollWidth + 'px';
|
425 |
});
|
@@ -643,17 +643,33 @@
|
|
643 |
|
644 |
<p>While data parallelism cleverly overlaps the all-reduce gradient synchronization with backward computation to save time, this benefit starts to break down at large scales. As we add more and more GPUs (hundreds or thousands), the overhead of coordinating between them grows significantly. The end result? We get less and less efficient returns from each additional GPU we add to the system:</p>
|
645 |
|
646 |
-
<p><img alt="image.png" src="/assets/images/dp_scaling.svg"/></p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
647 |
|
648 |
<p>As expected, we can also see that the memory usage per GPU is not affected by adding more DP ranks for training.</p>
|
649 |
|
650 |
<p><strong>We’ve explored data parallelism, our first (simple) strategy to scale training across more GPUs. It works like gradient accumulation but parallelizes the forward and backward passes on micro batches, thus increasing throughput!</strong></p>
|
651 |
|
652 |
<p>The keen reader has probably already noted, however, that this assumes we can fit the forward pass of at least one input sample (<em>mbs=1</em>) into our GPU memory. This is not always the case! As we can see, larger models don’t fit into a single GPU, even with activation recomputation activated: </p>
|
|
|
653 |
|
654 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
655 |
|
656 |
-
<aside>Tip: you can quickly eyeball the minimum memory required for your model’s parameters by multiplying the parameter count by 2 (two bytes per parameter), e.g. 70B → 140GB (≈130GiB).</aside>
|
657 |
|
658 |
<p>Do we have other options for these larger models? Thankfully, we do have some solutions. They involve either moving some of these tensors to the CPU or splitting the weight/gradient/optimizer-state tensors across GPU devices!</p>
|
659 |
|
@@ -799,7 +815,15 @@
|
|
799 |
|
800 |
<p>However, there is a limit here: DP only works if a layer of the model fits in a single GPU, and ZeRO can only partition the parameters, gradients, and optimizer states, but not the activation memory! Recall from the activation memory discussion that activation memory scales with sequence length and batch size. Naturally, we could just limit those, but in practice we don’t want to be limited by hardware to training only with short sequence lengths. </p>
|
801 |
|
802 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
803 |
|
804 |
<p>Now that we've efficiently used the DP axis to reduce memory through efficient communication patterns, let's explore a new, orthogonal axis of parallelism - Tensor Parallelism. Unlike ZeRO-3, which relies on heavy parameter communication, TP manages to shard parameters, gradients, optimizer states AND activations across devices without requiring any model parameter movement between GPUs. What! How is this even possible?! Let's explore this seemingly magical approach together! 🙂</p>
|
805 |
|
@@ -1059,7 +1083,16 @@
|
|
1059 |
|
1060 |
<p>As you might expect, this communication overhead becomes increasingly problematic as we scale up tensor parallelism. To illustrate this, let’s check throughput as we scale TP with SP for a 3B model:</p>
|
1061 |
|
1062 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1063 |
<p>Impact of combined Tensor and Sequence Parallelism (TP/SP) on a 3B model’s performance and memory utilization with 4096 seqlen: when scaling both TP and SP together, there's a trade-off between computational efficiency (left) and memory capacity (right). While higher parallelism degrees reduce per-GPU throughput, they enable processing of significantly larger batch sizes by reducing the activation memory.</p>
|
1064 |
|
1065 |
<p>Let’s summarize our observations:</p>
|
|
|
416 |
|
417 |
<p>An interesting observation here is that the memory is not static for a given model: it scales linearly with both the sequence length and the batch size. This means the activation memory is the part that will blow up when we increase our batch size or train with longer sequences. We can use this equation to look at how memory usage changes with sequence length, for example for Llama models (<code>bs=1</code>):</p>
|
418 |
|
419 |
+
<iframe class="l-body-outset" id="plotFrame3" src="assets/data/benchmarks/memusage_activations.html" width="90%" scrolling="no" frameborder="0"></iframe>
|
420 |
<script>
|
421 |
window.addEventListener('load', function() {
|
422 |
+
const frame = document.getElementById('plotFrame3');
|
423 |
frame.style.height = frame.contentWindow.document.documentElement.scrollHeight + 'px';
|
424 |
frame.style.width = frame.contentWindow.document.documentElement.scrollWidth + 'px';
|
425 |
});
|
|
|
643 |
|
644 |
<p>While data parallelism cleverly overlaps the all-reduce gradient synchronization with backward computation to save time, this benefit starts to break down at large scales. As we add more and more GPUs (hundreds or thousands), the overhead of coordinating between them grows significantly. The end result? We get less and less efficient returns from each additional GPU we add to the system:</p>
|
645 |
|
646 |
+
<!-- <p><img alt="image.png" src="/assets/images/dp_scaling.svg"/></p> -->
|
647 |
+
<iframe class="l-body-outset" id="plotFrame4" src="assets/data/benchmarks/dp_scaling.html" width="90%" scrolling="no" frameborder="0"></iframe>
|
648 |
+
<script>
|
649 |
+
window.addEventListener('load', function() {
|
650 |
+
const frame = document.getElementById('plotFrame4');
|
651 |
+
frame.style.height = frame.contentWindow.document.documentElement.scrollHeight + 'px';
|
652 |
+
frame.style.width = frame.contentWindow.document.documentElement.scrollWidth + 'px';
|
653 |
+
});
|
654 |
+
</script>
|
655 |
|
656 |
<p>As expected, we can also see that the memory usage per GPU is not affected by adding more DP ranks for training.</p>
|
657 |
|
658 |
<p><strong>We’ve explored data parallelism, our first (simple) strategy to scale training across more GPUs. It works like gradient accumulation but parallelizes the forward and backward passes on micro batches, thus increasing throughput!</strong></p>
|
659 |
|
660 |
<p>The keen reader has probably already noted, however, that this assumes we can fit the forward pass of at least one input sample (<em>mbs=1</em>) into our GPU memory. This is not always the case! As we can see, larger models don’t fit into a single GPU, even with activation recomputation activated: </p>
|
661 |
+
<aside>Tip: you can quickly eyeball the minimum memory required for your model’s parameters by multiplying the parameter count by 2 (two bytes per parameter), e.g. 70B → 140GB (≈130GiB).</aside>
|
662 |
|
663 |
+
<iframe class="l-body-outset" id="plotFrame5" src="assets/data/benchmarks/dp_ourjourney_memoryusage.html" width="90%" scrolling="no" frameborder="0"></iframe>
|
664 |
+
<script>
|
665 |
+
window.addEventListener('load', function() {
|
666 |
+
const frame = document.getElementById('plotFrame5');
|
667 |
+
frame.style.height = frame.contentWindow.document.documentElement.scrollHeight + 'px';
|
668 |
+
frame.style.width = frame.contentWindow.document.documentElement.scrollWidth + 'px';
|
669 |
+
});
|
670 |
+
</script>
|
671 |
+
<!-- <p><img alt="dp_ourjourney_memoryusage.svg" src="/assets/images/dp_ourjourney_memoryusage.svg" /></p> -->
|
672 |
|
|
|
673 |
|
674 |
<p>Do we have other options for these larger models? Thankfully, we do have some solutions. They involve either moving some of these tensors to the CPU or splitting the weight/gradient/optimizer-state tensors across GPU devices!</p>
|
675 |
|
|
|
815 |
|
816 |
<p>However, there is a limit here: DP only works if a layer of the model fits in a single GPU, and ZeRO can only partition the parameters, gradients, and optimizer states, but not the activation memory! Recall from the activation memory discussion that activation memory scales with sequence length and batch size. Naturally, we could just limit those, but in practice we don’t want to be limited by hardware to training only with short sequence lengths. </p>
|
817 |
|
818 |
+
<iframe class="l-body-outset" id="plotFrame6" src="assets/data/benchmarks/zero3_memoryusage.html" width="90%" scrolling="no" frameborder="0"></iframe>
|
819 |
+
<script>
|
820 |
+
window.addEventListener('load', function() {
|
821 |
+
const frame = document.getElementById('plotFrame6');
|
822 |
+
frame.style.height = frame.contentWindow.document.documentElement.scrollHeight + 'px';
|
823 |
+
frame.style.width = frame.contentWindow.document.documentElement.scrollWidth + 'px';
|
824 |
+
});
|
825 |
+
</script>
|
826 |
+
<!-- <p><img alt="zero3_memoryusage.svg" src="/assets/images/zero3_memoryusage.svg" /></p> -->
|
827 |
|
828 |
<p>Now that we've efficiently used the DP axis to reduce memory through efficient communication patterns, let's explore a new, orthogonal axis of parallelism - Tensor Parallelism. Unlike ZeRO-3, which relies on heavy parameter communication, TP manages to shard parameters, gradients, optimizer states AND activations across devices without requiring any model parameter movement between GPUs. What! How is this even possible?! Let's explore this seemingly magical approach together! 🙂</p>
|
829 |
|
|
|
1083 |
|
1084 |
<p>As you might expect, this communication overhead becomes increasingly problematic as we scale up tensor parallelism. To illustrate this, let’s check throughput as we scale TP with SP for a 3B model:</p>
|
1085 |
|
1086 |
+
<iframe class="l-body-outset" id="plotFrame2" src="assets/data/benchmarks/tp_sp_scaling.html" width="90%" scrolling="no" frameborder="0"></iframe>
|
1087 |
+
<script>
|
1088 |
+
window.addEventListener('load', function() {
|
1089 |
+
const frame = document.getElementById('plotFrame2');
|
1090 |
+
frame.style.height = frame.contentWindow.document.documentElement.scrollHeight + 'px';
|
1091 |
+
frame.style.width = frame.contentWindow.document.documentElement.scrollWidth + 'px';
|
1092 |
+
});
|
1093 |
+
</script>
|
1094 |
+
|
1095 |
+
<!-- <p><img alt="tp_sp_scaling.svg" src="/assets/images/tp_sp_scaling.svg" /></p> -->
|
1096 |
<p>Impact of combined Tensor and Sequence Parallelism (TP/SP) on a 3B model’s performance and memory utilization with 4096 seqlen: when scaling both TP and SP together, there's a trade-off between computational efficiency (left) and memory capacity (right). While higher parallelism degrees reduce per-GPU throughput, they enable processing of significantly larger batch sizes by reducing the activation memory.</p>
|
1097 |
|
1098 |
<p>Let’s summarize our observations:</p>
|
src/index.html
CHANGED
@@ -416,10 +416,10 @@
|
|
416 |
|
417 |
<p>An interesting observation here is that the memory is not static for a given model: it scales linearly with both the sequence length and the batch size. This means the activation memory is the part that will blow up when we increase our batch size or train with longer sequences. We can use this equation to look at how memory usage changes with sequence length, for example for Llama models (<code>bs=1</code>):</p>
|
418 |
|
419 |
-
<iframe class="l-body-outset" id="
|
420 |
<script>
|
421 |
window.addEventListener('load', function() {
|
422 |
-
const frame = document.getElementById('
|
423 |
frame.style.height = frame.contentWindow.document.documentElement.scrollHeight + 'px';
|
424 |
frame.style.width = frame.contentWindow.document.documentElement.scrollWidth + 'px';
|
425 |
});
|
@@ -643,17 +643,33 @@
|
|
643 |
|
644 |
<p>While data parallelism cleverly overlaps the all-reduce gradient synchronization with backward computation to save time, this benefit starts to break down at large scales. As we add more and more GPUs (hundreds or thousands), the overhead of coordinating between them grows significantly. The end result? We get less and less efficient returns from each additional GPU we add to the system:</p>
|
645 |
|
646 |
-
<p><img alt="image.png" src="/assets/images/dp_scaling.svg"/></p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
647 |
|
648 |
<p>As expected, we can also see that the memory usage per GPU is not affected by adding more DP ranks for training.</p>
|
649 |
|
650 |
<p><strong>We’ve explored data parallelism, our first (simple) strategy to scale training across more GPUs. It works like gradient accumulation but parallelizes the forward and backward passes on micro batches, thus increasing throughput!</strong></p>
|
651 |
|
652 |
<p>The keen reader has probably already noted, however, that this assumes we can fit the forward pass of at least one input sample (<em>mbs=1</em>) into our GPU memory. This is not always the case! As we can see, larger models don’t fit into a single GPU, even with activation recomputation activated: </p>
|
|
|
653 |
|
654 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
655 |
|
656 |
-
<aside>Tip: you can quickly eyeball the minimum memory required for your model’s parameters by multiplying the parameter count by 2 (two bytes per parameter), e.g. 70B → 140GB (≈130GiB).</aside>
|
657 |
|
658 |
<p>Do we have other options for these larger models? Thankfully, we do have some solutions. They involve either moving some of these tensors to the CPU or splitting the weight/gradient/optimizer-state tensors across GPU devices!</p>
|
659 |
|
@@ -799,7 +815,15 @@
|
|
799 |
|
800 |
<p>However, there is a limit here: DP only works if a layer of the model fits in a single GPU, and ZeRO can only partition the parameters, gradients, and optimizer states, but not the activation memory! Recall from the activation memory discussion that activation memory scales with sequence length and batch size. Naturally, we could just limit those, but in practice we don’t want to be limited by hardware to training only with short sequence lengths. </p>
|
801 |
|
802 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
803 |
|
804 |
<p>Now that we've efficiently used the DP axis to reduce memory through efficient communication patterns, let's explore a new, orthogonal axis of parallelism - Tensor Parallelism. Unlike ZeRO-3, which relies on heavy parameter communication, TP manages to shard parameters, gradients, optimizer states AND activations across devices without requiring any model parameter movement between GPUs. What! How is this even possible?! Let's explore this seemingly magical approach together! 🙂</p>
|
805 |
|
@@ -1059,7 +1083,16 @@
|
|
1059 |
|
1060 |
<p>As you might expect, this communication overhead becomes increasingly problematic as we scale up tensor parallelism. To illustrate this, let’s check throughput as we scale TP with SP for a 3B model:</p>
|
1061 |
|
1062 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1063 |
<p>Impact of combined Tensor and Sequence Parallelism (TP/SP) on a 3B model’s performance and memory utilization with 4096 seqlen: when scaling both TP and SP together, there's a trade-off between computational efficiency (left) and memory capacity (right). While higher parallelism degrees reduce per-GPU throughput, they enable processing of significantly larger batch sizes by reducing the activation memory.</p>
|
1064 |
|
1065 |
<p>Let’s summarize our observations:</p>
|
|
|
416 |
|
417 |
<p>An interesting observation here is that the memory is not static for a given model: it scales linearly with both the sequence length and the batch size. This means the activation memory is the part that will blow up when we increase our batch size or train with longer sequences. We can use this equation to look at how memory usage changes with sequence length, for example for Llama models (<code>bs=1</code>):</p>
|
418 |
|
419 |
+
<iframe class="l-body-outset" id="plotFrame3" src="assets/data/benchmarks/memusage_activations.html" width="90%" scrolling="no" frameborder="0"></iframe>
|
420 |
<script>
|
421 |
window.addEventListener('load', function() {
|
422 |
+
const frame = document.getElementById('plotFrame3');
|
423 |
frame.style.height = frame.contentWindow.document.documentElement.scrollHeight + 'px';
|
424 |
frame.style.width = frame.contentWindow.document.documentElement.scrollWidth + 'px';
|
425 |
});
|
|
|
643 |
|
644 |
<p>While data parallelism cleverly overlaps the all-reduce gradient synchronization with backward computation to save time, this benefit starts to break down at large scales. As we add more and more GPUs (hundreds or thousands), the overhead of coordinating between them grows significantly. The end result? We get less and less efficient returns from each additional GPU we add to the system:</p>
|
645 |
|
646 |
+
<!-- <p><img alt="image.png" src="/assets/images/dp_scaling.svg"/></p> -->
|
647 |
+
<iframe class="l-body-outset" id="plotFrame4" src="assets/data/benchmarks/dp_scaling.html" width="90%" scrolling="no" frameborder="0"></iframe>
|
648 |
+
<script>
|
649 |
+
window.addEventListener('load', function() {
|
650 |
+
const frame = document.getElementById('plotFrame4');
|
651 |
+
frame.style.height = frame.contentWindow.document.documentElement.scrollHeight + 'px';
|
652 |
+
frame.style.width = frame.contentWindow.document.documentElement.scrollWidth + 'px';
|
653 |
+
});
|
654 |
+
</script>
|
655 |
|
656 |
<p>As expected, we can also see that the memory usage per GPU is not affected by adding more DP ranks for training.</p>
|
657 |
|
658 |
<p><strong>We’ve explored data parallelism, our first (simple) strategy to scale training across more GPUs. It works like gradient accumulation but parallelizes the forward and backward passes on micro batches, thus increasing throughput!</strong></p>
|
659 |
|
660 |
<p>The keen reader has probably already noted, however, that this assumes we can fit the forward pass of at least one input sample (<em>mbs=1</em>) into our GPU memory. This is not always the case! As we can see, larger models don’t fit into a single GPU, even with activation recomputation activated: </p>
|
661 |
+
<aside>Tip: you can quickly eyeball the minimum memory required for your model’s parameters by multiplying the parameter count by 2 (two bytes per parameter), e.g. 70B → 140GB (≈130GiB).</aside>
|
662 |
|
663 |
+
<iframe class="l-body-outset" id="plotFrame5" src="assets/data/benchmarks/dp_ourjourney_memoryusage.html" width="90%" scrolling="no" frameborder="0"></iframe>
|
664 |
+
<script>
|
665 |
+
// Resize the plotFrame5 iframe to fit its embedded document.
// Runs on the window 'load' event, i.e. after the page and its sub-resources
// (including the iframe) have finished loading.
window.addEventListener('load', function() {
|
666 |
+
const frame = document.getElementById('plotFrame5');
|
667 |
+
// Reading contentWindow.document only works when the framed page is
// same-origin with this page (the iframe src here is a relative path).
frame.style.height = frame.contentWindow.document.documentElement.scrollHeight + 'px';
|
668 |
+
// Width is set the same way, from the framed document's scrollWidth.
frame.style.width = frame.contentWindow.document.documentElement.scrollWidth + 'px';
|
669 |
+
});
|
670 |
+
</script>
|
671 |
+
<!-- <p><img alt="dp_ourjourney_memoryusage.svg" src="/assets/images/dp_ourjourney_memoryusage.svg" /></p> -->
|
672 |
|
|
|
673 |
|
674 |
<p>Do we have other options for these larger models? We do have some solutions, thankfully. They involve either moving some of these tensors to the CPU or splitting the weights/gradients/optimizer-state tensors across GPU devices!</p>
|
675 |
|
|
|
815 |
|
816 |
<p>However, there is a limit here: DP only works if a layer of the model fits in a single GPU, and ZeRO can only partition the parameters, gradients, and optimizer states, but not the activation memory! Recall from the activation memory discussion that it scales with sequence length and batch size. Naturally we could just limit those, but in practice we don’t want to be limited by hardware to training with only a short sequence length. </p>
|
817 |
|
818 |
+
<!-- Embedded benchmark plot; the title gives assistive technology an accessible name for the frame. -->
<iframe class="l-body-outset" id="plotFrame6" src="assets/data/benchmarks/zero3_memoryusage.html" title="ZeRO-3 memory usage (interactive plot)" width="90%" scrolling="no" frameborder="0"></iframe>
|
819 |
+
<script>
|
820 |
+
// Resize the plotFrame6 iframe to fit its embedded document.
// Runs on the window 'load' event, i.e. after the page and its sub-resources
// (including the iframe) have finished loading.
window.addEventListener('load', function() {
|
821 |
+
const frame = document.getElementById('plotFrame6');
|
822 |
+
// Reading contentWindow.document only works when the framed page is
// same-origin with this page (the iframe src here is a relative path).
frame.style.height = frame.contentWindow.document.documentElement.scrollHeight + 'px';
|
823 |
+
// Width is set the same way, from the framed document's scrollWidth.
frame.style.width = frame.contentWindow.document.documentElement.scrollWidth + 'px';
|
824 |
+
});
|
825 |
+
</script>
|
826 |
+
<!-- <p><img alt="zero3_memoryusage.svg" src="/assets/images/zero3_memoryusage.svg" /></p> -->
|
827 |
|
828 |
<p>Now that we've efficiently used the DP axis to reduce memory through efficient communication patterns, let's explore a new, orthogonal axis of parallelism — Tensor Parallelism. Unlike ZeRO3, which relies on heavy parameter communication, TP manages to shard parameters, gradients, optimizer states AND activations across devices without requiring any model parameter movement between GPUs. What! How is this even possible?! Let's explore this seemingly magical approach together! 🙂</p>
|
829 |
|
|
|
1083 |
|
1084 |
<p>As you might expect, this communication overhead becomes increasingly problematic as we scale up tensor parallelism. To illustrate this, let’s check throughput as we scale TP with SP for a 3B model:</p>
|
1085 |
|
1086 |
+
<!-- Embedded benchmark plot; the title gives assistive technology an accessible name for the frame. -->
<iframe class="l-body-outset" id="plotFrame2" src="assets/data/benchmarks/tp_sp_scaling.html" title="Tensor and sequence parallelism scaling for a 3B model (interactive plot)" width="90%" scrolling="no" frameborder="0"></iframe>
|
1087 |
+
<script>
|
1088 |
+
// Resize the plotFrame2 iframe to fit its embedded document.
// Runs on the window 'load' event, i.e. after the page and its sub-resources
// (including the iframe) have finished loading.
window.addEventListener('load', function() {
|
1089 |
+
const frame = document.getElementById('plotFrame2');
|
1090 |
+
// Reading contentWindow.document only works when the framed page is
// same-origin with this page (the iframe src here is a relative path).
frame.style.height = frame.contentWindow.document.documentElement.scrollHeight + 'px';
|
1091 |
+
// Width is set the same way, from the framed document's scrollWidth.
frame.style.width = frame.contentWindow.document.documentElement.scrollWidth + 'px';
|
1092 |
+
});
|
1093 |
+
</script>
|
1094 |
+
|
1095 |
+
<!-- <p><img alt="tp_sp_scaling.svg" src="/assets/images/tp_sp_scaling.svg" /></p> -->
|
1096 |
<p>Impact of combined Tensor and Sequence Parallelism (TP/SP) on a 3B model’s performance and memory utilization with 4096 seqlen: when scaling both TP and SP together, there's a trade-off between computational efficiency (left) and memory capacity (right). While higher parallelism degrees reduce per-GPU throughput, they enable processing of significantly larger batch sizes by reducing the activation memory.</p>
|
1097 |
|
1098 |
<p>Let’s summarize our observations:</p>
|