Spaces:
Running
Running
fixes (#68)
Browse files- updates (59ea36cbfd76d707e96185f3462835606be09063)
- assets/images/ultra-cheatsheet.svg +0 -0
- dist/assets/images/ultra-cheatsheet.svg +0 -0
- dist/distill.bundle.js +1 -1
- dist/distill.bundle.js.map +0 -0
- dist/fragments/banner.html +0 -0
- dist/fragments/benchmarks_interactive.html +0 -0
- dist/index.html +0 -0
- src/distill.js +1 -1
- src/fragments/banner.html +0 -0
- src/fragments/benchmarks_interactive.html +0 -0
- src/index.html +1 -1
assets/images/ultra-cheatsheet.svg
CHANGED
|
|
dist/assets/images/ultra-cheatsheet.svg
CHANGED
|
|
dist/distill.bundle.js
CHANGED
@@ -2146,7 +2146,7 @@ function _arrayWithHoles(r) { if (Array.isArray(r)) return r; }
|
|
2146 |
function bylineTemplate(frontMatter) {
|
2147 |
return "\n <div class=\"byline grid\">\n <div>\n <h3>Authors</h3>\n <div>\n ".concat(frontMatter.authors.map(function (author, i) {
|
2148 |
return "\n <span class=\"author\">\n ".concat(author.personalURL ? "\n <a class=\"name\" href=\"".concat(author.personalURL, "\">").concat(author.name) + (i + 1 < frontMatter.authors.length ? "," : "") + "</a>" : "\n <span class=\"name\">".concat(author.name) + (i + 1 < frontMatter.authors.length ? "," : "") + "</span>", "\n </span>\n ");
|
2149 |
-
}).join(''), "\n </div>\n </div>\n <div >\n <h3>Affiliation</h3>\n <div><a href=\"https://huggingface.co/\">
|
2150 |
}
|
2151 |
var Byline = /*#__PURE__*/function (_HTMLElement4) {
|
2152 |
function Byline() {
|
|
|
2146 |
function bylineTemplate(frontMatter) {
|
2147 |
return "\n <div class=\"byline grid\">\n <div>\n <h3>Authors</h3>\n <div>\n ".concat(frontMatter.authors.map(function (author, i) {
|
2148 |
return "\n <span class=\"author\">\n ".concat(author.personalURL ? "\n <a class=\"name\" href=\"".concat(author.personalURL, "\">").concat(author.name) + (i + 1 < frontMatter.authors.length ? "," : "") + "</a>" : "\n <span class=\"name\">".concat(author.name) + (i + 1 < frontMatter.authors.length ? "," : "") + "</span>", "\n </span>\n ");
|
2149 |
+
}).join(''), "\n </div>\n </div>\n <div >\n <h3>Affiliation</h3>\n <div><a href=\"https://huggingface.co/\">Hugging Face</a>\n </div>\n </div>\n <div >\n <h3>Published</h3>\n <div>Feb 19, 2025</div>\n </div>\n </div>\n");
|
2150 |
}
|
2151 |
var Byline = /*#__PURE__*/function (_HTMLElement4) {
|
2152 |
function Byline() {
|
dist/distill.bundle.js.map
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
dist/fragments/banner.html
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
dist/fragments/benchmarks_interactive.html
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
dist/index.html
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
src/distill.js
CHANGED
@@ -2097,7 +2097,7 @@ d-appendix > distill-appendix {
|
|
2097 |
</div>
|
2098 |
<div >
|
2099 |
<h3>Affiliation</h3>
|
2100 |
-
<div><a href="https://huggingface.co/">
|
2101 |
</div>
|
2102 |
</div>
|
2103 |
<div >
|
|
|
2097 |
</div>
|
2098 |
<div >
|
2099 |
<h3>Affiliation</h3>
|
2100 |
+
<div><a href="https://huggingface.co/">Hugging Face</a>
|
2101 |
</div>
|
2102 |
</div>
|
2103 |
<div >
|
src/fragments/banner.html
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
src/fragments/benchmarks_interactive.html
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
src/index.html
CHANGED
@@ -1381,7 +1381,7 @@
|
|
1381 |
|
1382 |
<ol>
|
1383 |
<li>Send “current keys and values” to the next machine except during the last time step in a non-blocking manner so we can start the following step before this step is finished</li>
|
1384 |
-
<li>Locally compute the attention score on the “current keys and values” it already has, which typically involves performing <d-math>Softmax(\frac{QK^T}{\sqrt{d}}) * V</d-math
|
1385 |
<li>Wait to receive keys and values from the previous GPU and then circle back to step 1, where “current keys and values” are now the key/values just received from the previous GPU.</li>
|
1386 |
</ol>
|
1387 |
|
|
|
1381 |
|
1382 |
<ol>
|
1383 |
<li>Send “current keys and values” to the next machine except during the last time step in a non-blocking manner so we can start the following step before this step is finished</li>
|
1384 |
+
<li>Locally compute the attention score on the “current keys and values” it already has, which typically involves performing <d-math>Softmax(\frac{QK^T}{\sqrt{d}}) * V</d-math>.</li>
|
1385 |
<li>Wait to receive keys and values from the previous GPU and then circle back to step 1, where “current keys and values” are now the key/values just received from the previous GPU.</li>
|
1386 |
</ol>
|
1387 |
|