Spaces:
Running
Running
update
Browse files- dist/index.html +128 -117
- dist/main.bundle.js +0 -0
- dist/main.bundle.js.map +0 -0
- src/index.html +128 -117
- src/memory.js +4 -4
dist/index.html
CHANGED
@@ -9,32 +9,39 @@
|
|
9 |
<title>FineWeb: decanting the web for the finest text data at scale</title>
|
10 |
<link rel="stylesheet" href="style.css">
|
11 |
<style>
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
</style>
|
39 |
</head>
|
40 |
|
@@ -90,98 +97,102 @@
|
|
90 |
|
91 |
<div id="graph" style="position: relative; width: 700px; height: 500px;"></div>
|
92 |
<div id="controls">
|
93 |
-
<div class="
|
94 |
-
<
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
<
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
<
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
<
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
<
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
<
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
<
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
<
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
<
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
<
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
<label for="mixed">Mixed Precision:</label>
|
145 |
-
<input type="checkbox" id="mixed" name="mixed" checked>
|
146 |
-
<span></span> <!-- Empty span to maintain grid alignment -->
|
147 |
-
</div>
|
148 |
-
<div class="row">
|
149 |
-
<label for="recomputation">Recomputation:</label>
|
150 |
-
<select id="recomputation" name="recomputation">
|
151 |
-
<option value="none">None</option>
|
152 |
-
<option value="selective">Selective</option>
|
153 |
-
<option value="full">Full</option>
|
154 |
-
</select>
|
155 |
-
<span></span> <!-- Empty span to maintain grid alignment -->
|
156 |
-
</div>
|
157 |
-
<div class="row">
|
158 |
-
<label for="zero">Zero:</label>
|
159 |
-
<select id="zero" name="zero">
|
160 |
-
<option value="0">0</option>
|
161 |
-
<option value="1">1</option>
|
162 |
-
<option value="2">2</option>
|
163 |
-
<option value="3">3</option>
|
164 |
-
</select>
|
165 |
-
<span></span> <!-- Empty span to maintain grid alignment -->
|
166 |
-
</div>
|
167 |
-
<div class="row">
|
168 |
-
<label for="ff_activation">FF Activation:</label>
|
169 |
-
<select id="ff_activation" name="ff_activation">
|
170 |
-
<option value="relu">ReLU</option>
|
171 |
-
<option value="gelu">GELU</option>
|
172 |
-
<option value="swiglu">SwiGLU</option>
|
173 |
-
</select>
|
174 |
-
<span></span> <!-- Empty span to maintain grid alignment -->
|
175 |
</div>
|
176 |
-
<div class="
|
177 |
-
<
|
178 |
-
|
179 |
-
<
|
180 |
-
<
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
185 |
</div>
|
186 |
</div>
|
187 |
|
|
|
9 |
<title>FineWeb: decanting the web for the finest text data at scale</title>
|
10 |
<link rel="stylesheet" href="style.css">
|
11 |
<style>
|
12 |
+
#controls {
|
13 |
+
display: grid;
|
14 |
+
grid-template-columns: auto 1fr auto;
|
15 |
+
gap: 5px;
|
16 |
+
align-items: center;
|
17 |
+
max-width: 700px;
|
18 |
+
margin-bottom: 10px;
|
19 |
+
}
|
20 |
+
#controls .row {
|
21 |
+
display: contents;
|
22 |
+
}
|
23 |
+
#controls label {
|
24 |
+
text-align: right;
|
25 |
+
padding-right: 10px;
|
26 |
+
}
|
27 |
+
#controls input[type="range"] {
|
28 |
+
width: 100%;
|
29 |
+
}
|
30 |
+
#controls input[type="number"] {
|
31 |
+
width: 60px;
|
32 |
+
}
|
33 |
+
#controls select {
|
34 |
+
width: 100%;
|
35 |
+
}
|
36 |
+
#controls .column {
|
37 |
+
display: contents;
|
38 |
+
}
|
39 |
+
#graph svg {
|
40 |
+
font-family: sans-serif;
|
41 |
+
}
|
42 |
+
#graph svg rect {
|
43 |
+
cursor: pointer;
|
44 |
+
}
|
45 |
</style>
|
46 |
</head>
|
47 |
|
|
|
97 |
|
98 |
<div id="graph" style="position: relative; width: 700px; height: 500px;"></div>
|
99 |
<div id="controls">
|
100 |
+
<div class="column">
|
101 |
+
<div class="row">
|
102 |
+
<label for="a">Attention Heads (a):</label>
|
103 |
+
<input type="range" id="a" name="a" min="1" max="128" value="8">
|
104 |
+
<input type="number" id="a_input" value="8" min="1" max="128">
|
105 |
+
</div>
|
106 |
+
<div class="row">
|
107 |
+
<label for="b">Micro Batch Size (b):</label>
|
108 |
+
<input type="range" id="b" name="b" min="1" max="53248" value="32">
|
109 |
+
<input type="number" id="b_input" value="32" min="1" max="53248">
|
110 |
+
</div>
|
111 |
+
<div class="row">
|
112 |
+
<label for="h">Hidden Dimension Size (h):</label>
|
113 |
+
<input type="range" id="h" name="h" min="1" max="16384" value="512">
|
114 |
+
<input type="number" id="h_input" value="512" min="128" max="16384">
|
115 |
+
</div>
|
116 |
+
<div class="row">
|
117 |
+
<label for="h_ff">Feedforward Dimension Size (h_ff):</label>
|
118 |
+
<input type="range" id="h_ff" name="h_ff" min="1" max="65536" value="2048">
|
119 |
+
<input type="number" id="h_ff_input" value="2048" min="512" max="65536">
|
120 |
+
</div>
|
121 |
+
<div class="row">
|
122 |
+
<label for="L">Number of Layers (L):</label>
|
123 |
+
<input type="range" id="L" name="L" min="1" max="126" value="12">
|
124 |
+
<input type="number" id="L_input" value="12" min="1" max="126">
|
125 |
+
</div>
|
126 |
+
<div class="row">
|
127 |
+
<label for="s">Sequence Length (s):</label>
|
128 |
+
<input type="range" id="s" name="s" min="1" max="128000" value="128">
|
129 |
+
<input type="number" id="s_input" value="128" min="64" max="128000">
|
130 |
+
</div>
|
131 |
+
<div class="row">
|
132 |
+
<label for="v">Vocabulary Size (v):</label>
|
133 |
+
<input type="range" id="v" name="v" min="1000" max="100000" value="30522">
|
134 |
+
<input type="number" id="v_input" value="30522" min="1000" max="100000">
|
135 |
+
</div>
|
136 |
+
<div class="row">
|
137 |
+
<label for="k">Optimizer Parameters (k):</label>
|
138 |
+
<input type="range" id="k" name="k" min="1" max="16" value="8">
|
139 |
+
<input type="number" id="k_input" value="8" min="1" max="16">
|
140 |
+
</div>
|
141 |
+
<div class="row">
|
142 |
+
<label for="tp">Tensor Model Parallelism (t):</label>
|
143 |
+
<input type="range" id="tp" name="tp" min="1" max="16" value="8">
|
144 |
+
<input type="number" id="tp_input" value="8" min="1" max="16">
|
145 |
+
</div>
|
146 |
+
<div class="row">
|
147 |
+
<label for="dp">Data Model Parallelism (d):</label>
|
148 |
+
<input type="range" id="dp" name="dp" min="1" max="256" value="1">
|
149 |
+
<input type="number" id="dp_input" value="1" min="1" max="256">
|
150 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
</div>
|
152 |
+
<div class="column">
|
153 |
+
<div class="row">
|
154 |
+
<label for="mixed">Mixed Precision:</label>
|
155 |
+
<input type="checkbox" id="mixed" name="mixed" checked>
|
156 |
+
<span></span> <!-- Empty span to maintain grid alignment -->
|
157 |
+
</div>
|
158 |
+
<div class="row">
|
159 |
+
<label for="recomputation">Recomputation:</label>
|
160 |
+
<select id="recomputation" name="recomputation">
|
161 |
+
<option value="none">None</option>
|
162 |
+
<option value="selective">Selective</option>
|
163 |
+
<option value="full">Full</option>
|
164 |
+
</select>
|
165 |
+
<span></span> <!-- Empty span to maintain grid alignment -->
|
166 |
+
</div>
|
167 |
+
<div class="row">
|
168 |
+
<label for="zero">Zero:</label>
|
169 |
+
<select id="zero" name="zero">
|
170 |
+
<option value="0">0</option>
|
171 |
+
<option value="1">1</option>
|
172 |
+
<option value="2">2</option>
|
173 |
+
<option value="3">3</option>
|
174 |
+
</select>
|
175 |
+
<span></span> <!-- Empty span to maintain grid alignment -->
|
176 |
+
</div>
|
177 |
+
<div class="row">
|
178 |
+
<label for="ff_activation">FF Activation:</label>
|
179 |
+
<select id="ff_activation" name="ff_activation">
|
180 |
+
<option value="relu">ReLU</option>
|
181 |
+
<option value="gelu">GELU</option>
|
182 |
+
<option value="swiglu">SwiGLU</option>
|
183 |
+
</select>
|
184 |
+
<span></span> <!-- Empty span to maintain grid alignment -->
|
185 |
+
</div>
|
186 |
+
<div class="row">
|
187 |
+
<label for="presets">Presets:</label>
|
188 |
+
<select id="presets" name="presets">
|
189 |
+
<option value="Llama 3 Tiny">Llama 3 Tiny</option>
|
190 |
+
<option value="Llama 3 8B">Llama 3 8B</option>
|
191 |
+
<option value="Llama 3 70B">Llama 3 70B</option>
|
192 |
+
<option value="Llama 3 405B">Llama 3 405B</option>
|
193 |
+
</select>
|
194 |
+
<span></span> <!-- Empty span to maintain grid alignment -->
|
195 |
+
</div>
|
196 |
</div>
|
197 |
</div>
|
198 |
|
dist/main.bundle.js
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
dist/main.bundle.js.map
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
src/index.html
CHANGED
@@ -9,32 +9,39 @@
|
|
9 |
<title>FineWeb: decanting the web for the finest text data at scale</title>
|
10 |
<link rel="stylesheet" href="style.css">
|
11 |
<style>
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
</style>
|
39 |
</head>
|
40 |
|
@@ -90,98 +97,102 @@
|
|
90 |
|
91 |
<div id="graph" style="position: relative; width: 700px; height: 500px;"></div>
|
92 |
<div id="controls">
|
93 |
-
<div class="
|
94 |
-
<
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
<
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
<
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
<
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
<
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
<
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
<
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
<
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
<
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
<
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
<label for="mixed">Mixed Precision:</label>
|
145 |
-
<input type="checkbox" id="mixed" name="mixed" checked>
|
146 |
-
<span></span> <!-- Empty span to maintain grid alignment -->
|
147 |
-
</div>
|
148 |
-
<div class="row">
|
149 |
-
<label for="recomputation">Recomputation:</label>
|
150 |
-
<select id="recomputation" name="recomputation">
|
151 |
-
<option value="none">None</option>
|
152 |
-
<option value="selective">Selective</option>
|
153 |
-
<option value="full">Full</option>
|
154 |
-
</select>
|
155 |
-
<span></span> <!-- Empty span to maintain grid alignment -->
|
156 |
-
</div>
|
157 |
-
<div class="row">
|
158 |
-
<label for="zero">Zero:</label>
|
159 |
-
<select id="zero" name="zero">
|
160 |
-
<option value="0">0</option>
|
161 |
-
<option value="1">1</option>
|
162 |
-
<option value="2">2</option>
|
163 |
-
<option value="3">3</option>
|
164 |
-
</select>
|
165 |
-
<span></span> <!-- Empty span to maintain grid alignment -->
|
166 |
-
</div>
|
167 |
-
<div class="row">
|
168 |
-
<label for="ff_activation">FF Activation:</label>
|
169 |
-
<select id="ff_activation" name="ff_activation">
|
170 |
-
<option value="relu">ReLU</option>
|
171 |
-
<option value="gelu">GELU</option>
|
172 |
-
<option value="swiglu">SwiGLU</option>
|
173 |
-
</select>
|
174 |
-
<span></span> <!-- Empty span to maintain grid alignment -->
|
175 |
</div>
|
176 |
-
<div class="
|
177 |
-
<
|
178 |
-
|
179 |
-
<
|
180 |
-
<
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
185 |
</div>
|
186 |
</div>
|
187 |
|
|
|
9 |
<title>FineWeb: decanting the web for the finest text data at scale</title>
|
10 |
<link rel="stylesheet" href="style.css">
|
11 |
<style>
|
12 |
+
#controls {
|
13 |
+
display: grid;
|
14 |
+
grid-template-columns: auto 1fr auto;
|
15 |
+
gap: 5px;
|
16 |
+
align-items: center;
|
17 |
+
max-width: 700px;
|
18 |
+
margin-bottom: 10px;
|
19 |
+
}
|
20 |
+
#controls .row {
|
21 |
+
display: contents;
|
22 |
+
}
|
23 |
+
#controls label {
|
24 |
+
text-align: right;
|
25 |
+
padding-right: 10px;
|
26 |
+
}
|
27 |
+
#controls input[type="range"] {
|
28 |
+
width: 100%;
|
29 |
+
}
|
30 |
+
#controls input[type="number"] {
|
31 |
+
width: 60px;
|
32 |
+
}
|
33 |
+
#controls select {
|
34 |
+
width: 100%;
|
35 |
+
}
|
36 |
+
#controls .column {
|
37 |
+
display: contents;
|
38 |
+
}
|
39 |
+
#graph svg {
|
40 |
+
font-family: sans-serif;
|
41 |
+
}
|
42 |
+
#graph svg rect {
|
43 |
+
cursor: pointer;
|
44 |
+
}
|
45 |
</style>
|
46 |
</head>
|
47 |
|
|
|
97 |
|
98 |
<div id="graph" style="position: relative; width: 700px; height: 500px;"></div>
|
99 |
<div id="controls">
|
100 |
+
<div class="column">
|
101 |
+
<div class="row">
|
102 |
+
<label for="a">Attention Heads (a):</label>
|
103 |
+
<input type="range" id="a" name="a" min="1" max="128" value="8">
|
104 |
+
<input type="number" id="a_input" value="8" min="1" max="128">
|
105 |
+
</div>
|
106 |
+
<div class="row">
|
107 |
+
<label for="b">Micro Batch Size (b):</label>
|
108 |
+
<input type="range" id="b" name="b" min="1" max="53248" value="32">
|
109 |
+
<input type="number" id="b_input" value="32" min="1" max="53248">
|
110 |
+
</div>
|
111 |
+
<div class="row">
|
112 |
+
<label for="h">Hidden Dimension Size (h):</label>
|
113 |
+
<input type="range" id="h" name="h" min="1" max="16384" value="512">
|
114 |
+
<input type="number" id="h_input" value="512" min="128" max="16384">
|
115 |
+
</div>
|
116 |
+
<div class="row">
|
117 |
+
<label for="h_ff">Feedforward Dimension Size (h_ff):</label>
|
118 |
+
<input type="range" id="h_ff" name="h_ff" min="1" max="65536" value="2048">
|
119 |
+
<input type="number" id="h_ff_input" value="2048" min="512" max="65536">
|
120 |
+
</div>
|
121 |
+
<div class="row">
|
122 |
+
<label for="L">Number of Layers (L):</label>
|
123 |
+
<input type="range" id="L" name="L" min="1" max="126" value="12">
|
124 |
+
<input type="number" id="L_input" value="12" min="1" max="126">
|
125 |
+
</div>
|
126 |
+
<div class="row">
|
127 |
+
<label for="s">Sequence Length (s):</label>
|
128 |
+
<input type="range" id="s" name="s" min="1" max="128000" value="128">
|
129 |
+
<input type="number" id="s_input" value="128" min="64" max="128000">
|
130 |
+
</div>
|
131 |
+
<div class="row">
|
132 |
+
<label for="v">Vocabulary Size (v):</label>
|
133 |
+
<input type="range" id="v" name="v" min="1000" max="100000" value="30522">
|
134 |
+
<input type="number" id="v_input" value="30522" min="1000" max="100000">
|
135 |
+
</div>
|
136 |
+
<div class="row">
|
137 |
+
<label for="k">Optimizer Parameters (k):</label>
|
138 |
+
<input type="range" id="k" name="k" min="1" max="16" value="8">
|
139 |
+
<input type="number" id="k_input" value="8" min="1" max="16">
|
140 |
+
</div>
|
141 |
+
<div class="row">
|
142 |
+
<label for="tp">Tensor Model Parallelism (t):</label>
|
143 |
+
<input type="range" id="tp" name="tp" min="1" max="16" value="8">
|
144 |
+
<input type="number" id="tp_input" value="8" min="1" max="16">
|
145 |
+
</div>
|
146 |
+
<div class="row">
|
147 |
+
<label for="dp">Data Model Parallelism (d):</label>
|
148 |
+
<input type="range" id="dp" name="dp" min="1" max="256" value="1">
|
149 |
+
<input type="number" id="dp_input" value="1" min="1" max="256">
|
150 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
</div>
|
152 |
+
<div class="column">
|
153 |
+
<div class="row">
|
154 |
+
<label for="mixed">Mixed Precision:</label>
|
155 |
+
<input type="checkbox" id="mixed" name="mixed" checked>
|
156 |
+
<span></span> <!-- Empty span to maintain grid alignment -->
|
157 |
+
</div>
|
158 |
+
<div class="row">
|
159 |
+
<label for="recomputation">Recomputation:</label>
|
160 |
+
<select id="recomputation" name="recomputation">
|
161 |
+
<option value="none">None</option>
|
162 |
+
<option value="selective">Selective</option>
|
163 |
+
<option value="full">Full</option>
|
164 |
+
</select>
|
165 |
+
<span></span> <!-- Empty span to maintain grid alignment -->
|
166 |
+
</div>
|
167 |
+
<div class="row">
|
168 |
+
<label for="zero">Zero:</label>
|
169 |
+
<select id="zero" name="zero">
|
170 |
+
<option value="0">0</option>
|
171 |
+
<option value="1">1</option>
|
172 |
+
<option value="2">2</option>
|
173 |
+
<option value="3">3</option>
|
174 |
+
</select>
|
175 |
+
<span></span> <!-- Empty span to maintain grid alignment -->
|
176 |
+
</div>
|
177 |
+
<div class="row">
|
178 |
+
<label for="ff_activation">FF Activation:</label>
|
179 |
+
<select id="ff_activation" name="ff_activation">
|
180 |
+
<option value="relu">ReLU</option>
|
181 |
+
<option value="gelu">GELU</option>
|
182 |
+
<option value="swiglu">SwiGLU</option>
|
183 |
+
</select>
|
184 |
+
<span></span> <!-- Empty span to maintain grid alignment -->
|
185 |
+
</div>
|
186 |
+
<div class="row">
|
187 |
+
<label for="presets">Presets:</label>
|
188 |
+
<select id="presets" name="presets">
|
189 |
+
<option value="Llama 3 Tiny">Llama 3 Tiny</option>
|
190 |
+
<option value="Llama 3 8B">Llama 3 8B</option>
|
191 |
+
<option value="Llama 3 70B">Llama 3 70B</option>
|
192 |
+
<option value="Llama 3 405B">Llama 3 405B</option>
|
193 |
+
</select>
|
194 |
+
<span></span> <!-- Empty span to maintain grid alignment -->
|
195 |
+
</div>
|
196 |
</div>
|
197 |
</div>
|
198 |
|
src/memory.js
CHANGED
@@ -294,10 +294,10 @@ function formatBytes(bytes) {
|
|
294 |
}
|
295 |
|
296 |
const presets = {
|
297 |
-
"Llama 3 Tiny": { a: 16, b: 3, h: 1024, h_ff: 4096, L: 1, s: 7, v: 30522, k: 8, tp: 1, zero: "
|
298 |
-
"Llama 3 8B": { a: 32, b: 32, h: 4096, h_ff: 16384, L: 32, s: 256, v: 30522, k: 8, tp: 1, zero: "
|
299 |
-
"Llama 3 70B": { a: 64, b: 32, h: 8192, h_ff: 32768, L: 80, s: 256, v: 30522, k: 8, tp:
|
300 |
-
"Llama 3 405B": { a: 128, b: 32, h: 16384, h_ff: 65536, L: 126, s: 256, v: 30522, k: 8,
|
301 |
};
|
302 |
|
303 |
function setPresetValues(preset) {
|
|
|
294 |
}
|
295 |
|
296 |
const presets = {
|
297 |
+
"Llama 3 Tiny": { a: 16, b: 3, h: 1024, h_ff: 4096, L: 1, s: 7, v: 30522, k: 8, tp: 1, zero: "1", dp: 1, mixed: true, recomputation: "none", ff_activation: "gelu" },
|
298 |
+
"Llama 3 8B": { a: 32, b: 32, h: 4096, h_ff: 16384, L: 32, s: 256, v: 30522, k: 8, tp: 1, zero: "1", dp: 1, mixed: true, recomputation: "none", ff_activation: "swiglu" },
|
299 |
+
"Llama 3 70B": { a: 64, b: 32, h: 8192, h_ff: 32768, L: 80, s: 256, v: 30522, k: 8, tp: 8, zero: "1", dp: 8, mixed: true, recomputation: "none", ff_activation: "swiglu" },
|
300 |
+
"Llama 3 405B": { a: 128, b: 32, h: 16384, h_ff: 65536, L: 126, s: 256, v: 30522, k: 8, tp: 8, zero: "1", dp: 8, mixed: true, recomputation: "none", ff_activation: "swiglu" }
|
301 |
};
|
302 |
|
303 |
function setPresetValues(preset) {
|