Spaces:
Running
Running
Update index.html
Browse files- index.html +204 -319
index.html
CHANGED
@@ -2,21 +2,17 @@
|
|
2 |
<html>
|
3 |
<head>
|
4 |
<meta charset="utf-8">
|
5 |
-
<meta name="description"
|
6 |
-
|
7 |
-
<meta name="keywords" content="Nerfies, D-NeRF, NeRF">
|
8 |
<meta name="viewport" content="width=device-width, initial-scale=1">
|
9 |
-
<title>
|
10 |
-
|
11 |
-
<link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
|
12 |
-
rel="stylesheet">
|
13 |
|
|
|
14 |
<link rel="stylesheet" href="./static/css/bulma.min.css">
|
15 |
<link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
|
16 |
<link rel="stylesheet" href="./static/css/bulma-slider.min.css">
|
17 |
<link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
|
18 |
-
<link rel="stylesheet"
|
19 |
-
href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
|
20 |
<link rel="stylesheet" href="./static/css/index.css">
|
21 |
<link rel="icon" href="./static/images/favicon.svg">
|
22 |
|
@@ -33,86 +29,25 @@
|
|
33 |
<div class="container is-max-desktop">
|
34 |
<div class="columns is-centered">
|
35 |
<div class="column has-text-centered">
|
36 |
-
<h1 class="title is-1 publication-title">
|
37 |
-
<div class="is-size-5 publication-authors">
|
38 |
-
<span class="author-block">
|
39 |
-
<a href="https://keunhong.com" target="_blank">Keunhong Park</a><sup>1</sup>,</span>
|
40 |
-
<span class="author-block">
|
41 |
-
<a href="https://utkarshsinha.com" target="_blank">Utkarsh Sinha</a><sup>2</sup>,</span>
|
42 |
-
<span class="author-block">
|
43 |
-
<a href="https://jonbarron.info" target="_blank">Jonathan T. Barron</a><sup>2</sup>,
|
44 |
-
</span>
|
45 |
-
<span class="author-block">
|
46 |
-
<a href="http://sofienbouaziz.com" target="_blank">Sofien Bouaziz</a><sup>2</sup>,
|
47 |
-
</span>
|
48 |
-
<span class="author-block">
|
49 |
-
<a href="https://www.danbgoldman.com" target="_blank">Dan B Goldman</a><sup>2</sup>,
|
50 |
-
</span>
|
51 |
-
<span class="author-block">
|
52 |
-
<a href="https://homes.cs.washington.edu/~seitz/" target="_blank">Steven M. Seitz</a><sup>1,2</sup>,
|
53 |
-
</span>
|
54 |
-
<span class="author-block">
|
55 |
-
<a href="http://www.ricardomartinbrualla.com" target="_blank">Ricardo Martin-Brualla</a><sup>2</sup>
|
56 |
-
</span>
|
57 |
-
</div>
|
58 |
-
|
59 |
<div class="is-size-5 publication-authors">
|
60 |
-
<span class="author-block"
|
61 |
-
<span class="author-block"><sup>2</sup>Google Research</span>
|
62 |
</div>
|
63 |
-
|
64 |
<div class="column has-text-centered">
|
65 |
<div class="publication-links">
|
66 |
-
<!-- PDF Link. -->
|
67 |
-
<span class="link-block">
|
68 |
-
<a href="https://arxiv.org/pdf/2011.12948" target="_blank"
|
69 |
-
class="external-link button is-normal is-rounded is-dark">
|
70 |
-
<span class="icon">
|
71 |
-
<i class="fas fa-file-pdf"></i>
|
72 |
-
</span>
|
73 |
-
<span>Paper</span>
|
74 |
-
</a>
|
75 |
-
</span>
|
76 |
<span class="link-block">
|
77 |
-
<a href="https://
|
78 |
-
|
79 |
-
<span
|
80 |
-
<i class="ai ai-arxiv"></i>
|
81 |
-
</span>
|
82 |
-
<span>arXiv</span>
|
83 |
</a>
|
84 |
</span>
|
85 |
-
<!-- Video Link. -->
|
86 |
<span class="link-block">
|
87 |
-
<a href="https://
|
88 |
-
|
89 |
-
<span
|
90 |
-
<i class="fab fa-youtube"></i>
|
91 |
-
</span>
|
92 |
-
<span>Video</span>
|
93 |
</a>
|
94 |
</span>
|
95 |
-
<!-- Code Link. -->
|
96 |
-
<span class="link-block">
|
97 |
-
<a href="https://github.com/google/nerfies" target="_blank"
|
98 |
-
class="external-link button is-normal is-rounded is-dark">
|
99 |
-
<span class="icon">
|
100 |
-
<i class="fab fa-github"></i>
|
101 |
-
</span>
|
102 |
-
<span>Code</span>
|
103 |
-
</a>
|
104 |
-
</span>
|
105 |
-
<!-- Dataset Link. -->
|
106 |
-
<span class="link-block">
|
107 |
-
<a href="https://github.com/google/nerfies/releases/tag/0.1" target="_blank"
|
108 |
-
class="external-link button is-normal is-rounded is-dark">
|
109 |
-
<span class="icon">
|
110 |
-
<i class="far fa-images"></i>
|
111 |
-
</span>
|
112 |
-
<span>Data</span>
|
113 |
-
</a>
|
114 |
</div>
|
115 |
-
|
116 |
</div>
|
117 |
</div>
|
118 |
</div>
|
@@ -120,293 +55,249 @@
|
|
120 |
</div>
|
121 |
</section>
|
122 |
|
123 |
-
<section class="hero teaser">
|
124 |
-
<div class="container is-max-desktop">
|
125 |
-
<div class="hero-body">
|
126 |
-
<video id="teaser" autoplay muted loop playsinline height="100%">
|
127 |
-
<source src="./static/videos/teaser.mp4"
|
128 |
-
type="video/mp4">
|
129 |
-
</video>
|
130 |
-
<h2 class="subtitle has-text-centered">
|
131 |
-
<span class="dnerf">Nerfies</span> turns selfie videos from your phone into
|
132 |
-
free-viewpoint
|
133 |
-
portraits.
|
134 |
-
</h2>
|
135 |
-
</div>
|
136 |
-
</div>
|
137 |
-
</section>
|
138 |
-
|
139 |
-
|
140 |
-
<section class="hero is-light is-small">
|
141 |
-
<div class="hero-body">
|
142 |
-
<div class="container">
|
143 |
-
<div id="results-carousel" class="carousel results-carousel">
|
144 |
-
<div class="item item-steve">
|
145 |
-
<video poster="" id="steve" autoplay controls muted loop playsinline height="100%">
|
146 |
-
<source src="./static/videos/steve.mp4"
|
147 |
-
type="video/mp4">
|
148 |
-
</video>
|
149 |
-
</div>
|
150 |
-
<div class="item item-chair-tp">
|
151 |
-
<video poster="" id="chair-tp" autoplay controls muted loop playsinline height="100%">
|
152 |
-
<source src="./static/videos/chair-tp.mp4"
|
153 |
-
type="video/mp4">
|
154 |
-
</video>
|
155 |
-
</div>
|
156 |
-
<div class="item item-shiba">
|
157 |
-
<video poster="" id="shiba" autoplay controls muted loop playsinline height="100%">
|
158 |
-
<source src="./static/videos/shiba.mp4"
|
159 |
-
type="video/mp4">
|
160 |
-
</video>
|
161 |
-
</div>
|
162 |
-
<div class="item item-fullbody">
|
163 |
-
<video poster="" id="fullbody" autoplay controls muted loop playsinline height="100%">
|
164 |
-
<source src="./static/videos/fullbody.mp4"
|
165 |
-
type="video/mp4">
|
166 |
-
</video>
|
167 |
-
</div>
|
168 |
-
<div class="item item-blueshirt">
|
169 |
-
<video poster="" id="blueshirt" autoplay controls muted loop playsinline height="100%">
|
170 |
-
<source src="./static/videos/blueshirt.mp4"
|
171 |
-
type="video/mp4">
|
172 |
-
</video>
|
173 |
-
</div>
|
174 |
-
<div class="item item-mask">
|
175 |
-
<video poster="" id="mask" autoplay controls muted loop playsinline height="100%">
|
176 |
-
<source src="./static/videos/mask.mp4"
|
177 |
-
type="video/mp4">
|
178 |
-
</video>
|
179 |
-
</div>
|
180 |
-
<div class="item item-coffee">
|
181 |
-
<video poster="" id="coffee" autoplay controls muted loop playsinline height="100%">
|
182 |
-
<source src="./static/videos/coffee.mp4"
|
183 |
-
type="video/mp4">
|
184 |
-
</video>
|
185 |
-
</div>
|
186 |
-
<div class="item item-toby">
|
187 |
-
<video poster="" id="toby" autoplay controls muted loop playsinline height="100%">
|
188 |
-
<source src="./static/videos/toby2.mp4"
|
189 |
-
type="video/mp4">
|
190 |
-
</video>
|
191 |
-
</div>
|
192 |
-
</div>
|
193 |
-
</div>
|
194 |
-
</div>
|
195 |
-
</section>
|
196 |
-
|
197 |
-
|
198 |
<section class="section">
|
199 |
<div class="container is-max-desktop">
|
200 |
-
<!-- Abstract. -->
|
201 |
<div class="columns is-centered has-text-centered">
|
202 |
<div class="column is-four-fifths">
|
203 |
-
<h2 class="title is-3">
|
204 |
<div class="content has-text-justified">
|
205 |
<p>
|
206 |
-
|
207 |
-
deforming scene using photos/videos captured casually from mobile phones.
|
208 |
-
</p>
|
209 |
-
<p>
|
210 |
-
Our approach augments neural radiance fields
|
211 |
-
(NeRF) by optimizing an
|
212 |
-
additional continuous volumetric deformation field that warps each observed point into a
|
213 |
-
canonical 5D NeRF.
|
214 |
-
We observe that these NeRF-like deformation fields are prone to local minima, and
|
215 |
-
propose a coarse-to-fine optimization method for coordinate-based models that allows for
|
216 |
-
more robust optimization.
|
217 |
-
By adapting principles from geometry processing and physical simulation to NeRF-like
|
218 |
-
models, we propose an elastic regularization of the deformation field that further
|
219 |
-
improves robustness.
|
220 |
-
</p>
|
221 |
-
<p>
|
222 |
-
We show that <span class="dnerf">Nerfies</span> can turn casually captured selfie
|
223 |
-
photos/videos into deformable NeRF
|
224 |
-
models that allow for photorealistic renderings of the subject from arbitrary
|
225 |
-
viewpoints, which we dub <i>"nerfies"</i>. We evaluate our method by collecting data
|
226 |
-
using a
|
227 |
-
rig with two mobile phones that take time-synchronized photos, yielding train/validation
|
228 |
-
images of the same pose at different viewpoints. We show that our method faithfully
|
229 |
-
reconstructs non-rigidly deforming scenes and reproduces unseen views with high
|
230 |
-
fidelity.
|
231 |
</p>
|
232 |
</div>
|
233 |
</div>
|
234 |
</div>
|
235 |
-
<!--/ Abstract. -->
|
236 |
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
<
|
241 |
-
|
242 |
-
<
|
243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
244 |
</div>
|
245 |
</div>
|
246 |
</div>
|
247 |
-
<!--/ Paper video. -->
|
248 |
-
</div>
|
249 |
-
</section>
|
250 |
-
|
251 |
-
|
252 |
-
<section class="section">
|
253 |
-
<div class="container is-max-desktop">
|
254 |
|
255 |
<div class="columns is-centered">
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
</
|
265 |
-
<video id="dollyzoom" autoplay controls muted loop playsinline height="100%">
|
266 |
-
<source src="./static/videos/dollyzoom-stacked.mp4"
|
267 |
-
type="video/mp4">
|
268 |
-
</video>
|
269 |
</div>
|
270 |
</div>
|
271 |
-
|
272 |
-
|
273 |
-
<!-- Matting. -->
|
274 |
-
<div class="column">
|
275 |
-
<h2 class="title is-3">Matting</h2>
|
276 |
-
<div class="columns is-centered">
|
277 |
-
<div class="column content">
|
278 |
-
<p>
|
279 |
-
As a byproduct of our method, we can also solve the matting problem by ignoring
|
280 |
-
samples that fall outside of a bounding box during rendering.
|
281 |
-
</p>
|
282 |
-
<video id="matting-video" controls playsinline height="100%">
|
283 |
-
<source src="./static/videos/matting.mp4"
|
284 |
-
type="video/mp4">
|
285 |
-
</video>
|
286 |
-
</div>
|
287 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
</div>
|
289 |
</div>
|
290 |
</div>
|
291 |
-
<!--/ Matting. -->
|
292 |
|
293 |
-
<!-- Animation. -->
|
294 |
<div class="columns is-centered">
|
295 |
<div class="column is-full-width">
|
296 |
-
<h2 class="title is-3">
|
297 |
-
|
298 |
-
<!-- Interpolating. -->
|
299 |
-
<h3 class="title is-4">Interpolating states</h3>
|
300 |
<div class="content has-text-justified">
|
301 |
-
<p>
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
</
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
306 |
</div>
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
|
|
|
|
|
|
328 |
</div>
|
329 |
-
<br/>
|
330 |
-
<!--/ Interpolating. -->
|
331 |
|
332 |
-
|
333 |
-
<h3 class="title is-4">Re-rendering the input video</h3>
|
334 |
<div class="content has-text-justified">
|
335 |
-
<p>
|
336 |
-
|
337 |
-
|
338 |
-
</
|
339 |
</div>
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
350 |
</div>
|
351 |
-
|
|
|
352 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
353 |
</div>
|
354 |
</div>
|
355 |
-
<!--/ Animation. -->
|
356 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
357 |
|
358 |
-
<!-- Concurrent Work. -->
|
359 |
<div class="columns is-centered">
|
360 |
<div class="column is-full-width">
|
361 |
-
<h2 class="title is-3">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
362 |
|
|
|
|
|
|
|
363 |
<div class="content has-text-justified">
|
364 |
-
<
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
<
|
369 |
-
|
370 |
-
|
371 |
-
<
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
There are probably many more by the time you are reading this. Check out <a href="https://dellaert.github.io/NeRF/" target="_blank">Frank Dellaert's survey on recent NeRF papers</a>, and <a href="https://github.com/yenchenlin/awesome-NeRF" target="_blank">Yen-Chen Lin's curated list of NeRF papers</a>.
|
379 |
-
</p>
|
380 |
</div>
|
381 |
</div>
|
382 |
</div>
|
383 |
-
<!--/ Concurrent Work. -->
|
384 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
385 |
</div>
|
386 |
</section>
|
387 |
|
388 |
-
|
389 |
-
<section class="section" id="BibTeX">
|
390 |
<div class="container is-max-desktop content">
|
391 |
-
<h2 class="title">
|
392 |
-
<
|
393 |
-
|
394 |
-
|
395 |
-
journal = {ICCV},
|
396 |
-
year = {2021},
|
397 |
-
}</code></pre>
|
398 |
</div>
|
399 |
</section>
|
400 |
|
401 |
-
|
402 |
<footer class="footer">
|
403 |
<div class="container">
|
404 |
<div class="content has-text-centered">
|
405 |
-
<a class="icon-link" target="_blank"
|
406 |
-
href="./static/videos/nerfies_paper.pdf">
|
407 |
-
<i class="fas fa-file-pdf"></i>
|
408 |
-
</a>
|
409 |
-
<a class="icon-link" href="https://github.com/keunhong" target="_blank" class="external-link" disabled>
|
410 |
<i class="fab fa-github"></i>
|
411 |
</a>
|
412 |
</div>
|
@@ -414,16 +305,10 @@
|
|
414 |
<div class="column is-8">
|
415 |
<div class="content">
|
416 |
<p>
|
417 |
-
This website is licensed under a <a rel="license" target="_blank"
|
418 |
-
href="http://creativecommons.org/licenses/by-sa/4.0/">Creative
|
419 |
-
Commons Attribution-ShareAlike 4.0 International License</a>.
|
420 |
</p>
|
421 |
<p>
|
422 |
-
This means you are free to borrow the <a target="_blank"
|
423 |
-
href="https://github.com/nerfies/nerfies.github.io">source code</a> of this website,
|
424 |
-
we just ask that you link back to this page in the footer.
|
425 |
-
Please remember to remove the analytics code included in the header of the website which
|
426 |
-
you do not want on your website.
|
427 |
</p>
|
428 |
</div>
|
429 |
</div>
|
@@ -432,4 +317,4 @@
|
|
432 |
</footer>
|
433 |
|
434 |
</body>
|
435 |
-
</html>
|
|
|
2 |
<html>
|
3 |
<head>
|
4 |
<meta charset="utf-8">
|
5 |
+
<meta name="description" content="Distributed Translation System for translating the DataTonic/dark_thoughts_case_study_merged dataset across multiple languages using RunPod and Ollama.">
|
6 |
+
<meta name="keywords" content="Distributed Translation, RunPod, Ollama, Dark Thoughts Dataset">
|
|
|
7 |
<meta name="viewport" content="width=device-width, initial-scale=1">
|
8 |
+
<title>Distributed Translation System for Dark Thoughts Dataset</title>
|
|
|
|
|
|
|
9 |
|
10 |
+
<link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">
|
11 |
<link rel="stylesheet" href="./static/css/bulma.min.css">
|
12 |
<link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
|
13 |
<link rel="stylesheet" href="./static/css/bulma-slider.min.css">
|
14 |
<link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
|
15 |
+
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
|
|
|
16 |
<link rel="stylesheet" href="./static/css/index.css">
|
17 |
<link rel="icon" href="./static/images/favicon.svg">
|
18 |
|
|
|
29 |
<div class="container is-max-desktop">
|
30 |
<div class="columns is-centered">
|
31 |
<div class="column has-text-centered">
|
32 |
+
<h1 class="title is-1 publication-title">Distributed Translation System for Dark Thoughts Dataset</h1>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
<div class="is-size-5 publication-authors">
|
34 |
+
<span class="author-block">Your Name or Team</span>
|
|
|
35 |
</div>
|
|
|
36 |
<div class="column has-text-centered">
|
37 |
<div class="publication-links">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
<span class="link-block">
|
39 |
+
<a href="https://github.com/yourusername/distributed-translation" target="_blank" class="external-link button is-normal is-rounded is-dark">
|
40 |
+
<span class="icon"><i class="fab fa-github"></i></span>
|
41 |
+
<span>Code</span>
|
|
|
|
|
|
|
42 |
</a>
|
43 |
</span>
|
|
|
44 |
<span class="link-block">
|
45 |
+
<a href="https://huggingface.co/datasets/DataTonic/dark_thoughts_case_study_merged" target="_blank" class="external-link button is-normal is-rounded is-dark">
|
46 |
+
<span class="icon"><i class="far fa-images"></i></span>
|
47 |
+
<span>Data</span>
|
|
|
|
|
|
|
48 |
</a>
|
49 |
</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
</div>
|
|
|
51 |
</div>
|
52 |
</div>
|
53 |
</div>
|
|
|
55 |
</div>
|
56 |
</section>
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
<section class="section">
|
59 |
<div class="container is-max-desktop">
|
|
|
60 |
<div class="columns is-centered has-text-centered">
|
61 |
<div class="column is-four-fifths">
|
62 |
+
<h2 class="title is-3">Overview</h2>
|
63 |
<div class="content has-text-justified">
|
64 |
<p>
|
65 |
+
This project implements a distributed translation system using RunPod and Ollama to translate the <a href="https://huggingface.co/datasets/DataTonic/dark_thoughts_case_study_merged" target="_blank">DataTonic/dark_thoughts_case_study_merged</a> dataset across multiple languages. The system parses thinking content from responses and translates both components separately.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
</p>
|
67 |
</div>
|
68 |
</div>
|
69 |
</div>
|
|
|
70 |
|
71 |
+
<div class="columns is-centered">
|
72 |
+
<div class="column is-full-width">
|
73 |
+
<h2 class="title is-3">Architecture</h2>
|
74 |
+
<div class="content has-text-justified">
|
75 |
+
<p>The system consists of several components:</p>
|
76 |
+
<ol>
|
77 |
+
<li><strong>RunPod API Client</strong> (<code>runpodapi.py</code>): Handles communication with the RunPod API for creating, managing, and monitoring pods.</li>
|
78 |
+
<li><strong>RunPod Command Executor</strong> (<code>runcommandsrunpod.py</code>): Executes commands on RunPod instances and checks their readiness.</li>
|
79 |
+
<li><strong>RunPod Launcher</strong> (<code>runpodlauncher.py</code>): Manages the launching and coordination of multiple RunPod instances.</li>
|
80 |
+
<li><strong>RunPod Manager</strong> (<code>runpodmanager.py</code>): High-level manager for RunPod instances used for distributed translation.</li>
|
81 |
+
<li><strong>Ollama Client</strong> (<code>ollamaclient.py</code>): Async client for interacting with Ollama API and distributing translation tasks.</li>
|
82 |
+
<li><strong>Translation Coordinator</strong> (<code>translationcoordinator.py</code>): Orchestrates the translation process across dataset splits and languages.</li>
|
83 |
+
<li><strong>Data Processor</strong> (<code>dataprocessor.py</code>): Handles loading, processing, and saving the translated dataset.</li>
|
84 |
+
<li><strong>Main Script</strong> (<code>translate.py</code>): Entry point for running the distributed translation process.</li>
|
85 |
+
<li><strong>Test Scripts</strong> (<code>test_translation.py</code>, <code>test_parsing.py</code>): Tests the functionality of the distributed translation system.</li>
|
86 |
+
</ol>
|
87 |
</div>
|
88 |
</div>
|
89 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
<div class="columns is-centered">
|
92 |
+
<div class="column is-full-width">
|
93 |
+
<h2 class="title is-3">Requirements</h2>
|
94 |
+
<div class="content has-text-justified">
|
95 |
+
<ul>
|
96 |
+
<li>Python 3.8+</li>
|
97 |
+
<li>RunPod API key</li>
|
98 |
+
<li>Access to RunPod GPU instances</li>
|
99 |
+
<li>The following Python packages: <code>aiohttp</code>, <code>asyncio</code>, <code>datasets</code>, <code>pandas</code>, <code>tqdm</code>, <code>requests</code>, <code>pydantic</code></li>
|
100 |
+
</ul>
|
|
|
|
|
|
|
|
|
101 |
</div>
|
102 |
</div>
|
103 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
|
105 |
+
<div class="columns is-centered">
|
106 |
+
<div class="column is-full-width">
|
107 |
+
<h2 class="title is-3">Installation</h2>
|
108 |
+
<div class="content has-text-justified">
|
109 |
+
<ol>
|
110 |
+
<li>Clone the repository:
|
111 |
+
<pre><code>git clone https://github.com/yourusername/distributed-translation.git
|
112 |
+
cd distributed-translation</code></pre>
|
113 |
+
</li>
|
114 |
+
<li>Install the required packages:
|
115 |
+
<pre><code>pip install -r requirements.txt</code></pre>
|
116 |
+
</li>
|
117 |
+
<li>Set up your RunPod API key:
|
118 |
+
<pre><code>export RUNPOD_API_KEY=your_runpod_api_key</code></pre>
|
119 |
+
</li>
|
120 |
+
</ol>
|
121 |
</div>
|
122 |
</div>
|
123 |
</div>
|
|
|
124 |
|
|
|
125 |
<div class="columns is-centered">
|
126 |
<div class="column is-full-width">
|
127 |
+
<h2 class="title is-3">Dataset Structure</h2>
|
|
|
|
|
|
|
128 |
<div class="content has-text-justified">
|
129 |
+
<p>The system works with the DataTonic/dark_thoughts_case_study_merged dataset, which contains:</p>
|
130 |
+
<ul>
|
131 |
+
<li>English split: 20,711 examples</li>
|
132 |
+
<li>Chinese split: 20,204 examples</li>
|
133 |
+
</ul>
|
134 |
+
<p>The system parses thinking content (text before <code>&lt;/think&gt;</code>) from responses and translates both components separately.</p>
|
135 |
+
<p>The final dataset structure follows this model:</p>
|
136 |
+
<pre><code>class Feature(BaseModel):
|
137 |
+
id: int
|
138 |
+
thinking: str
|
139 |
+
response: str
|
140 |
+
thinking_translated: str
|
141 |
+
response_translated: str
|
142 |
+
query: str
|
143 |
+
source_data: str
|
144 |
+
category: str
|
145 |
+
endpoint: str
|
146 |
+
source: str</code></pre>
|
147 |
</div>
|
148 |
+
</div>
|
149 |
+
</div>
|
150 |
+
|
151 |
+
<div class="columns is-centered">
|
152 |
+
<div class="column is-full-width">
|
153 |
+
<h2 class="title is-3">Usage</h2>
|
154 |
+
<h3 class="title is-4">Running the Translation Process</h3>
|
155 |
+
<div class="content has-text-justified">
|
156 |
+
<p>To run the full translation process:</p>
|
157 |
+
<pre><code>python translate.py --pod-count 40 --batch-size 16 --max-tokens 100</code></pre>
|
158 |
+
<p>Additional options:</p>
|
159 |
+
<pre><code>--api-key TEXT RunPod API key (defaults to RUNPOD_API_KEY environment variable)
|
160 |
+
--pod-count INTEGER Number of RunPod instances to launch (default: 40)
|
161 |
+
--dataset TEXT Dataset name or path (default: DataTonic/dark_thoughts_case_study_merged)
|
162 |
+
--output-dir TEXT Output directory for translated data (default: translated_dataset)
|
163 |
+
--batch-size INTEGER Batch size for translation (default: 16)
|
164 |
+
--max-tokens INTEGER Maximum number of tokens to generate (default: 100)
|
165 |
+
--gpu-type TEXT GPU type ID for RunPod instances (default: NVIDIA RTX A5000)
|
166 |
+
--image TEXT Docker image name (default: tonic01/ollama-gemmax2)
|
167 |
+
--model TEXT Model name for translation (default: gemmax2)
|
168 |
+
--cleanup Terminate all pods after completion
|
169 |
+
--prepare-only Only prepare the dataset without translating
|
170 |
+
--process-only Only process the translated dataset
|
171 |
+
--validate Validate dataset structure after processing</code></pre>
|
172 |
</div>
|
|
|
|
|
173 |
|
174 |
+
<h3 class="title is-4">Testing the System</h3>
|
|
|
175 |
<div class="content has-text-justified">
|
176 |
+
<p>To test the system components:</p>
|
177 |
+
<pre><code>python test_translation.py --test all</code></pre>
|
178 |
+
<p>To test the parsing functionality:</p>
|
179 |
+
<pre><code>python test_parsing.py --test all</code></pre>
|
180 |
</div>
|
181 |
+
</div>
|
182 |
+
</div>
|
183 |
+
|
184 |
+
<div class="columns is-centered">
|
185 |
+
<div class="column is-full-width">
|
186 |
+
<h2 class="title is-3">Translation Process</h2>
|
187 |
+
<div class="content has-text-justified">
|
188 |
+
<p>The translation process follows these steps:</p>
|
189 |
+
<ol>
|
190 |
+
<li><strong>Preparation</strong>: Parse the dataset to separate thinking content from responses.</li>
|
191 |
+
<li><strong>Setup</strong>: Launch 40 RunPod instances with the <code>tonic01/ollama-gemmax2</code> Docker image.</li>
|
192 |
+
<li><strong>Readiness Check</strong>: Wait for all pods to be ready and for Ollama to be initialized with the required model.</li>
|
193 |
+
<li><strong>Translation</strong>:
|
194 |
+
<ul>
|
195 |
+
<li>For each dataset split (English and Chinese):</li>
|
196 |
+
<li>Translate thinking and response fields separately to all target languages.</li>
|
197 |
+
<li>Skip empty thinking content to optimize translation.</li>
|
198 |
+
<li>Save intermediate results periodically.</li>
|
199 |
+
</ul>
|
200 |
+
</li>
|
201 |
+
<li><strong>Processing</strong>: Merge translations and create a Hugging Face dataset structure.</li>
|
202 |
+
<li><strong>Validation</strong>: Ensure the dataset structure matches the required Feature model.</li>
|
203 |
+
<li><strong>Cleanup</strong>: Terminate all pods if requested.</li>
|
204 |
+
</ol>
|
205 |
</div>
|
206 |
+
</div>
|
207 |
+
</div>
|
208 |
|
209 |
+
<div class="columns is-centered">
|
210 |
+
<div class="column is-full-width">
|
211 |
+
<h2 class="title is-3">Supported Languages</h2>
|
212 |
+
<div class="content has-text-justified">
|
213 |
+
<p>The system supports translation between the following languages:</p>
|
214 |
+
<p>Arabic, Bengali, Czech, German, English, Spanish, Persian, French, Hebrew, Hindi, Indonesian, Italian, Japanese, Khmer, Korean, Lao, Malay, Burmese, Dutch, Polish, Portuguese, Russian, Thai, Tagalog, Turkish, Urdu, Vietnamese, Chinese.</p>
|
215 |
+
</div>
|
216 |
</div>
|
217 |
</div>
|
|
|
218 |
|
219 |
+
<div class="columns is-centered">
|
220 |
+
<div class="column is-full-width">
|
221 |
+
<h2 class="title is-3">Error Handling and Recovery</h2>
|
222 |
+
<div class="content has-text-justified">
|
223 |
+
<p>The system includes several error handling and recovery mechanisms:</p>
|
224 |
+
<ul>
|
225 |
+
<li><strong>Retry Logic</strong>: Failed translations are automatically retried.</li>
|
226 |
+
<li><strong>Checkpointing</strong>: Intermediate results are saved periodically to allow resuming from failures.</li>
|
227 |
+
<li><strong>Health Checks</strong>: Pod and Ollama health are checked before starting translation.</li>
|
228 |
+
<li><strong>Empty Content Handling</strong>: Empty thinking content is handled efficiently to avoid unnecessary translations.</li>
|
229 |
+
<li><strong>Graceful Termination</strong>: Resources are properly cleaned up on completion or failure.</li>
|
230 |
+
</ul>
|
231 |
+
</div>
|
232 |
+
</div>
|
233 |
+
</div>
|
234 |
|
|
|
235 |
<div class="columns is-centered">
|
236 |
<div class="column is-full-width">
|
237 |
+
<h2 class="title is-3">Docker Image Requirements</h2>
|
238 |
+
<div class="content has-text-justified">
|
239 |
+
<p>The <code>tonic01/ollama-gemmax2</code> Docker image should have:</p>
|
240 |
+
<ol>
|
241 |
+
<li>Ollama installed and configured to run on port 11434</li>
|
242 |
+
<li>The GemmaX2-28-2B-v0.1 model pre-loaded or configured to load automatically</li>
|
243 |
+
<li>Sufficient GPU memory (at least 24GB recommended)</li>
|
244 |
+
</ol>
|
245 |
+
</div>
|
246 |
+
</div>
|
247 |
+
</div>
|
248 |
|
249 |
+
<div class="columns is-centered">
|
250 |
+
<div class="column is-full-width">
|
251 |
+
<h2 class="title is-3">Example Workflow</h2>
|
252 |
<div class="content has-text-justified">
|
253 |
+
<ol>
|
254 |
+
<li><strong>Prepare Dataset</strong>:
|
255 |
+
<pre><code>python translate.py --prepare-only</code></pre>
|
256 |
+
</li>
|
257 |
+
<li><strong>Run Translation</strong>:
|
258 |
+
<pre><code>python translate.py --pod-count 40</code></pre>
|
259 |
+
</li>
|
260 |
+
<li><strong>Process Results Only</strong>:
|
261 |
+
<pre><code>python translate.py --process-only --validate</code></pre>
|
262 |
+
</li>
|
263 |
+
<li><strong>Cleanup</strong>:
|
264 |
+
<pre><code>python test_translation.py --test termination</code></pre>
|
265 |
+
</li>
|
266 |
+
</ol>
|
|
|
|
|
267 |
</div>
|
268 |
</div>
|
269 |
</div>
|
|
|
270 |
|
271 |
+
<div class="columns is-centered">
|
272 |
+
<div class="column is-full-width">
|
273 |
+
<h2 class="title is-3">Troubleshooting</h2>
|
274 |
+
<div class="content has-text-justified">
|
275 |
+
<ul>
|
276 |
+
<li><strong>API Key Issues</strong>: Ensure your RunPod API key is correctly set in the environment variable or passed as a parameter.</li>
|
277 |
+
<li><strong>GPU Availability</strong>: Check RunPod for GPU availability if pod creation fails.</li>
|
278 |
+
<li><strong>Model Loading</strong>: If Ollama readiness check times out, the model may be too large for the selected GPU type.</li>
|
279 |
+
<li><strong>Translation Errors</strong>: Check the logs for specific error messages. Most translation errors are automatically retried.</li>
|
280 |
+
<li><strong>Dataset Structure</strong>: Run with the <code>--validate</code> flag to ensure the dataset structure matches the required Feature model.</li>
|
281 |
+
</ul>
|
282 |
+
</div>
|
283 |
+
</div>
|
284 |
+
</div>
|
285 |
</div>
|
286 |
</section>
|
287 |
|
288 |
+
<section class="section" id="License">
|
|
|
289 |
<div class="container is-max-desktop content">
|
290 |
+
<h2 class="title">License</h2>
|
291 |
+
<div class="content has-text-justified">
|
292 |
+
<p>This project is licensed under the Apache 2.0 License - see the <a href="LICENSE" target="_blank">LICENSE</a> file for details.</p>
|
293 |
+
</div>
|
|
|
|
|
|
|
294 |
</div>
|
295 |
</section>
|
296 |
|
|
|
297 |
<footer class="footer">
|
298 |
<div class="container">
|
299 |
<div class="content has-text-centered">
|
300 |
+
<a class="icon-link" href="https://github.com/yourusername/distributed-translation" target="_blank">
|
|
|
|
|
|
|
|
|
301 |
<i class="fab fa-github"></i>
|
302 |
</a>
|
303 |
</div>
|
|
|
305 |
<div class="column is-8">
|
306 |
<div class="content">
|
307 |
<p>
|
308 |
+
This website is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/" target="_blank">Creative Commons Attribution-ShareAlike 4.0 International License</a>.
|
|
|
|
|
309 |
</p>
|
310 |
<p>
|
311 |
+
This means you are free to borrow the <a href="https://github.com/yourusername/distributed-translation" target="_blank">source code</a> of this website, we just ask that you link back to this page in the footer.
|
|
|
|
|
|
|
|
|
312 |
</p>
|
313 |
</div>
|
314 |
</div>
|
|
|
317 |
</footer>
|
318 |
|
319 |
</body>
|
320 |
+
</html>
|