Tonic committed
Commit aeed15a · verified · 1 Parent(s): a6f238c

Update index.html

Files changed (1): index.html (+204 −319)
index.html CHANGED
@@ -2,21 +2,17 @@
  <html>
  <head>
  <meta charset="utf-8">
- <meta name="description" content="Deformable Neural Radiance Fields creates free-viewpoint portraits (nerfies) from casually captured videos.">
- <meta name="keywords" content="Nerfies, D-NeRF, NeRF">
  <meta name="viewport" content="width=device-width, initial-scale=1">
- <title>Nerfies: Deformable Neural Radiance Fields</title>
-
- <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">

  <link rel="stylesheet" href="./static/css/bulma.min.css">
  <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
- <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="./static/css/index.css">
  <link rel="icon" href="./static/images/favicon.svg">

@@ -33,86 +29,25 @@
  <div class="container is-max-desktop">
  <div class="columns is-centered">
  <div class="column has-text-centered">
- <h1 class="title is-1 publication-title">Nerfies: Deformable Neural Radiance Fields</h1>
- <div class="is-size-5 publication-authors">
- <span class="author-block"><a href="https://keunhong.com" target="_blank">Keunhong Park</a><sup>1</sup>,</span>
- <span class="author-block"><a href="https://utkarshsinha.com" target="_blank">Utkarsh Sinha</a><sup>2</sup>,</span>
- <span class="author-block"><a href="https://jonbarron.info" target="_blank">Jonathan T. Barron</a><sup>2</sup>,</span>
- <span class="author-block"><a href="http://sofienbouaziz.com" target="_blank">Sofien Bouaziz</a><sup>2</sup>,</span>
- <span class="author-block"><a href="https://www.danbgoldman.com" target="_blank">Dan B Goldman</a><sup>2</sup>,</span>
- <span class="author-block"><a href="https://homes.cs.washington.edu/~seitz/" target="_blank">Steven M. Seitz</a><sup>1,2</sup>,</span>
- <span class="author-block"><a href="http://www.ricardomartinbrualla.com" target="_blank">Ricardo Martin-Brualla</a><sup>2</sup></span>
- </div>
-
  <div class="is-size-5 publication-authors">
- <span class="author-block"><sup>1</sup>University of Washington,</span>
- <span class="author-block"><sup>2</sup>Google Research</span>
  </div>
-
  <div class="column has-text-centered">
  <div class="publication-links">
- <!-- PDF Link. -->
- <span class="link-block">
- <a href="https://arxiv.org/pdf/2011.12948" target="_blank" class="external-link button is-normal is-rounded is-dark">
- <span class="icon"><i class="fas fa-file-pdf"></i></span>
- <span>Paper</span>
- </a>
- </span>
  <span class="link-block">
- <a href="https://arxiv.org/abs/2011.12948" target="_blank" class="external-link button is-normal is-rounded is-dark">
- <span class="icon"><i class="ai ai-arxiv"></i></span>
- <span>arXiv</span>
  </a>
  </span>
- <!-- Video Link. -->
  <span class="link-block">
- <a href="https://www.youtube.com/watch?v=MrKrnHhk8IA" target="_blank" class="external-link button is-normal is-rounded is-dark">
- <span class="icon"><i class="fab fa-youtube"></i></span>
- <span>Video</span>
  </a>
  </span>
- <!-- Code Link. -->
- <span class="link-block">
- <a href="https://github.com/google/nerfies" target="_blank" class="external-link button is-normal is-rounded is-dark">
- <span class="icon"><i class="fab fa-github"></i></span>
- <span>Code</span>
- </a>
- </span>
- <!-- Dataset Link. -->
- <span class="link-block">
- <a href="https://github.com/google/nerfies/releases/tag/0.1" target="_blank" class="external-link button is-normal is-rounded is-dark">
- <span class="icon"><i class="far fa-images"></i></span>
- <span>Data</span>
- </a>
  </div>
-
  </div>
  </div>
  </div>
@@ -120,293 +55,249 @@
  </div>
  </section>

- <section class="hero teaser">
- <div class="container is-max-desktop">
- <div class="hero-body">
- <video id="teaser" autoplay muted loop playsinline height="100%">
- <source src="./static/videos/teaser.mp4" type="video/mp4">
- </video>
- <h2 class="subtitle has-text-centered">
- <span class="dnerf">Nerfies</span> turns selfie videos from your phone into free-viewpoint portraits.
- </h2>
- </div>
- </div>
- </section>
-
-
- <section class="hero is-light is-small">
- <div class="hero-body">
- <div class="container">
- <div id="results-carousel" class="carousel results-carousel">
- <div class="item item-steve">
- <video poster="" id="steve" autoplay controls muted loop playsinline height="100%">
- <source src="./static/videos/steve.mp4" type="video/mp4">
- </video>
- </div>
- <div class="item item-chair-tp">
- <video poster="" id="chair-tp" autoplay controls muted loop playsinline height="100%">
- <source src="./static/videos/chair-tp.mp4" type="video/mp4">
- </video>
- </div>
- <div class="item item-shiba">
- <video poster="" id="shiba" autoplay controls muted loop playsinline height="100%">
- <source src="./static/videos/shiba.mp4" type="video/mp4">
- </video>
- </div>
- <div class="item item-fullbody">
- <video poster="" id="fullbody" autoplay controls muted loop playsinline height="100%">
- <source src="./static/videos/fullbody.mp4" type="video/mp4">
- </video>
- </div>
- <div class="item item-blueshirt">
- <video poster="" id="blueshirt" autoplay controls muted loop playsinline height="100%">
- <source src="./static/videos/blueshirt.mp4" type="video/mp4">
- </video>
- </div>
- <div class="item item-mask">
- <video poster="" id="mask" autoplay controls muted loop playsinline height="100%">
- <source src="./static/videos/mask.mp4" type="video/mp4">
- </video>
- </div>
- <div class="item item-coffee">
- <video poster="" id="coffee" autoplay controls muted loop playsinline height="100%">
- <source src="./static/videos/coffee.mp4" type="video/mp4">
- </video>
- </div>
- <div class="item item-toby">
- <video poster="" id="toby" autoplay controls muted loop playsinline height="100%">
- <source src="./static/videos/toby2.mp4" type="video/mp4">
- </video>
- </div>
- </div>
- </div>
- </div>
- </section>
-
-
  <section class="section">
  <div class="container is-max-desktop">
- <!-- Abstract. -->
  <div class="columns is-centered has-text-centered">
  <div class="column is-four-fifths">
- <h2 class="title is-3">Abstract</h2>
  <div class="content has-text-justified">
  <p>
- We present the first method capable of photorealistically reconstructing a non-rigidly deforming scene using photos/videos captured casually from mobile phones.
- </p>
- <p>
- Our approach augments neural radiance fields (NeRF) by optimizing an additional continuous volumetric deformation field that warps each observed point into a canonical 5D NeRF. We observe that these NeRF-like deformation fields are prone to local minima, and propose a coarse-to-fine optimization method for coordinate-based models that allows for more robust optimization. By adapting principles from geometry processing and physical simulation to NeRF-like models, we propose an elastic regularization of the deformation field that further improves robustness.
- </p>
- <p>
- We show that <span class="dnerf">Nerfies</span> can turn casually captured selfie photos/videos into deformable NeRF models that allow for photorealistic renderings of the subject from arbitrary viewpoints, which we dub <i>"nerfies"</i>. We evaluate our method by collecting data using a rig with two mobile phones that take time-synchronized photos, yielding train/validation images of the same pose at different viewpoints. We show that our method faithfully reconstructs non-rigidly deforming scenes and reproduces unseen views with high fidelity.
  </p>
  </div>
  </div>
  </div>
- <!--/ Abstract. -->

- <!-- Paper video. -->
- <div class="columns is-centered has-text-centered">
- <div class="column is-four-fifths">
- <h2 class="title is-3">Video</h2>
- <div class="publication-video">
- <iframe src="https://www.youtube.com/embed/MrKrnHhk8IA?rel=0&amp;showinfo=0" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>
  </div>
  </div>
  </div>
- <!--/ Paper video. -->
- </div>
- </section>
-
-
- <section class="section">
- <div class="container is-max-desktop">

  <div class="columns is-centered">
-
- <!-- Visual Effects. -->
- <div class="column">
- <div class="content">
- <h2 class="title is-3">Visual Effects</h2>
- <p>
- Using <i>nerfies</i> you can create fun visual effects. This Dolly zoom effect would be impossible without nerfies since it would require going through a wall.
- </p>
- <video id="dollyzoom" autoplay controls muted loop playsinline height="100%">
- <source src="./static/videos/dollyzoom-stacked.mp4" type="video/mp4">
- </video>
  </div>
  </div>
- <!--/ Visual Effects. -->
-
- <!-- Matting. -->
- <div class="column">
- <h2 class="title is-3">Matting</h2>
- <div class="columns is-centered">
- <div class="column content">
- <p>
- As a byproduct of our method, we can also solve the matting problem by ignoring samples that fall outside of a bounding box during rendering.
- </p>
- <video id="matting-video" controls playsinline height="100%">
- <source src="./static/videos/matting.mp4" type="video/mp4">
- </video>
- </div>
  </div>
  </div>
  </div>
- <!--/ Matting. -->

- <!-- Animation. -->
  <div class="columns is-centered">
  <div class="column is-full-width">
- <h2 class="title is-3">Animation</h2>
-
- <!-- Interpolating. -->
- <h3 class="title is-4">Interpolating states</h3>
  <div class="content has-text-justified">
- <p>
- We can also animate the scene by interpolating the deformation latent codes of two input frames. Use the slider here to linearly interpolate between the left frame and the right frame.
- </p>
  </div>
- <div class="columns is-vcentered interpolation-panel">
- <div class="column is-3 has-text-centered">
- <img src="./static/images/interpolate_start.jpg" class="interpolation-image" alt="Interpolate start reference image."/>
- <p>Start Frame</p>
- </div>
- <div class="column interpolation-video-column">
- <div id="interpolation-image-wrapper">
- Loading...
- </div>
- <input class="slider is-fullwidth is-large is-info" id="interpolation-slider" step="1" min="0" max="100" value="0" type="range">
- </div>
- <div class="column is-3 has-text-centered">
- <img src="./static/images/interpolate_end.jpg" class="interpolation-image" alt="Interpolation end reference image."/>
- <p class="is-bold">End Frame</p>
- </div>
  </div>
- <br/>
- <!--/ Interpolating. -->

- <!-- Re-rendering. -->
- <h3 class="title is-4">Re-rendering the input video</h3>
  <div class="content has-text-justified">
- <p>
- Using <span class="dnerf">Nerfies</span>, you can re-render a video from a novel viewpoint such as a stabilized camera by playing back the training deformations.
- </p>
  </div>
- <div class="content has-text-centered">
- <video id="replay-video" controls muted preload playsinline width="75%">
- <source src="./static/videos/replay.mp4" type="video/mp4">
- </video>
  </div>
- <!--/ Re-rendering. -->

  </div>
  </div>
- <!--/ Animation. -->

- <!-- Concurrent Work. -->
  <div class="columns is-centered">
  <div class="column is-full-width">
- <h2 class="title is-3">Related Links</h2>

  <div class="content has-text-justified">
- <p>
- There's a lot of excellent work that was introduced around the same time as ours.
- </p>
- <p>
- <a href="https://arxiv.org/abs/2104.09125" target="_blank">Progressive Encoding for Neural Optimization</a> introduces an idea similar to our windowed position encoding for coarse-to-fine optimization.
- </p>
- <p>
- <a href="https://www.albertpumarola.com/research/D-NeRF/index.html" target="_blank">D-NeRF</a> and <a href="https://gvv.mpi-inf.mpg.de/projects/nonrigid_nerf/" target="_blank">NR-NeRF</a> both use deformation fields to model non-rigid scenes.
- </p>
- <p>
- Some works model videos with a NeRF by directly modulating the density, such as <a href="https://video-nerf.github.io/" target="_blank">Video-NeRF</a>, <a href="https://www.cs.cornell.edu/~zl548/NSFF/" target="_blank">NSFF</a>, and <a href="https://neural-3d-video.github.io/" target="_blank">DyNeRF</a>.
- </p>
- <p>
- There are probably many more by the time you are reading this. Check out <a href="https://dellaert.github.io/NeRF/" target="_blank">Frank Dellaert's survey on recent NeRF papers</a>, and <a href="https://github.com/yenchenlin/awesome-NeRF" target="_blank">Yen-Chen Lin's curated list of NeRF papers</a>.
- </p>
  </div>
  </div>
  </div>
- <!--/ Concurrent Work. -->

  </div>
  </section>

-
- <section class="section" id="BibTeX">
  <div class="container is-max-desktop content">
- <h2 class="title">BibTeX</h2>
- <pre><code>@article{park2021nerfies,
-   author  = {Park, Keunhong and Sinha, Utkarsh and Barron, Jonathan T. and Bouaziz, Sofien and Goldman, Dan B and Seitz, Steven M. and Martin-Brualla, Ricardo},
-   title   = {Nerfies: Deformable Neural Radiance Fields},
-   journal = {ICCV},
-   year    = {2021},
- }</code></pre>
  </div>
  </section>

-
  <footer class="footer">
  <div class="container">
  <div class="content has-text-centered">
- <a class="icon-link" target="_blank" href="./static/videos/nerfies_paper.pdf">
- <i class="fas fa-file-pdf"></i>
- </a>
- <a class="icon-link" href="https://github.com/keunhong" target="_blank" class="external-link" disabled>
  <i class="fab fa-github"></i>
  </a>
  </div>
@@ -414,16 +305,10 @@
  <div class="column is-8">
  <div class="content">
  <p>
- This website is licensed under a <a rel="license" target="_blank" href="http://creativecommons.org/licenses/by-sa/4.0/">Creative Commons Attribution-ShareAlike 4.0 International License</a>.
  </p>
  <p>
- This means you are free to borrow the <a target="_blank" href="https://github.com/nerfies/nerfies.github.io">source code</a> of this website; we just ask that you link back to this page in the footer. Please remember to remove any analytics code in the header that you do not want on your own website.
  </p>
  </div>
  </div>
@@ -432,4 +317,4 @@
  </footer>

  </body>
- </html>
 
  <html>
  <head>
  <meta charset="utf-8">
+ <meta name="description" content="Distributed Translation System for translating the DataTonic/dark_thoughts_case_study_merged dataset across multiple languages using RunPod and Ollama.">
+ <meta name="keywords" content="Distributed Translation, RunPod, Ollama, Dark Thoughts Dataset">
  <meta name="viewport" content="width=device-width, initial-scale=1">
+ <title>Distributed Translation System for Dark Thoughts Dataset</title>

+ <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">
  <link rel="stylesheet" href="./static/css/bulma.min.css">
  <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="./static/css/index.css">
  <link rel="icon" href="./static/images/favicon.svg">

  <div class="container is-max-desktop">
  <div class="columns is-centered">
  <div class="column has-text-centered">
+ <h1 class="title is-1 publication-title">Distributed Translation System for Dark Thoughts Dataset</h1>
  <div class="is-size-5 publication-authors">
+ <span class="author-block">Your Name or Team</span>
  </div>
  <div class="column has-text-centered">
  <div class="publication-links">
  <span class="link-block">
+ <a href="https://github.com/yourusername/distributed-translation" target="_blank" class="external-link button is-normal is-rounded is-dark">
+ <span class="icon"><i class="fab fa-github"></i></span>
+ <span>Code</span>
  </a>
  </span>
  <span class="link-block">
+ <a href="https://huggingface.co/datasets/DataTonic/dark_thoughts_case_study_merged" target="_blank" class="external-link button is-normal is-rounded is-dark">
+ <span class="icon"><i class="far fa-images"></i></span>
+ <span>Data</span>
  </a>
  </span>
  </div>
  </div>
  </div>
  </div>
  </div>
  </section>

  <section class="section">
  <div class="container is-max-desktop">
  <div class="columns is-centered has-text-centered">
  <div class="column is-four-fifths">
+ <h2 class="title is-3">Overview</h2>
  <div class="content has-text-justified">
  <p>
+ This project implements a distributed translation system using RunPod and Ollama to translate the <a href="https://huggingface.co/datasets/DataTonic/dark_thoughts_case_study_merged" target="_blank">DataTonic/dark_thoughts_case_study_merged</a> dataset across multiple languages. The system parses thinking content from responses and translates both components separately.
  </p>
  </div>
  </div>
  </div>
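For orientation, here is a minimal sketch of pulling the source dataset with the Hugging Face `datasets` library. The split names are an assumption based on the dataset description further below; the project's actual loading logic lives in `dataprocessor.py`.

```python
# Minimal sketch, not the project's dataprocessor.py: load the source dataset.
from datasets import load_dataset

# Split names ("english", "chinese") are assumed from the dataset description.
dataset = load_dataset("DataTonic/dark_thoughts_case_study_merged")
print({name: len(split) for name, split in dataset.items()})
```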
 
+ <div class="columns is-centered">
+ <div class="column is-full-width">
+ <h2 class="title is-3">Architecture</h2>
+ <div class="content has-text-justified">
+ <p>The system consists of several components:</p>
+ <ol>
+ <li><strong>RunPod API Client</strong> (<code>runpodapi.py</code>): Handles communication with the RunPod API for creating, managing, and monitoring pods.</li>
+ <li><strong>RunPod Command Executor</strong> (<code>runcommandsrunpod.py</code>): Executes commands on RunPod instances and checks their readiness.</li>
+ <li><strong>RunPod Launcher</strong> (<code>runpodlauncher.py</code>): Manages the launching and coordination of multiple RunPod instances.</li>
+ <li><strong>RunPod Manager</strong> (<code>runpodmanager.py</code>): High-level manager for the RunPod instances used for distributed translation.</li>
+ <li><strong>Ollama Client</strong> (<code>ollamaclient.py</code>): Async client for interacting with the Ollama API and distributing translation tasks.</li>
+ <li><strong>Translation Coordinator</strong> (<code>translationcoordinator.py</code>): Orchestrates the translation process across dataset splits and languages.</li>
+ <li><strong>Data Processor</strong> (<code>dataprocessor.py</code>): Handles loading, processing, and saving the translated dataset.</li>
+ <li><strong>Main Script</strong> (<code>translate.py</code>): Entry point for running the distributed translation process.</li>
+ <li><strong>Test Scripts</strong> (<code>test_translation.py</code>, <code>test_parsing.py</code>): Test the functionality of the distributed translation system.</li>
+ </ol>
  </div>
  </div>
  </div>
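To illustrate the Ollama Client component, a single translation request against Ollama's standard `/api/generate` endpoint might look like the sketch below. This is not the actual `ollamaclient.py`; the prompt wording and the `num_predict` cap are assumptions, while the endpoint and payload fields follow Ollama's documented API.

```python
import aiohttp

async def translate_text(pod_url: str, text: str, target_lang: str,
                         model: str = "gemmax2", max_tokens: int = 100) -> str:
    """Send one translation request to an Ollama instance running on a pod."""
    payload = {
        "model": model,
        # Prompt format is an assumption; the real client may use a different template.
        "prompt": f"Translate the following text to {target_lang}:\n\n{text}",
        "stream": False,
        "options": {"num_predict": max_tokens},
    }
    async with aiohttp.ClientSession() as session:
        async with session.post(f"{pod_url}/api/generate", json=payload) as resp:
            resp.raise_for_status()
            data = await resp.json()
            return data["response"]
```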
 
  <div class="columns is-centered">
+ <div class="column is-full-width">
+ <h2 class="title is-3">Requirements</h2>
+ <div class="content has-text-justified">
+ <ul>
+ <li>Python 3.8+</li>
+ <li>RunPod API key</li>
+ <li>Access to RunPod GPU instances</li>
+ <li>The following Python packages: <code>aiohttp</code>, <code>asyncio</code>, <code>datasets</code>, <code>pandas</code>, <code>tqdm</code>, <code>requests</code>, <code>pydantic</code></li>
+ </ul>
  </div>
  </div>
+ </div>

+ <div class="columns is-centered">
106
+ <div class="column is-full-width">
107
+ <h2 class="title is-3">Installation</h2>
108
+ <div class="content has-text-justified">
109
+ <ol>
110
+ <li>Clone the repository:
111
+ <pre><code>git clone https://github.com/yourusername/distributed-translation.git
112
+ cd distributed-translation</code></pre>
113
+ </li>
114
+ <li>Install the required packages:
115
+ <pre><code>pip install -r requirements.txt</code></pre>
116
+ </li>
117
+ <li>Set up your RunPod API key:
118
+ <pre><code>export RUNPOD_API_KEY=your_runpod_api_key</code></pre>
119
+ </li>
120
+ </ol>
121
  </div>
122
  </div>
123
  </div>
 
124
 
 
  <div class="columns is-centered">
  <div class="column is-full-width">
+ <h2 class="title is-3">Dataset Structure</h2>
  <div class="content has-text-justified">
+ <p>The system works with the DataTonic/dark_thoughts_case_study_merged dataset, which contains:</p>
+ <ul>
+ <li>English split: 20,711 examples</li>
+ <li>Chinese split: 20,204 examples</li>
+ </ul>
+ <p>The system parses thinking content (text before <code>&lt;/think&gt;</code>) from responses and translates both components separately.</p>
+ <p>The final dataset structure follows this model:</p>
+ <pre><code>class Feature(BaseModel):
+     id: int
+     thinking: str
+     response: str
+     thinking_translated: str
+     response_translated: str
+     query: str
+     source_data: str
+     category: str
+     endpoint: str
+     source: str</code></pre>
  </div>
+ </div>
+ </div>
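The `</think>` split described above can be illustrated with a small helper. This is a sketch; the real parsing lives in the data processor and is exercised by `test_parsing.py`.

```python
def split_thinking(raw: str) -> tuple[str, str]:
    """Split raw model output into (thinking, response) on the </think> tag.

    Text before </think> is treated as thinking content; everything after it
    is the response. If the tag is absent, thinking is left empty.
    """
    marker = "</think>"
    if marker in raw:
        thinking, _, response = raw.partition(marker)
        return thinking.strip(), response.strip()
    return "", raw.strip()
```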
+ <div class="columns is-centered">
+ <div class="column is-full-width">
+ <h2 class="title is-3">Usage</h2>
+ <h3 class="title is-4">Running the Translation Process</h3>
+ <div class="content has-text-justified">
+ <p>To run the full translation process:</p>
+ <pre><code>python translate.py --pod-count 40 --batch-size 16 --max-tokens 100</code></pre>
+ <p>Additional options:</p>
+ <pre><code>--api-key TEXT        RunPod API key (defaults to RUNPOD_API_KEY environment variable)
+ --pod-count INTEGER   Number of RunPod instances to launch (default: 40)
+ --dataset TEXT        Dataset name or path (default: DataTonic/dark_thoughts_case_study_merged)
+ --output-dir TEXT     Output directory for translated data (default: translated_dataset)
+ --batch-size INTEGER  Batch size for translation (default: 16)
+ --max-tokens INTEGER  Maximum number of tokens to generate (default: 100)
+ --gpu-type TEXT       GPU type ID for RunPod instances (default: NVIDIA RTX A5000)
+ --image TEXT          Docker image name (default: tonic01/ollama-gemmax2)
+ --model TEXT          Model name for translation (default: gemmax2)
+ --cleanup             Terminate all pods after completion
+ --prepare-only        Only prepare the dataset without translating
+ --process-only        Only process the translated dataset
+ --validate            Validate dataset structure after processing</code></pre>
  </div>

+ <h3 class="title is-4">Testing the System</h3>
  <div class="content has-text-justified">
+ <p>To test the system components:</p>
+ <pre><code>python test_translation.py --test all</code></pre>
+ <p>To test the parsing functionality:</p>
+ <pre><code>python test_parsing.py --test all</code></pre>
  </div>
+ </div>
+ </div>

+ <div class="columns is-centered">
+ <div class="column is-full-width">
+ <h2 class="title is-3">Translation Process</h2>
+ <div class="content has-text-justified">
+ <p>The translation process follows these steps:</p>
+ <ol>
+ <li><strong>Preparation</strong>: Parse the dataset to separate thinking content from responses.</li>
+ <li><strong>Setup</strong>: Launch 40 RunPod instances with the <code>tonic01/ollama-gemmax2</code> Docker image.</li>
+ <li><strong>Readiness Check</strong>: Wait for all pods to be ready and for Ollama to be initialized with the required model.</li>
+ <li><strong>Translation</strong>: For each dataset split (English and Chinese):
+ <ul>
+ <li>Translate the thinking and response fields separately into all target languages.</li>
+ <li>Skip empty thinking content to optimize translation.</li>
+ <li>Save intermediate results periodically.</li>
+ </ul>
+ </li>
+ <li><strong>Processing</strong>: Merge translations and create a Hugging Face dataset structure.</li>
+ <li><strong>Validation</strong>: Ensure the dataset structure matches the required Feature model.</li>
+ <li><strong>Cleanup</strong>: Terminate all pods if requested.</li>
+ </ol>
  </div>
+ </div>
+ </div>
209
+ <div class="columns is-centered">
210
+ <div class="column is-full-width">
211
+ <h2 class="title is-3">Supported Languages</h2>
212
+ <div class="content has-text-justified">
213
+ <p>The system supports translation between the following languages:</p>
214
+ <p>Arabic, Bengali, Czech, German, English, Spanish, Persian, French, Hebrew, Hindi, Indonesian, Italian, Japanese, Khmer, Korean, Lao, Malay, Burmese, Dutch, Polish, Portuguese, Russian, Thai, Tagalog, Turkish, Urdu, Vietnamese, Chinese.</p>
215
+ </div>
216
  </div>
217
  </div>
 
218
 
219
+ <div class="columns is-centered">
220
+ <div class="column is-full-width">
221
+ <h2 class="title is-3">Error Handling and Recovery</h2>
222
+ <div class="content has-text-justified">
223
+ <p>The system includes several error handling and recovery mechanisms:</p>
224
+ <ul>
225
+ <li><strong>Retry Logic</strong>: Failed translations are automatically retried.</li>
226
+ <li><strong>Checkpointing</strong>: Intermediate results are saved periodically to allow resuming from failures.</li>
227
+ <li><strong>Health Checks</strong>: Pod and Ollama health are checked before starting translation.</li>
228
+ <li><strong>Empty Content Handling</strong>: Empty thinking content is handled efficiently to avoid unnecessary translations.</li>
229
+ <li><strong>Graceful Termination</strong>: Resources are properly cleaned up on completion or failure.</li>
230
+ </ul>
231
+ </div>
232
+ </div>
233
+ </div>
234
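The retry logic might look roughly like the following sketch, with an assumed retry count and exponential backoff; the real behavior lives in the Ollama client and coordinator.

```python
import asyncio

async def with_retries(coro_factory, max_retries: int = 3, base_delay: float = 2.0):
    """Retry a failed async translation call with exponential backoff."""
    for attempt in range(max_retries):
        try:
            return await coro_factory()
        except Exception:
            if attempt == max_retries - 1:
                raise  # out of retries; surface the error to the caller
            await asyncio.sleep(base_delay * 2 ** attempt)  # 2s, 4s, 8s, ...
```

Usage would be something like `await with_retries(lambda: translate_text(pod_url, text, "French"))`, so that each attempt creates a fresh coroutine.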
 
 
  <div class="columns is-centered">
  <div class="column is-full-width">
+ <h2 class="title is-3">Docker Image Requirements</h2>
+ <div class="content has-text-justified">
+ <p>The <code>tonic01/ollama-gemmax2</code> Docker image should have:</p>
+ <ol>
+ <li>Ollama installed and configured to run on port 11434</li>
+ <li>The GemmaX2-28-2B-v0.1 model pre-loaded or configured to load automatically</li>
+ <li>Sufficient GPU memory (at least 24GB recommended)</li>
+ </ol>
+ </div>
+ </div>
+ </div>
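A readiness probe against such an image could be as simple as polling Ollama's `/api/tags` endpoint until the expected model is listed. This is a sketch under that assumption; the actual check is performed by the command executor and launcher.

```python
import time
import requests

def wait_for_ollama(pod_url: str, model: str = "gemmax2", timeout: float = 600.0) -> bool:
    """Poll the Ollama /api/tags endpoint until `model` is listed or the timeout expires."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            tags = requests.get(f"{pod_url}/api/tags", timeout=10).json()
            if any(model in m.get("name", "") for m in tags.get("models", [])):
                return True
        except requests.RequestException:
            pass  # pod still booting; keep polling
        time.sleep(10)
    return False
```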
 
+ <div class="columns is-centered">
+ <div class="column is-full-width">
+ <h2 class="title is-3">Example Workflow</h2>
  <div class="content has-text-justified">
+ <ol>
+ <li><strong>Prepare Dataset</strong>:
+ <pre><code>python translate.py --prepare-only</code></pre>
+ </li>
+ <li><strong>Run Translation</strong>:
+ <pre><code>python translate.py --pod-count 40</code></pre>
+ </li>
+ <li><strong>Process Results Only</strong>:
+ <pre><code>python translate.py --process-only --validate</code></pre>
+ </li>
+ <li><strong>Cleanup</strong>:
+ <pre><code>python test_translation.py --test termination</code></pre>
+ </li>
+ </ol>
  </div>
  </div>
  </div>

+ <div class="columns is-centered">
+ <div class="column is-full-width">
+ <h2 class="title is-3">Troubleshooting</h2>
+ <div class="content has-text-justified">
+ <ul>
+ <li><strong>API Key Issues</strong>: Ensure your RunPod API key is correctly set in the environment variable or passed as a parameter.</li>
+ <li><strong>GPU Availability</strong>: Check RunPod for GPU availability if pod creation fails.</li>
+ <li><strong>Model Loading</strong>: If the Ollama readiness check times out, the model may be too large for the selected GPU type.</li>
+ <li><strong>Translation Errors</strong>: Check the logs for specific error messages. Most translation errors are retried automatically.</li>
+ <li><strong>Dataset Structure</strong>: Run with the <code>--validate</code> flag to ensure the dataset structure matches the required Feature model.</li>
+ </ul>
+ </div>
+ </div>
+ </div>
  </div>
  </section>

+ <section class="section" id="License">
  <div class="container is-max-desktop content">
+ <h2 class="title">License</h2>
+ <div class="content has-text-justified">
+ <p>This project is licensed under the Apache 2.0 License; see the <a href="LICENSE" target="_blank">LICENSE</a> file for details.</p>
+ </div>
  </div>
  </section>

  <footer class="footer">
  <div class="container">
  <div class="content has-text-centered">
+ <a class="icon-link" href="https://github.com/yourusername/distributed-translation" target="_blank">
  <i class="fab fa-github"></i>
  </a>
  </div>
  <div class="column is-8">
  <div class="content">
  <p>
+ This website is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/" target="_blank">Creative Commons Attribution-ShareAlike 4.0 International License</a>.
  </p>
  <p>
+ This means you are free to borrow the <a href="https://github.com/yourusername/distributed-translation" target="_blank">source code</a> of this website; we just ask that you link back to this page in the footer.
  </p>
  </div>
  </div>
  </footer>

  </body>
+ </html>