c14kevincardenas commited on
Commit
777407e
·
verified ·
1 Parent(s): a4049da

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,9 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: c14kevincardenas/beit-large-patch16-384-limb
5
  tags:
 
 
 
6
  - generated_from_trainer
7
  model-index:
8
  - name: limbxy_pose_4heads_1layers_8embeddim
@@ -14,10 +17,10 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # limbxy_pose_4heads_1layers_8embeddim
16
 
17
- This model is a fine-tuned version of [c14kevincardenas/beit-large-patch16-384-limb](https://huggingface.co/c14kevincardenas/beit-large-patch16-384-limb) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.1451
20
- - Rmse: 0.3810
21
 
22
  ## Model description
23
 
 
3
  license: apache-2.0
4
  base_model: c14kevincardenas/beit-large-patch16-384-limb
5
  tags:
6
+ - image-regression
7
+ - human-movement
8
+ - vision
9
  - generated_from_trainer
10
  model-index:
11
  - name: limbxy_pose_4heads_1layers_8embeddim
 
17
 
18
  # limbxy_pose_4heads_1layers_8embeddim
19
 
20
+ This model is a fine-tuned version of [c14kevincardenas/beit-large-patch16-384-limb](https://huggingface.co/c14kevincardenas/beit-large-patch16-384-limb) on the c14kevincardenas/beta_caller_284_limbxy_pose dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 0.1449
23
+ - Rmse: 0.3806
24
 
25
  ## Model description
26
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_loss": 0.1417153775691986,
4
- "eval_rmse": 0.3764510452747345,
5
- "eval_runtime": 9.7727,
6
- "eval_samples_per_second": 102.326,
7
- "eval_steps_per_second": 1.637,
8
  "total_flos": 0.0,
9
- "train_loss": 0.17676029650682815,
10
- "train_runtime": 3336.7226,
11
- "train_samples_per_second": 33.937,
12
- "train_steps_per_second": 0.533
13
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_loss": 0.14487937092781067,
4
+ "eval_rmse": 0.380630224943161,
5
+ "eval_runtime": 9.5453,
6
+ "eval_samples_per_second": 104.764,
7
+ "eval_steps_per_second": 1.676,
8
  "total_flos": 0.0,
9
+ "train_loss": 0.16350786438149012,
10
+ "train_runtime": 3270.5878,
11
+ "train_samples_per_second": 34.624,
12
+ "train_steps_per_second": 0.544
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_loss": 0.1417153775691986,
4
- "eval_rmse": 0.3764510452747345,
5
- "eval_runtime": 9.7727,
6
- "eval_samples_per_second": 102.326,
7
- "eval_steps_per_second": 1.637
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_loss": 0.14487937092781067,
4
+ "eval_rmse": 0.380630224943161,
5
+ "eval_runtime": 9.5453,
6
+ "eval_samples_per_second": 104.764,
7
+ "eval_steps_per_second": 1.676
8
  }
runs/Feb19_07-59-38_galactica.ad.cirange.net/events.out.tfevents.1739955603.galactica.ad.cirange.net.2838155.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:820971256c9c39f81b23da63a32e102499bc0271a24128744cfb6ab5469e632a
3
+ size 407
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.17676029650682815,
5
- "train_runtime": 3336.7226,
6
- "train_samples_per_second": 33.937,
7
- "train_steps_per_second": 0.533
8
  }
 
1
  {
2
  "epoch": 20.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.16350786438149012,
5
+ "train_runtime": 3270.5878,
6
+ "train_samples_per_second": 34.624,
7
+ "train_steps_per_second": 0.544
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.1417153775691986,
3
- "best_model_checkpoint": "limbxy_pose/checkpoint-1780",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
6
  "global_step": 1780,
@@ -10,689 +10,689 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.2808988764044944,
13
- "grad_norm": 5.3874141476076276e-14,
14
  "learning_rate": 5e-06,
15
  "loss": 0.3282,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 0.5617977528089888,
20
- "grad_norm": 2.9326387238765017e-12,
21
  "learning_rate": 1e-05,
22
- "loss": 0.3304,
23
  "step": 50
24
  },
25
  {
26
  "epoch": 0.8426966292134831,
27
- "grad_norm": 7.455580207024748e-13,
28
  "learning_rate": 1.5e-05,
29
- "loss": 0.3414,
30
  "step": 75
31
  },
32
  {
33
  "epoch": 1.0,
34
- "eval_loss": 0.33112141489982605,
35
- "eval_rmse": 0.5754314661026001,
36
- "eval_runtime": 9.1133,
37
- "eval_samples_per_second": 109.73,
38
- "eval_steps_per_second": 1.756,
39
  "step": 89
40
  },
41
  {
42
  "epoch": 1.1235955056179776,
43
- "grad_norm": 2.835559207142041e-13,
44
  "learning_rate": 2e-05,
45
- "loss": 0.3408,
46
  "step": 100
47
  },
48
  {
49
  "epoch": 1.404494382022472,
50
- "grad_norm": 1.3095855780007049e-12,
51
  "learning_rate": 2.5e-05,
52
- "loss": 0.3358,
53
  "step": 125
54
  },
55
  {
56
  "epoch": 1.6853932584269664,
57
- "grad_norm": 1.1560928603326914e-10,
58
  "learning_rate": 3e-05,
59
- "loss": 0.3348,
60
  "step": 150
61
  },
62
  {
63
  "epoch": 1.9662921348314608,
64
- "grad_norm": 6.433731585681057e-12,
65
  "learning_rate": 3.5e-05,
66
- "loss": 0.3313,
67
  "step": 175
68
  },
69
  {
70
  "epoch": 2.0,
71
- "eval_loss": 0.33112141489982605,
72
- "eval_rmse": 0.5754314661026001,
73
- "eval_runtime": 9.2294,
74
- "eval_samples_per_second": 108.35,
75
- "eval_steps_per_second": 1.734,
76
  "step": 178
77
  },
78
  {
79
  "epoch": 2.247191011235955,
80
- "grad_norm": 654211.5,
81
  "learning_rate": 4e-05,
82
- "loss": 0.3285,
83
  "step": 200
84
  },
85
  {
86
  "epoch": 2.5280898876404496,
87
- "grad_norm": 342525.875,
88
  "learning_rate": 4.5e-05,
89
- "loss": 0.2248,
90
  "step": 225
91
  },
92
  {
93
  "epoch": 2.808988764044944,
94
- "grad_norm": 1131143.125,
95
  "learning_rate": 5e-05,
96
- "loss": 0.2277,
97
  "step": 250
98
  },
99
  {
100
  "epoch": 3.0,
101
- "eval_loss": 0.1668396145105362,
102
- "eval_rmse": 0.40846002101898193,
103
- "eval_runtime": 9.5775,
104
- "eval_samples_per_second": 104.411,
105
- "eval_steps_per_second": 1.671,
106
  "step": 267
107
  },
108
  {
109
  "epoch": 3.0898876404494384,
110
- "grad_norm": 3920230.0,
111
  "learning_rate": 4.918300653594771e-05,
112
- "loss": 0.2028,
113
  "step": 275
114
  },
115
  {
116
  "epoch": 3.370786516853933,
117
- "grad_norm": 1259230.75,
118
  "learning_rate": 4.8366013071895424e-05,
119
- "loss": 0.2022,
120
  "step": 300
121
  },
122
  {
123
  "epoch": 3.6516853932584272,
124
- "grad_norm": 380599.59375,
125
  "learning_rate": 4.7549019607843135e-05,
126
- "loss": 0.1957,
127
  "step": 325
128
  },
129
  {
130
  "epoch": 3.932584269662921,
131
- "grad_norm": 928917.8125,
132
  "learning_rate": 4.673202614379085e-05,
133
- "loss": 0.1756,
134
  "step": 350
135
  },
136
  {
137
  "epoch": 4.0,
138
- "eval_loss": 0.15156960487365723,
139
- "eval_rmse": 0.38931941986083984,
140
- "eval_runtime": 9.6534,
141
- "eval_samples_per_second": 103.59,
142
- "eval_steps_per_second": 1.657,
143
  "step": 356
144
  },
145
  {
146
  "epoch": 4.213483146067416,
147
- "grad_norm": 4586462.0,
148
  "learning_rate": 4.5915032679738564e-05,
149
- "loss": 0.1686,
150
  "step": 375
151
  },
152
  {
153
  "epoch": 4.49438202247191,
154
- "grad_norm": 1776099.0,
155
  "learning_rate": 4.5098039215686275e-05,
156
- "loss": 0.1619,
157
  "step": 400
158
  },
159
  {
160
  "epoch": 4.775280898876405,
161
- "grad_norm": 3756013.5,
162
  "learning_rate": 4.4281045751633986e-05,
163
- "loss": 0.1761,
164
  "step": 425
165
  },
166
  {
167
  "epoch": 5.0,
168
- "eval_loss": 0.1556388884782791,
169
- "eval_rmse": 0.394510954618454,
170
- "eval_runtime": 9.5792,
171
- "eval_samples_per_second": 104.393,
172
- "eval_steps_per_second": 1.67,
173
  "step": 445
174
  },
175
  {
176
  "epoch": 5.056179775280899,
177
- "grad_norm": 2636789.0,
178
  "learning_rate": 4.3464052287581704e-05,
179
- "loss": 0.1791,
180
  "step": 450
181
  },
182
  {
183
  "epoch": 5.337078651685394,
184
- "grad_norm": 2208706.25,
185
  "learning_rate": 4.2647058823529415e-05,
186
- "loss": 0.1736,
187
  "step": 475
188
  },
189
  {
190
  "epoch": 5.617977528089888,
191
- "grad_norm": 499775.21875,
192
  "learning_rate": 4.1830065359477126e-05,
193
- "loss": 0.1565,
194
  "step": 500
195
  },
196
  {
197
  "epoch": 5.898876404494382,
198
- "grad_norm": 1655149.625,
199
  "learning_rate": 4.101307189542484e-05,
200
- "loss": 0.1632,
201
  "step": 525
202
  },
203
  {
204
  "epoch": 6.0,
205
- "eval_loss": 0.14596471190452576,
206
- "eval_rmse": 0.38205331563949585,
207
- "eval_runtime": 9.775,
208
- "eval_samples_per_second": 102.302,
209
- "eval_steps_per_second": 1.637,
210
  "step": 534
211
  },
212
  {
213
  "epoch": 6.179775280898877,
214
- "grad_norm": 69511.53125,
215
  "learning_rate": 4.0196078431372555e-05,
216
- "loss": 0.1537,
217
  "step": 550
218
  },
219
  {
220
  "epoch": 6.460674157303371,
221
- "grad_norm": 1434219.875,
222
  "learning_rate": 3.9379084967320266e-05,
223
- "loss": 0.1578,
224
  "step": 575
225
  },
226
  {
227
  "epoch": 6.741573033707866,
228
- "grad_norm": 959990.375,
229
  "learning_rate": 3.8562091503267977e-05,
230
- "loss": 0.1488,
231
  "step": 600
232
  },
233
  {
234
  "epoch": 7.0,
235
- "eval_loss": 0.14534823596477509,
236
- "eval_rmse": 0.3812456429004669,
237
- "eval_runtime": 9.5565,
238
- "eval_samples_per_second": 104.641,
239
- "eval_steps_per_second": 1.674,
240
  "step": 623
241
  },
242
  {
243
  "epoch": 7.022471910112359,
244
- "grad_norm": 887092.875,
245
  "learning_rate": 3.774509803921569e-05,
246
- "loss": 0.1508,
247
  "step": 625
248
  },
249
  {
250
  "epoch": 7.303370786516854,
251
- "grad_norm": 1917969.375,
252
  "learning_rate": 3.6928104575163405e-05,
253
- "loss": 0.1532,
254
  "step": 650
255
  },
256
  {
257
  "epoch": 7.584269662921348,
258
- "grad_norm": 142502.96875,
259
  "learning_rate": 3.611111111111111e-05,
260
- "loss": 0.1501,
261
  "step": 675
262
  },
263
  {
264
  "epoch": 7.865168539325842,
265
- "grad_norm": 308292.90625,
266
  "learning_rate": 3.529411764705883e-05,
267
- "loss": 0.1502,
268
  "step": 700
269
  },
270
  {
271
  "epoch": 8.0,
272
- "eval_loss": 0.1449001282453537,
273
- "eval_rmse": 0.38065749406814575,
274
- "eval_runtime": 9.5176,
275
- "eval_samples_per_second": 105.069,
276
- "eval_steps_per_second": 1.681,
277
  "step": 712
278
  },
279
  {
280
  "epoch": 8.146067415730338,
281
- "grad_norm": 2082431.625,
282
  "learning_rate": 3.447712418300654e-05,
283
- "loss": 0.1591,
284
  "step": 725
285
  },
286
  {
287
  "epoch": 8.426966292134832,
288
- "grad_norm": 2036905.875,
289
  "learning_rate": 3.366013071895425e-05,
290
- "loss": 0.1563,
291
  "step": 750
292
  },
293
  {
294
  "epoch": 8.707865168539326,
295
- "grad_norm": 77962.4765625,
296
  "learning_rate": 3.284313725490196e-05,
297
- "loss": 0.1551,
298
  "step": 775
299
  },
300
  {
301
  "epoch": 8.98876404494382,
302
- "grad_norm": 1343378.0,
303
  "learning_rate": 3.202614379084967e-05,
304
- "loss": 0.1512,
305
  "step": 800
306
  },
307
  {
308
  "epoch": 9.0,
309
- "eval_loss": 0.15092459321022034,
310
- "eval_rmse": 0.38849014043807983,
311
- "eval_runtime": 9.3212,
312
- "eval_samples_per_second": 107.283,
313
- "eval_steps_per_second": 1.717,
314
  "step": 801
315
  },
316
  {
317
  "epoch": 9.269662921348315,
318
- "grad_norm": 2264512.0,
319
  "learning_rate": 3.120915032679739e-05,
320
- "loss": 0.1495,
321
  "step": 825
322
  },
323
  {
324
  "epoch": 9.55056179775281,
325
- "grad_norm": 1154337.125,
326
  "learning_rate": 3.0392156862745097e-05,
327
- "loss": 0.1536,
328
  "step": 850
329
  },
330
  {
331
  "epoch": 9.831460674157304,
332
- "grad_norm": 1739664.875,
333
  "learning_rate": 2.957516339869281e-05,
334
- "loss": 0.1491,
335
  "step": 875
336
  },
337
  {
338
  "epoch": 10.0,
339
- "eval_loss": 0.1450948417186737,
340
- "eval_rmse": 0.3809131681919098,
341
- "eval_runtime": 9.5293,
342
- "eval_samples_per_second": 104.94,
343
- "eval_steps_per_second": 1.679,
344
  "step": 890
345
  },
346
  {
347
  "epoch": 10.112359550561798,
348
- "grad_norm": 1887076.125,
349
  "learning_rate": 2.8758169934640522e-05,
350
- "loss": 0.1586,
351
  "step": 900
352
  },
353
  {
354
  "epoch": 10.393258426966293,
355
- "grad_norm": 2062948.625,
356
  "learning_rate": 2.7941176470588236e-05,
357
- "loss": 0.1537,
358
  "step": 925
359
  },
360
  {
361
  "epoch": 10.674157303370787,
362
- "grad_norm": 622017.4375,
363
  "learning_rate": 2.7124183006535947e-05,
364
- "loss": 0.1586,
365
  "step": 950
366
  },
367
  {
368
  "epoch": 10.955056179775282,
369
- "grad_norm": 279420.875,
370
  "learning_rate": 2.630718954248366e-05,
371
- "loss": 0.1615,
372
  "step": 975
373
  },
374
  {
375
  "epoch": 11.0,
376
- "eval_loss": 0.14514553546905518,
377
- "eval_rmse": 0.3809797465801239,
378
- "eval_runtime": 9.4448,
379
- "eval_samples_per_second": 105.879,
380
- "eval_steps_per_second": 1.694,
381
  "step": 979
382
  },
383
  {
384
  "epoch": 11.235955056179776,
385
- "grad_norm": 468609.03125,
386
  "learning_rate": 2.5490196078431373e-05,
387
- "loss": 0.1498,
388
  "step": 1000
389
  },
390
  {
391
  "epoch": 11.51685393258427,
392
- "grad_norm": 420918.21875,
393
  "learning_rate": 2.4673202614379087e-05,
394
- "loss": 0.1511,
395
  "step": 1025
396
  },
397
  {
398
  "epoch": 11.797752808988765,
399
- "grad_norm": 1311995.125,
400
  "learning_rate": 2.38562091503268e-05,
401
- "loss": 0.1549,
402
  "step": 1050
403
  },
404
  {
405
  "epoch": 12.0,
406
- "eval_loss": 0.14528058469295502,
407
- "eval_rmse": 0.38115692138671875,
408
- "eval_runtime": 9.5503,
409
- "eval_samples_per_second": 104.709,
410
- "eval_steps_per_second": 1.675,
411
  "step": 1068
412
  },
413
  {
414
  "epoch": 12.07865168539326,
415
- "grad_norm": 2639625.0,
416
  "learning_rate": 2.303921568627451e-05,
417
- "loss": 0.153,
418
  "step": 1075
419
  },
420
  {
421
  "epoch": 12.359550561797754,
422
- "grad_norm": 774080.25,
423
  "learning_rate": 2.2222222222222223e-05,
424
- "loss": 0.1482,
425
  "step": 1100
426
  },
427
  {
428
  "epoch": 12.640449438202246,
429
- "grad_norm": 55058.171875,
430
  "learning_rate": 2.1405228758169934e-05,
431
- "loss": 0.1468,
432
  "step": 1125
433
  },
434
  {
435
  "epoch": 12.921348314606742,
436
- "grad_norm": 386005.375,
437
  "learning_rate": 2.058823529411765e-05,
438
- "loss": 0.1511,
439
  "step": 1150
440
  },
441
  {
442
  "epoch": 13.0,
443
- "eval_loss": 0.1451251208782196,
444
- "eval_rmse": 0.380952924489975,
445
- "eval_runtime": 9.5734,
446
- "eval_samples_per_second": 104.456,
447
- "eval_steps_per_second": 1.671,
448
  "step": 1157
449
  },
450
  {
451
  "epoch": 13.202247191011235,
452
- "grad_norm": 1428037.875,
453
  "learning_rate": 1.977124183006536e-05,
454
- "loss": 0.1493,
455
  "step": 1175
456
  },
457
  {
458
  "epoch": 13.48314606741573,
459
- "grad_norm": 1060426.0,
460
  "learning_rate": 1.895424836601307e-05,
461
- "loss": 0.1475,
462
  "step": 1200
463
  },
464
  {
465
  "epoch": 13.764044943820224,
466
- "grad_norm": 393123.84375,
467
  "learning_rate": 1.8137254901960785e-05,
468
- "loss": 0.146,
469
  "step": 1225
470
  },
471
  {
472
  "epoch": 14.0,
473
- "eval_loss": 0.14483922719955444,
474
- "eval_rmse": 0.3805775046348572,
475
- "eval_runtime": 9.5961,
476
- "eval_samples_per_second": 104.209,
477
- "eval_steps_per_second": 1.667,
478
  "step": 1246
479
  },
480
  {
481
  "epoch": 14.044943820224718,
482
- "grad_norm": 55979.83203125,
483
  "learning_rate": 1.7320261437908496e-05,
484
- "loss": 0.1474,
485
  "step": 1250
486
  },
487
  {
488
  "epoch": 14.325842696629213,
489
- "grad_norm": 1408408.875,
490
  "learning_rate": 1.650326797385621e-05,
491
- "loss": 0.1483,
492
  "step": 1275
493
  },
494
  {
495
  "epoch": 14.606741573033707,
496
- "grad_norm": 1193337.625,
497
  "learning_rate": 1.568627450980392e-05,
498
- "loss": 0.1472,
499
  "step": 1300
500
  },
501
  {
502
  "epoch": 14.887640449438202,
503
- "grad_norm": 138167.046875,
504
  "learning_rate": 1.4869281045751634e-05,
505
- "loss": 0.1495,
506
  "step": 1325
507
  },
508
  {
509
  "epoch": 15.0,
510
- "eval_loss": 0.14416830241680145,
511
- "eval_rmse": 0.37969499826431274,
512
- "eval_runtime": 9.6797,
513
- "eval_samples_per_second": 103.309,
514
- "eval_steps_per_second": 1.653,
515
  "step": 1335
516
  },
517
  {
518
  "epoch": 15.168539325842696,
519
- "grad_norm": 1532522.0,
520
  "learning_rate": 1.4052287581699347e-05,
521
- "loss": 0.1461,
522
  "step": 1350
523
  },
524
  {
525
  "epoch": 15.44943820224719,
526
- "grad_norm": 562337.5,
527
  "learning_rate": 1.323529411764706e-05,
528
- "loss": 0.1492,
529
  "step": 1375
530
  },
531
  {
532
  "epoch": 15.730337078651685,
533
- "grad_norm": 814187.375,
534
  "learning_rate": 1.2418300653594772e-05,
535
- "loss": 0.1463,
536
  "step": 1400
537
  },
538
  {
539
  "epoch": 16.0,
540
- "eval_loss": 0.1432572454214096,
541
- "eval_rmse": 0.37849339842796326,
542
- "eval_runtime": 9.6044,
543
- "eval_samples_per_second": 104.119,
544
- "eval_steps_per_second": 1.666,
545
  "step": 1424
546
  },
547
  {
548
  "epoch": 16.01123595505618,
549
- "grad_norm": 670902.375,
550
  "learning_rate": 1.1601307189542485e-05,
551
- "loss": 0.148,
552
  "step": 1425
553
  },
554
  {
555
  "epoch": 16.292134831460675,
556
- "grad_norm": 1786915.75,
557
  "learning_rate": 1.0784313725490197e-05,
558
  "loss": 0.1464,
559
  "step": 1450
560
  },
561
  {
562
  "epoch": 16.573033707865168,
563
- "grad_norm": 204775.359375,
564
  "learning_rate": 9.96732026143791e-06,
565
- "loss": 0.147,
566
  "step": 1475
567
  },
568
  {
569
  "epoch": 16.853932584269664,
570
- "grad_norm": 434166.53125,
571
  "learning_rate": 9.150326797385621e-06,
572
- "loss": 0.1455,
573
  "step": 1500
574
  },
575
  {
576
  "epoch": 17.0,
577
- "eval_loss": 0.14278554916381836,
578
- "eval_rmse": 0.37786978483200073,
579
- "eval_runtime": 9.3464,
580
- "eval_samples_per_second": 106.993,
581
- "eval_steps_per_second": 1.712,
582
  "step": 1513
583
  },
584
  {
585
  "epoch": 17.134831460674157,
586
- "grad_norm": 427259.8125,
587
  "learning_rate": 8.333333333333334e-06,
588
- "loss": 0.1476,
589
  "step": 1525
590
  },
591
  {
592
  "epoch": 17.415730337078653,
593
- "grad_norm": 216315.09375,
594
  "learning_rate": 7.5163398692810456e-06,
595
- "loss": 0.1481,
596
  "step": 1550
597
  },
598
  {
599
  "epoch": 17.696629213483146,
600
- "grad_norm": 842487.625,
601
  "learning_rate": 6.699346405228758e-06,
602
- "loss": 0.1435,
603
  "step": 1575
604
  },
605
  {
606
  "epoch": 17.97752808988764,
607
- "grad_norm": 376340.8125,
608
  "learning_rate": 5.882352941176471e-06,
609
- "loss": 0.144,
610
  "step": 1600
611
  },
612
  {
613
  "epoch": 18.0,
614
- "eval_loss": 0.1422489583492279,
615
- "eval_rmse": 0.37715908885002136,
616
- "eval_runtime": 9.364,
617
- "eval_samples_per_second": 106.792,
618
- "eval_steps_per_second": 1.709,
619
  "step": 1602
620
  },
621
  {
622
  "epoch": 18.258426966292134,
623
- "grad_norm": 287156.1875,
624
  "learning_rate": 5.065359477124184e-06,
625
- "loss": 0.1435,
626
  "step": 1625
627
  },
628
  {
629
  "epoch": 18.53932584269663,
630
- "grad_norm": 129252.7109375,
631
  "learning_rate": 4.2483660130718954e-06,
632
- "loss": 0.142,
633
  "step": 1650
634
  },
635
  {
636
  "epoch": 18.820224719101123,
637
- "grad_norm": 415142.15625,
638
  "learning_rate": 3.431372549019608e-06,
639
- "loss": 0.1461,
640
  "step": 1675
641
  },
642
  {
643
  "epoch": 19.0,
644
- "eval_loss": 0.14274005591869354,
645
- "eval_rmse": 0.3778095543384552,
646
- "eval_runtime": 9.4422,
647
- "eval_samples_per_second": 105.907,
648
- "eval_steps_per_second": 1.695,
649
  "step": 1691
650
  },
651
  {
652
  "epoch": 19.10112359550562,
653
- "grad_norm": 202907.5625,
654
  "learning_rate": 2.6143790849673204e-06,
655
- "loss": 0.1441,
656
  "step": 1700
657
  },
658
  {
659
  "epoch": 19.382022471910112,
660
- "grad_norm": 269955.5625,
661
  "learning_rate": 1.7973856209150326e-06,
662
- "loss": 0.1438,
663
  "step": 1725
664
  },
665
  {
666
  "epoch": 19.662921348314608,
667
- "grad_norm": 197176.828125,
668
  "learning_rate": 9.80392156862745e-07,
669
- "loss": 0.1436,
670
  "step": 1750
671
  },
672
  {
673
  "epoch": 19.9438202247191,
674
- "grad_norm": 579304.625,
675
  "learning_rate": 1.6339869281045752e-07,
676
- "loss": 0.1425,
677
  "step": 1775
678
  },
679
  {
680
  "epoch": 20.0,
681
- "eval_loss": 0.1417153775691986,
682
- "eval_rmse": 0.3764510452747345,
683
- "eval_runtime": 9.4376,
684
- "eval_samples_per_second": 105.959,
685
- "eval_steps_per_second": 1.695,
686
  "step": 1780
687
  },
688
  {
689
  "epoch": 20.0,
690
  "step": 1780,
691
  "total_flos": 0.0,
692
- "train_loss": 0.17676029650682815,
693
- "train_runtime": 3336.7226,
694
- "train_samples_per_second": 33.937,
695
- "train_steps_per_second": 0.533
696
  }
697
  ],
698
  "logging_steps": 25,
 
1
  {
2
+ "best_metric": 0.14487937092781067,
3
+ "best_model_checkpoint": "limbxy_pose/checkpoint-1602",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
6
  "global_step": 1780,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.2808988764044944,
13
+ "grad_norm": 1.6329148591013448e-13,
14
  "learning_rate": 5e-06,
15
  "loss": 0.3282,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 0.5617977528089888,
20
+ "grad_norm": 609844.4375,
21
  "learning_rate": 1e-05,
22
+ "loss": 0.3197,
23
  "step": 50
24
  },
25
  {
26
  "epoch": 0.8426966292134831,
27
+ "grad_norm": 51261.34765625,
28
  "learning_rate": 1.5e-05,
29
+ "loss": 0.2128,
30
  "step": 75
31
  },
32
  {
33
  "epoch": 1.0,
34
+ "eval_loss": 0.17451409995555878,
35
+ "eval_rmse": 0.4177488386631012,
36
+ "eval_runtime": 9.5044,
37
+ "eval_samples_per_second": 105.214,
38
+ "eval_steps_per_second": 1.683,
39
  "step": 89
40
  },
41
  {
42
  "epoch": 1.1235955056179776,
43
+ "grad_norm": 2084511.375,
44
  "learning_rate": 2e-05,
45
+ "loss": 0.198,
46
  "step": 100
47
  },
48
  {
49
  "epoch": 1.404494382022472,
50
+ "grad_norm": 721293.5625,
51
  "learning_rate": 2.5e-05,
52
+ "loss": 0.1779,
53
  "step": 125
54
  },
55
  {
56
  "epoch": 1.6853932584269664,
57
+ "grad_norm": 1280031.75,
58
  "learning_rate": 3e-05,
59
+ "loss": 0.1604,
60
  "step": 150
61
  },
62
  {
63
  "epoch": 1.9662921348314608,
64
+ "grad_norm": 1277506.125,
65
  "learning_rate": 3.5e-05,
66
+ "loss": 0.1574,
67
  "step": 175
68
  },
69
  {
70
  "epoch": 2.0,
71
+ "eval_loss": 0.148615300655365,
72
+ "eval_rmse": 0.3855065405368805,
73
+ "eval_runtime": 9.2155,
74
+ "eval_samples_per_second": 108.513,
75
+ "eval_steps_per_second": 1.736,
76
  "step": 178
77
  },
78
  {
79
  "epoch": 2.247191011235955,
80
+ "grad_norm": 1975641.0,
81
  "learning_rate": 4e-05,
82
+ "loss": 0.1589,
83
  "step": 200
84
  },
85
  {
86
  "epoch": 2.5280898876404496,
87
+ "grad_norm": 2331497.75,
88
  "learning_rate": 4.5e-05,
89
+ "loss": 0.1757,
90
  "step": 225
91
  },
92
  {
93
  "epoch": 2.808988764044944,
94
+ "grad_norm": 4412882.0,
95
  "learning_rate": 5e-05,
96
+ "loss": 0.2045,
97
  "step": 250
98
  },
99
  {
100
  "epoch": 3.0,
101
+ "eval_loss": 0.15186643600463867,
102
+ "eval_rmse": 0.3897004723548889,
103
+ "eval_runtime": 9.3829,
104
+ "eval_samples_per_second": 106.576,
105
+ "eval_steps_per_second": 1.705,
106
  "step": 267
107
  },
108
  {
109
  "epoch": 3.0898876404494384,
110
+ "grad_norm": 1961631.5,
111
  "learning_rate": 4.918300653594771e-05,
112
+ "loss": 0.1809,
113
  "step": 275
114
  },
115
  {
116
  "epoch": 3.370786516853933,
117
+ "grad_norm": 5652000.5,
118
  "learning_rate": 4.8366013071895424e-05,
119
+ "loss": 0.1778,
120
  "step": 300
121
  },
122
  {
123
  "epoch": 3.6516853932584272,
124
+ "grad_norm": 1794376.5,
125
  "learning_rate": 4.7549019607843135e-05,
126
+ "loss": 0.1861,
127
  "step": 325
128
  },
129
  {
130
  "epoch": 3.932584269662921,
131
+ "grad_norm": 1615815.75,
132
  "learning_rate": 4.673202614379085e-05,
133
+ "loss": 0.1697,
134
  "step": 350
135
  },
136
  {
137
  "epoch": 4.0,
138
+ "eval_loss": 0.16323314607143402,
139
+ "eval_rmse": 0.4040212333202362,
140
+ "eval_runtime": 9.3078,
141
+ "eval_samples_per_second": 107.437,
142
+ "eval_steps_per_second": 1.719,
143
  "step": 356
144
  },
145
  {
146
  "epoch": 4.213483146067416,
147
+ "grad_norm": 2846568.25,
148
  "learning_rate": 4.5915032679738564e-05,
149
+ "loss": 0.1743,
150
  "step": 375
151
  },
152
  {
153
  "epoch": 4.49438202247191,
154
+ "grad_norm": 2384729.0,
155
  "learning_rate": 4.5098039215686275e-05,
156
+ "loss": 0.1723,
157
  "step": 400
158
  },
159
  {
160
  "epoch": 4.775280898876405,
161
+ "grad_norm": 1569872.375,
162
  "learning_rate": 4.4281045751633986e-05,
163
+ "loss": 0.1818,
164
  "step": 425
165
  },
166
  {
167
  "epoch": 5.0,
168
+ "eval_loss": 0.19486868381500244,
169
+ "eval_rmse": 0.44143933057785034,
170
+ "eval_runtime": 9.2813,
171
+ "eval_samples_per_second": 107.744,
172
+ "eval_steps_per_second": 1.724,
173
  "step": 445
174
  },
175
  {
176
  "epoch": 5.056179775280899,
177
+ "grad_norm": 343663.96875,
178
  "learning_rate": 4.3464052287581704e-05,
179
+ "loss": 0.1845,
180
  "step": 450
181
  },
182
  {
183
  "epoch": 5.337078651685394,
184
+ "grad_norm": 1288543.625,
185
  "learning_rate": 4.2647058823529415e-05,
186
+ "loss": 0.1941,
187
  "step": 475
188
  },
189
  {
190
  "epoch": 5.617977528089888,
191
+ "grad_norm": 4547344.0,
192
  "learning_rate": 4.1830065359477126e-05,
193
+ "loss": 0.1685,
194
  "step": 500
195
  },
196
  {
197
  "epoch": 5.898876404494382,
198
+ "grad_norm": 185522.03125,
199
  "learning_rate": 4.101307189542484e-05,
200
+ "loss": 0.1624,
201
  "step": 525
202
  },
203
  {
204
  "epoch": 6.0,
205
+ "eval_loss": 0.14749938249588013,
206
+ "eval_rmse": 0.3840564787387848,
207
+ "eval_runtime": 9.2186,
208
+ "eval_samples_per_second": 108.477,
209
+ "eval_steps_per_second": 1.736,
210
  "step": 534
211
  },
212
  {
213
  "epoch": 6.179775280898877,
214
+ "grad_norm": 2298808.25,
215
  "learning_rate": 4.0196078431372555e-05,
216
+ "loss": 0.1578,
217
  "step": 550
218
  },
219
  {
220
  "epoch": 6.460674157303371,
221
+ "grad_norm": 1625852.375,
222
  "learning_rate": 3.9379084967320266e-05,
223
+ "loss": 0.162,
224
  "step": 575
225
  },
226
  {
227
  "epoch": 6.741573033707866,
228
+ "grad_norm": 2281359.0,
229
  "learning_rate": 3.8562091503267977e-05,
230
+ "loss": 0.1645,
231
  "step": 600
232
  },
233
  {
234
  "epoch": 7.0,
235
+ "eval_loss": 0.14835500717163086,
236
+ "eval_rmse": 0.38516879081726074,
237
+ "eval_runtime": 9.4807,
238
+ "eval_samples_per_second": 105.477,
239
+ "eval_steps_per_second": 1.688,
240
  "step": 623
241
  },
242
  {
243
  "epoch": 7.022471910112359,
244
+ "grad_norm": 673713.0625,
245
  "learning_rate": 3.774509803921569e-05,
246
+ "loss": 0.1554,
247
  "step": 625
248
  },
249
  {
250
  "epoch": 7.303370786516854,
251
+ "grad_norm": 260786.171875,
252
  "learning_rate": 3.6928104575163405e-05,
253
+ "loss": 0.1656,
254
  "step": 650
255
  },
256
  {
257
  "epoch": 7.584269662921348,
258
+ "grad_norm": 2860977.5,
259
  "learning_rate": 3.611111111111111e-05,
260
+ "loss": 0.1642,
261
  "step": 675
262
  },
263
  {
264
  "epoch": 7.865168539325842,
265
+ "grad_norm": 1183203.125,
266
  "learning_rate": 3.529411764705883e-05,
267
+ "loss": 0.1655,
268
  "step": 700
269
  },
270
  {
271
  "epoch": 8.0,
272
+ "eval_loss": 0.14708983898162842,
273
+ "eval_rmse": 0.3835229277610779,
274
+ "eval_runtime": 9.2282,
275
+ "eval_samples_per_second": 108.363,
276
+ "eval_steps_per_second": 1.734,
277
  "step": 712
278
  },
279
  {
280
  "epoch": 8.146067415730338,
281
+ "grad_norm": 2235303.5,
282
  "learning_rate": 3.447712418300654e-05,
283
+ "loss": 0.1543,
284
  "step": 725
285
  },
286
  {
287
  "epoch": 8.426966292134832,
288
+ "grad_norm": 1311017.875,
289
  "learning_rate": 3.366013071895425e-05,
290
+ "loss": 0.1507,
291
  "step": 750
292
  },
293
  {
294
  "epoch": 8.707865168539326,
295
+ "grad_norm": 1330708.25,
296
  "learning_rate": 3.284313725490196e-05,
297
+ "loss": 0.1625,
298
  "step": 775
299
  },
300
  {
301
  "epoch": 8.98876404494382,
302
+ "grad_norm": 553780.625,
303
  "learning_rate": 3.202614379084967e-05,
304
+ "loss": 0.1594,
305
  "step": 800
306
  },
307
  {
308
  "epoch": 9.0,
309
+ "eval_loss": 0.15354213118553162,
310
+ "eval_rmse": 0.3918445110321045,
311
+ "eval_runtime": 9.1297,
312
+ "eval_samples_per_second": 109.533,
313
+ "eval_steps_per_second": 1.753,
314
  "step": 801
315
  },
316
  {
317
  "epoch": 9.269662921348315,
318
+ "grad_norm": 208979.359375,
319
  "learning_rate": 3.120915032679739e-05,
320
+ "loss": 0.1518,
321
  "step": 825
322
  },
323
  {
324
  "epoch": 9.55056179775281,
325
+ "grad_norm": 78840.34375,
326
  "learning_rate": 3.0392156862745097e-05,
327
+ "loss": 0.1552,
328
  "step": 850
329
  },
330
  {
331
  "epoch": 9.831460674157304,
332
+ "grad_norm": 1873128.625,
333
  "learning_rate": 2.957516339869281e-05,
334
+ "loss": 0.1513,
335
  "step": 875
336
  },
337
  {
338
  "epoch": 10.0,
339
+ "eval_loss": 0.1448940634727478,
340
+ "eval_rmse": 0.38064953684806824,
341
+ "eval_runtime": 9.4691,
342
+ "eval_samples_per_second": 105.607,
343
+ "eval_steps_per_second": 1.69,
344
  "step": 890
345
  },
346
  {
347
  "epoch": 10.112359550561798,
348
+ "grad_norm": 1098680.625,
349
  "learning_rate": 2.8758169934640522e-05,
350
+ "loss": 0.1551,
351
  "step": 900
352
  },
353
  {
354
  "epoch": 10.393258426966293,
355
+ "grad_norm": 1874154.875,
356
  "learning_rate": 2.7941176470588236e-05,
357
+ "loss": 0.1516,
358
  "step": 925
359
  },
360
  {
361
  "epoch": 10.674157303370787,
362
+ "grad_norm": 53160.01953125,
363
  "learning_rate": 2.7124183006535947e-05,
364
+ "loss": 0.1466,
365
  "step": 950
366
  },
367
  {
368
  "epoch": 10.955056179775282,
369
+ "grad_norm": 929561.25,
370
  "learning_rate": 2.630718954248366e-05,
371
+ "loss": 0.1488,
372
  "step": 975
373
  },
374
  {
375
  "epoch": 11.0,
376
+ "eval_loss": 0.14547079801559448,
377
+ "eval_rmse": 0.38140633702278137,
378
+ "eval_runtime": 9.1414,
379
+ "eval_samples_per_second": 109.393,
380
+ "eval_steps_per_second": 1.75,
381
  "step": 979
382
  },
383
  {
384
  "epoch": 11.235955056179776,
385
+ "grad_norm": 1361347.875,
386
  "learning_rate": 2.5490196078431373e-05,
387
+ "loss": 0.1457,
388
  "step": 1000
389
  },
390
  {
391
  "epoch": 11.51685393258427,
392
+ "grad_norm": 72723.1953125,
393
  "learning_rate": 2.4673202614379087e-05,
394
+ "loss": 0.1508,
395
  "step": 1025
396
  },
397
  {
398
  "epoch": 11.797752808988765,
399
+ "grad_norm": 93677.0625,
400
  "learning_rate": 2.38562091503268e-05,
401
+ "loss": 0.1507,
402
  "step": 1050
403
  },
404
  {
405
  "epoch": 12.0,
406
+ "eval_loss": 0.1535731852054596,
407
+ "eval_rmse": 0.3918841481208801,
408
+ "eval_runtime": 9.5703,
409
+ "eval_samples_per_second": 104.49,
410
+ "eval_steps_per_second": 1.672,
411
  "step": 1068
412
  },
413
  {
414
  "epoch": 12.07865168539326,
415
+ "grad_norm": 1383022.125,
416
  "learning_rate": 2.303921568627451e-05,
417
+ "loss": 0.155,
418
  "step": 1075
419
  },
420
  {
421
  "epoch": 12.359550561797754,
422
+ "grad_norm": 1435498.5,
423
  "learning_rate": 2.2222222222222223e-05,
424
+ "loss": 0.15,
425
  "step": 1100
426
  },
427
  {
428
  "epoch": 12.640449438202246,
429
+ "grad_norm": 256395.265625,
430
  "learning_rate": 2.1405228758169934e-05,
431
+ "loss": 0.1465,
432
  "step": 1125
433
  },
434
  {
435
  "epoch": 12.921348314606742,
436
+ "grad_norm": 258689.03125,
437
  "learning_rate": 2.058823529411765e-05,
438
+ "loss": 0.1522,
439
  "step": 1150
440
  },
441
  {
442
  "epoch": 13.0,
443
+ "eval_loss": 0.14494504034519196,
444
+ "eval_rmse": 0.380716472864151,
445
+ "eval_runtime": 9.4294,
446
+ "eval_samples_per_second": 106.051,
447
+ "eval_steps_per_second": 1.697,
448
  "step": 1157
449
  },
450
  {
451
  "epoch": 13.202247191011235,
452
+ "grad_norm": 1009406.375,
453
  "learning_rate": 1.977124183006536e-05,
454
+ "loss": 0.1491,
455
  "step": 1175
456
  },
457
  {
458
  "epoch": 13.48314606741573,
459
+ "grad_norm": 720892.125,
460
  "learning_rate": 1.895424836601307e-05,
461
+ "loss": 0.1502,
462
  "step": 1200
463
  },
464
  {
465
  "epoch": 13.764044943820224,
466
+ "grad_norm": 48925.546875,
467
  "learning_rate": 1.8137254901960785e-05,
468
+ "loss": 0.1458,
469
  "step": 1225
470
  },
471
  {
472
  "epoch": 14.0,
473
+ "eval_loss": 0.14527221024036407,
474
+ "eval_rmse": 0.3811459243297577,
475
+ "eval_runtime": 9.2593,
476
+ "eval_samples_per_second": 108.0,
477
+ "eval_steps_per_second": 1.728,
478
  "step": 1246
479
  },
480
  {
481
  "epoch": 14.044943820224718,
482
+ "grad_norm": 421335.125,
483
  "learning_rate": 1.7320261437908496e-05,
484
+ "loss": 0.1485,
485
  "step": 1250
486
  },
487
  {
488
  "epoch": 14.325842696629213,
489
+ "grad_norm": 375955.40625,
490
  "learning_rate": 1.650326797385621e-05,
491
+ "loss": 0.1457,
492
  "step": 1275
493
  },
494
  {
495
  "epoch": 14.606741573033707,
496
+ "grad_norm": 624158.0,
497
  "learning_rate": 1.568627450980392e-05,
498
+ "loss": 0.1498,
499
  "step": 1300
500
  },
501
  {
502
  "epoch": 14.887640449438202,
503
+ "grad_norm": 115186.8984375,
504
  "learning_rate": 1.4869281045751634e-05,
505
+ "loss": 0.1506,
506
  "step": 1325
507
  },
508
  {
509
  "epoch": 15.0,
510
+ "eval_loss": 0.1455306112766266,
511
+ "eval_rmse": 0.3814847767353058,
512
+ "eval_runtime": 9.3528,
513
+ "eval_samples_per_second": 106.92,
514
+ "eval_steps_per_second": 1.711,
515
  "step": 1335
516
  },
517
  {
518
  "epoch": 15.168539325842696,
519
+ "grad_norm": 1805258.0,
520
  "learning_rate": 1.4052287581699347e-05,
521
+ "loss": 0.1467,
522
  "step": 1350
523
  },
524
  {
525
  "epoch": 15.44943820224719,
526
+ "grad_norm": 520502.5625,
527
  "learning_rate": 1.323529411764706e-05,
528
+ "loss": 0.1529,
529
  "step": 1375
530
  },
531
  {
532
  "epoch": 15.730337078651685,
533
+ "grad_norm": 2020412.75,
534
  "learning_rate": 1.2418300653594772e-05,
535
+ "loss": 0.1505,
536
  "step": 1400
537
  },
538
  {
539
  "epoch": 16.0,
540
+ "eval_loss": 0.1451566517353058,
541
+ "eval_rmse": 0.3809943199157715,
542
+ "eval_runtime": 9.215,
543
+ "eval_samples_per_second": 108.518,
544
+ "eval_steps_per_second": 1.736,
545
  "step": 1424
546
  },
547
  {
548
  "epoch": 16.01123595505618,
549
+ "grad_norm": 252456.1875,
550
  "learning_rate": 1.1601307189542485e-05,
551
+ "loss": 0.1481,
552
  "step": 1425
553
  },
554
  {
555
  "epoch": 16.292134831460675,
556
+ "grad_norm": 267339.03125,
557
  "learning_rate": 1.0784313725490197e-05,
558
  "loss": 0.1464,
559
  "step": 1450
560
  },
561
  {
562
  "epoch": 16.573033707865168,
563
+ "grad_norm": 482848.15625,
564
  "learning_rate": 9.96732026143791e-06,
565
+ "loss": 0.1482,
566
  "step": 1475
567
  },
568
  {
569
  "epoch": 16.853932584269664,
570
+ "grad_norm": 297641.71875,
571
  "learning_rate": 9.150326797385621e-06,
572
+ "loss": 0.1463,
573
  "step": 1500
574
  },
575
  {
576
  "epoch": 17.0,
577
+ "eval_loss": 0.1449102759361267,
578
+ "eval_rmse": 0.38067084550857544,
579
+ "eval_runtime": 9.2491,
580
+ "eval_samples_per_second": 108.119,
581
+ "eval_steps_per_second": 1.73,
582
  "step": 1513
583
  },
584
  {
585
  "epoch": 17.134831460674157,
586
+ "grad_norm": 353910.8125,
587
  "learning_rate": 8.333333333333334e-06,
588
+ "loss": 0.1481,
589
  "step": 1525
590
  },
591
  {
592
  "epoch": 17.415730337078653,
593
+ "grad_norm": 847917.6875,
594
  "learning_rate": 7.5163398692810456e-06,
595
+ "loss": 0.1494,
596
  "step": 1550
597
  },
598
  {
599
  "epoch": 17.696629213483146,
600
+ "grad_norm": 197619.375,
601
  "learning_rate": 6.699346405228758e-06,
602
+ "loss": 0.145,
603
  "step": 1575
604
  },
605
  {
606
  "epoch": 17.97752808988764,
607
+ "grad_norm": 934886.75,
608
  "learning_rate": 5.882352941176471e-06,
609
+ "loss": 0.1463,
610
  "step": 1600
611
  },
612
  {
613
  "epoch": 18.0,
614
+ "eval_loss": 0.14487937092781067,
615
+ "eval_rmse": 0.380630224943161,
616
+ "eval_runtime": 9.3303,
617
+ "eval_samples_per_second": 107.177,
618
+ "eval_steps_per_second": 1.715,
619
  "step": 1602
620
  },
621
  {
622
  "epoch": 18.258426966292134,
623
+ "grad_norm": 178175.765625,
624
  "learning_rate": 5.065359477124184e-06,
625
+ "loss": 0.1456,
626
  "step": 1625
627
  },
628
  {
629
  "epoch": 18.53932584269663,
630
+ "grad_norm": 282639.03125,
631
  "learning_rate": 4.2483660130718954e-06,
632
+ "loss": 0.145,
633
  "step": 1650
634
  },
635
  {
636
  "epoch": 18.820224719101123,
637
+ "grad_norm": 338323.84375,
638
  "learning_rate": 3.431372549019608e-06,
639
+ "loss": 0.1494,
640
  "step": 1675
641
  },
642
  {
643
  "epoch": 19.0,
644
+ "eval_loss": 0.1456519514322281,
645
+ "eval_rmse": 0.38164374232292175,
646
+ "eval_runtime": 9.5481,
647
+ "eval_samples_per_second": 104.733,
648
+ "eval_steps_per_second": 1.676,
649
  "step": 1691
650
  },
651
  {
652
  "epoch": 19.10112359550562,
653
+ "grad_norm": 340422.8125,
654
  "learning_rate": 2.6143790849673204e-06,
655
+ "loss": 0.147,
656
  "step": 1700
657
  },
658
  {
659
  "epoch": 19.382022471910112,
660
+ "grad_norm": 366319.65625,
661
  "learning_rate": 1.7973856209150326e-06,
662
+ "loss": 0.1471,
663
  "step": 1725
664
  },
665
  {
666
  "epoch": 19.662921348314608,
667
+ "grad_norm": 642705.4375,
668
  "learning_rate": 9.80392156862745e-07,
669
+ "loss": 0.1467,
670
  "step": 1750
671
  },
672
  {
673
  "epoch": 19.9438202247191,
674
+ "grad_norm": 455884.28125,
675
  "learning_rate": 1.6339869281045752e-07,
676
+ "loss": 0.1454,
677
  "step": 1775
678
  },
679
  {
680
  "epoch": 20.0,
681
+ "eval_loss": 0.14513316750526428,
682
+ "eval_rmse": 0.3809635043144226,
683
+ "eval_runtime": 9.5295,
684
+ "eval_samples_per_second": 104.937,
685
+ "eval_steps_per_second": 1.679,
686
  "step": 1780
687
  },
688
  {
689
  "epoch": 20.0,
690
  "step": 1780,
691
  "total_flos": 0.0,
692
+ "train_loss": 0.16350786438149012,
693
+ "train_runtime": 3270.5878,
694
+ "train_samples_per_second": 34.624,
695
+ "train_steps_per_second": 0.544
696
  }
697
  ],
698
  "logging_steps": 25,