c14kevincardenas commited on
Commit
29fc390
·
verified ·
1 Parent(s): 817ae49

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,9 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: c14kevincardenas/beit-large-patch16-384-limb
5
  tags:
 
 
 
6
  - generated_from_trainer
7
  model-index:
8
  - name: limbxy_ext_crop_hands
@@ -14,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # limbxy_ext_crop_hands
16
 
17
- This model is a fine-tuned version of [c14kevincardenas/beit-large-patch16-384-limb](https://huggingface.co/c14kevincardenas/beit-large-patch16-384-limb) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
  - Loss: 0.0130
20
  - Rmse: 0.1140
 
3
  license: apache-2.0
4
  base_model: c14kevincardenas/beit-large-patch16-384-limb
5
  tags:
6
+ - image-regression
7
+ - human-movement
8
+ - vision
9
  - generated_from_trainer
10
  model-index:
11
  - name: limbxy_ext_crop_hands
 
17
 
18
  # limbxy_ext_crop_hands
19
 
20
+ This model is a fine-tuned version of [c14kevincardenas/beit-large-patch16-384-limb](https://huggingface.co/c14kevincardenas/beit-large-patch16-384-limb) on the c14kevincardenas/beta_caller_284_limbxy_ext_crop_hands dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.0130
23
  - Rmse: 0.1140
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_loss": 0.008117899298667908,
4
- "eval_rmse": 0.09012944251298904,
5
- "eval_runtime": 5.0777,
6
- "eval_samples_per_second": 102.998,
7
- "eval_steps_per_second": 1.772,
8
  "total_flos": 0.0,
9
- "train_loss": 0.028548606136377823,
10
- "train_runtime": 1786.5714,
11
- "train_samples_per_second": 33.17,
12
- "train_steps_per_second": 0.526
13
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_loss": 0.012980646453797817,
4
+ "eval_rmse": 0.11399345099925995,
5
+ "eval_runtime": 4.987,
6
+ "eval_samples_per_second": 104.873,
7
+ "eval_steps_per_second": 1.805,
8
  "total_flos": 0.0,
9
+ "train_loss": 0.03554724605476602,
10
+ "train_runtime": 1780.6867,
11
+ "train_samples_per_second": 33.279,
12
+ "train_steps_per_second": 0.528
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_loss": 0.008117899298667908,
4
- "eval_rmse": 0.09012944251298904,
5
- "eval_runtime": 5.0777,
6
- "eval_samples_per_second": 102.998,
7
- "eval_steps_per_second": 1.772
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_loss": 0.012980646453797817,
4
+ "eval_rmse": 0.11399345099925995,
5
+ "eval_runtime": 4.987,
6
+ "eval_samples_per_second": 104.873,
7
+ "eval_steps_per_second": 1.805
8
  }
runs/Oct25_16-03-52_galactica.ad.cirange.net/events.out.tfevents.1729874125.galactica.ad.cirange.net.151270.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c31a8e5ab6a3cee7765ba53a47235eb16c989ba0bb44483f453e8682b9e2c1c1
3
+ size 407
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.028548606136377823,
5
- "train_runtime": 1786.5714,
6
- "train_samples_per_second": 33.17,
7
- "train_steps_per_second": 0.526
8
  }
 
1
  {
2
  "epoch": 20.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.03554724605476602,
5
+ "train_runtime": 1780.6867,
6
+ "train_samples_per_second": 33.279,
7
+ "train_steps_per_second": 0.528
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.008117899298667908,
3
  "best_model_checkpoint": "/mnt/ml_drive/kcardenas/limbxy_ext_crop_hands/checkpoint-940",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
@@ -10,451 +10,451 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.5319148936170213,
13
- "grad_norm": 828477.25,
14
  "learning_rate": 5e-06,
15
- "loss": 0.2561,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_loss": 0.14001771807670593,
21
- "eval_rmse": 0.37419962882995605,
22
- "eval_runtime": 5.5814,
23
- "eval_samples_per_second": 93.705,
24
- "eval_steps_per_second": 1.613,
25
  "step": 47
26
  },
27
  {
28
  "epoch": 1.0638297872340425,
29
- "grad_norm": 38252.140625,
30
  "learning_rate": 1e-05,
31
- "loss": 0.1555,
32
  "step": 50
33
  },
34
  {
35
  "epoch": 1.5957446808510638,
36
- "grad_norm": 5285939.0,
37
  "learning_rate": 1.5e-05,
38
- "loss": 0.1544,
39
  "step": 75
40
  },
41
  {
42
  "epoch": 2.0,
43
- "eval_loss": 0.0731724426150322,
44
- "eval_rmse": 0.2704622745513916,
45
- "eval_runtime": 5.1268,
46
- "eval_samples_per_second": 102.013,
47
- "eval_steps_per_second": 1.755,
48
  "step": 94
49
  },
50
  {
51
  "epoch": 2.127659574468085,
52
- "grad_norm": 1347811.375,
53
  "learning_rate": 2e-05,
54
- "loss": 0.1012,
55
  "step": 100
56
  },
57
  {
58
  "epoch": 2.6595744680851063,
59
- "grad_norm": 2469424.25,
60
  "learning_rate": 2.5e-05,
61
- "loss": 0.0404,
62
  "step": 125
63
  },
64
  {
65
  "epoch": 3.0,
66
- "eval_loss": 0.05305316299200058,
67
- "eval_rmse": 0.23046264052391052,
68
- "eval_runtime": 4.9002,
69
- "eval_samples_per_second": 106.731,
70
- "eval_steps_per_second": 1.837,
71
  "step": 141
72
  },
73
  {
74
  "epoch": 3.1914893617021276,
75
- "grad_norm": 3102997.0,
76
  "learning_rate": 3e-05,
77
- "loss": 0.0493,
78
  "step": 150
79
  },
80
  {
81
  "epoch": 3.723404255319149,
82
- "grad_norm": 3376308.0,
83
  "learning_rate": 3.5e-05,
84
- "loss": 0.0322,
85
  "step": 175
86
  },
87
  {
88
  "epoch": 4.0,
89
- "eval_loss": 0.015296007506549358,
90
- "eval_rmse": 0.12374413758516312,
91
- "eval_runtime": 4.9466,
92
- "eval_samples_per_second": 105.73,
93
- "eval_steps_per_second": 1.819,
94
  "step": 188
95
  },
96
  {
97
  "epoch": 4.25531914893617,
98
- "grad_norm": 2041911.875,
99
  "learning_rate": 4e-05,
100
- "loss": 0.0221,
101
  "step": 200
102
  },
103
  {
104
  "epoch": 4.787234042553192,
105
- "grad_norm": 752186.125,
106
  "learning_rate": 4.5e-05,
107
- "loss": 0.0284,
108
  "step": 225
109
  },
110
  {
111
  "epoch": 5.0,
112
- "eval_loss": 0.02443227916955948,
113
- "eval_rmse": 0.1564129739999771,
114
- "eval_runtime": 5.1046,
115
- "eval_samples_per_second": 102.456,
116
- "eval_steps_per_second": 1.763,
117
  "step": 235
118
  },
119
  {
120
  "epoch": 5.319148936170213,
121
- "grad_norm": 1242507.75,
122
  "learning_rate": 5e-05,
123
- "loss": 0.0169,
124
  "step": 250
125
  },
126
  {
127
  "epoch": 5.851063829787234,
128
- "grad_norm": 1721578.125,
129
  "learning_rate": 4.818840579710145e-05,
130
- "loss": 0.0252,
131
  "step": 275
132
  },
133
  {
134
  "epoch": 6.0,
135
- "eval_loss": 0.02938535064458847,
136
- "eval_rmse": 0.17153353989124298,
137
- "eval_runtime": 5.0847,
138
- "eval_samples_per_second": 102.857,
139
- "eval_steps_per_second": 1.77,
140
  "step": 282
141
  },
142
  {
143
  "epoch": 6.382978723404255,
144
- "grad_norm": 1635126.75,
145
  "learning_rate": 4.63768115942029e-05,
146
- "loss": 0.0254,
147
  "step": 300
148
  },
149
  {
150
  "epoch": 6.914893617021277,
151
- "grad_norm": 2097984.0,
152
  "learning_rate": 4.456521739130435e-05,
153
- "loss": 0.0123,
154
  "step": 325
155
  },
156
  {
157
  "epoch": 7.0,
158
- "eval_loss": 0.0171244814991951,
159
- "eval_rmse": 0.13094477355480194,
160
- "eval_runtime": 5.0335,
161
- "eval_samples_per_second": 103.904,
162
- "eval_steps_per_second": 1.788,
163
  "step": 329
164
  },
165
  {
166
  "epoch": 7.446808510638298,
167
- "grad_norm": 3019492.25,
168
  "learning_rate": 4.27536231884058e-05,
169
- "loss": 0.0148,
170
  "step": 350
171
  },
172
  {
173
  "epoch": 7.9787234042553195,
174
- "grad_norm": 3237462.5,
175
  "learning_rate": 4.094202898550725e-05,
176
- "loss": 0.0216,
177
  "step": 375
178
  },
179
  {
180
  "epoch": 8.0,
181
- "eval_loss": 0.015043811872601509,
182
- "eval_rmse": 0.12263169139623642,
183
- "eval_runtime": 4.8436,
184
- "eval_samples_per_second": 107.977,
185
- "eval_steps_per_second": 1.858,
186
  "step": 376
187
  },
188
  {
189
  "epoch": 8.51063829787234,
190
- "grad_norm": 2551103.0,
191
  "learning_rate": 3.91304347826087e-05,
192
- "loss": 0.0101,
193
  "step": 400
194
  },
195
  {
196
  "epoch": 9.0,
197
- "eval_loss": 0.024509919807314873,
198
- "eval_rmse": 0.15662036836147308,
199
- "eval_runtime": 5.014,
200
- "eval_samples_per_second": 104.309,
201
- "eval_steps_per_second": 1.795,
202
  "step": 423
203
  },
204
  {
205
  "epoch": 9.042553191489361,
206
- "grad_norm": 4249340.5,
207
  "learning_rate": 3.7318840579710144e-05,
208
- "loss": 0.0096,
209
  "step": 425
210
  },
211
  {
212
  "epoch": 9.574468085106384,
213
- "grad_norm": 963757.5,
214
  "learning_rate": 3.5507246376811596e-05,
215
- "loss": 0.012,
216
  "step": 450
217
  },
218
  {
219
  "epoch": 10.0,
220
- "eval_loss": 0.011943971738219261,
221
- "eval_rmse": 0.10932554304599762,
222
- "eval_runtime": 4.9998,
223
- "eval_samples_per_second": 104.604,
224
- "eval_steps_per_second": 1.8,
225
  "step": 470
226
  },
227
  {
228
  "epoch": 10.106382978723405,
229
- "grad_norm": 1531059.5,
230
  "learning_rate": 3.369565217391305e-05,
231
- "loss": 0.0073,
232
  "step": 475
233
  },
234
  {
235
  "epoch": 10.638297872340425,
236
- "grad_norm": 1131259.625,
237
  "learning_rate": 3.188405797101449e-05,
238
- "loss": 0.0083,
239
  "step": 500
240
  },
241
  {
242
  "epoch": 11.0,
243
- "eval_loss": 0.014895778149366379,
244
- "eval_rmse": 0.12203000485897064,
245
- "eval_runtime": 5.144,
246
- "eval_samples_per_second": 101.671,
247
- "eval_steps_per_second": 1.75,
248
  "step": 517
249
  },
250
  {
251
  "epoch": 11.170212765957446,
252
- "grad_norm": 1930957.75,
253
  "learning_rate": 3.0072463768115944e-05,
254
- "loss": 0.0071,
255
  "step": 525
256
  },
257
  {
258
  "epoch": 11.702127659574469,
259
- "grad_norm": 1709068.75,
260
  "learning_rate": 2.826086956521739e-05,
261
- "loss": 0.0072,
262
  "step": 550
263
  },
264
  {
265
  "epoch": 12.0,
266
- "eval_loss": 0.009299115277826786,
267
- "eval_rmse": 0.09643761068582535,
268
- "eval_runtime": 4.9587,
269
- "eval_samples_per_second": 105.471,
270
- "eval_steps_per_second": 1.815,
271
  "step": 564
272
  },
273
  {
274
  "epoch": 12.23404255319149,
275
- "grad_norm": 1622884.0,
276
  "learning_rate": 2.6449275362318844e-05,
277
- "loss": 0.0089,
278
  "step": 575
279
  },
280
  {
281
  "epoch": 12.76595744680851,
282
- "grad_norm": 780158.875,
283
  "learning_rate": 2.4637681159420292e-05,
284
- "loss": 0.006,
285
  "step": 600
286
  },
287
  {
288
  "epoch": 13.0,
289
- "eval_loss": 0.00986443180590868,
290
- "eval_rmse": 0.09930924326181412,
291
- "eval_runtime": 5.0729,
292
- "eval_samples_per_second": 103.096,
293
- "eval_steps_per_second": 1.774,
294
  "step": 611
295
  },
296
  {
297
  "epoch": 13.297872340425531,
298
- "grad_norm": 995551.3125,
299
  "learning_rate": 2.282608695652174e-05,
300
- "loss": 0.0049,
301
  "step": 625
302
  },
303
  {
304
  "epoch": 13.829787234042554,
305
- "grad_norm": 731185.1875,
306
  "learning_rate": 2.101449275362319e-05,
307
- "loss": 0.0044,
308
  "step": 650
309
  },
310
  {
311
  "epoch": 14.0,
312
- "eval_loss": 0.010285141877830029,
313
- "eval_rmse": 0.10145359486341476,
314
- "eval_runtime": 5.0416,
315
- "eval_samples_per_second": 103.736,
316
- "eval_steps_per_second": 1.785,
317
  "step": 658
318
  },
319
  {
320
  "epoch": 14.361702127659575,
321
- "grad_norm": 1295382.375,
322
  "learning_rate": 1.9202898550724637e-05,
323
- "loss": 0.0058,
324
  "step": 675
325
  },
326
  {
327
  "epoch": 14.893617021276595,
328
- "grad_norm": 751297.9375,
329
  "learning_rate": 1.739130434782609e-05,
330
- "loss": 0.0041,
331
  "step": 700
332
  },
333
  {
334
  "epoch": 15.0,
335
- "eval_loss": 0.008721875958144665,
336
- "eval_rmse": 0.09339401870965958,
337
- "eval_runtime": 5.0079,
338
- "eval_samples_per_second": 104.434,
339
- "eval_steps_per_second": 1.797,
340
  "step": 705
341
  },
342
  {
343
  "epoch": 15.425531914893616,
344
- "grad_norm": 1133890.875,
345
  "learning_rate": 1.5579710144927537e-05,
346
- "loss": 0.003,
347
  "step": 725
348
  },
349
  {
350
  "epoch": 15.957446808510639,
351
- "grad_norm": 1333124.375,
352
  "learning_rate": 1.3768115942028985e-05,
353
- "loss": 0.003,
354
  "step": 750
355
  },
356
  {
357
  "epoch": 16.0,
358
- "eval_loss": 0.012206021696329117,
359
- "eval_rmse": 0.11044398695230484,
360
- "eval_runtime": 4.7907,
361
- "eval_samples_per_second": 109.169,
362
- "eval_steps_per_second": 1.879,
363
  "step": 752
364
  },
365
  {
366
  "epoch": 16.48936170212766,
367
- "grad_norm": 1097979.375,
368
  "learning_rate": 1.1956521739130435e-05,
369
- "loss": 0.0044,
370
  "step": 775
371
  },
372
  {
373
  "epoch": 17.0,
374
- "eval_loss": 0.009473947808146477,
375
- "eval_rmse": 0.09737709909677505,
376
- "eval_runtime": 4.979,
377
- "eval_samples_per_second": 105.041,
378
- "eval_steps_per_second": 1.808,
379
  "step": 799
380
  },
381
  {
382
  "epoch": 17.02127659574468,
383
- "grad_norm": 1345082.75,
384
  "learning_rate": 1.0144927536231885e-05,
385
- "loss": 0.0023,
386
  "step": 800
387
  },
388
  {
389
  "epoch": 17.5531914893617,
390
- "grad_norm": 252615.640625,
391
  "learning_rate": 8.333333333333334e-06,
392
- "loss": 0.0019,
393
  "step": 825
394
  },
395
  {
396
  "epoch": 18.0,
397
- "eval_loss": 0.0081509193405509,
398
- "eval_rmse": 0.09029744565486908,
399
- "eval_runtime": 5.0468,
400
- "eval_samples_per_second": 103.629,
401
- "eval_steps_per_second": 1.783,
402
  "step": 846
403
  },
404
  {
405
  "epoch": 18.085106382978722,
406
- "grad_norm": 301606.09375,
407
  "learning_rate": 6.521739130434783e-06,
408
- "loss": 0.0017,
409
  "step": 850
410
  },
411
  {
412
  "epoch": 18.617021276595743,
413
- "grad_norm": 594859.5625,
414
  "learning_rate": 4.710144927536232e-06,
415
- "loss": 0.0018,
416
  "step": 875
417
  },
418
  {
419
  "epoch": 19.0,
420
- "eval_loss": 0.009004783816635609,
421
- "eval_rmse": 0.09493986517190933,
422
- "eval_runtime": 4.9612,
423
- "eval_samples_per_second": 105.419,
424
- "eval_steps_per_second": 1.814,
425
  "step": 893
426
  },
427
  {
428
  "epoch": 19.148936170212767,
429
- "grad_norm": 798350.8125,
430
  "learning_rate": 2.898550724637681e-06,
431
- "loss": 0.0017,
432
  "step": 900
433
  },
434
  {
435
  "epoch": 19.680851063829788,
436
- "grad_norm": 109003.640625,
437
  "learning_rate": 1.0869565217391306e-06,
438
- "loss": 0.0014,
439
  "step": 925
440
  },
441
  {
442
  "epoch": 20.0,
443
- "eval_loss": 0.008117899298667908,
444
- "eval_rmse": 0.09012944251298904,
445
- "eval_runtime": 4.9089,
446
- "eval_samples_per_second": 106.541,
447
- "eval_steps_per_second": 1.833,
448
  "step": 940
449
  },
450
  {
451
  "epoch": 20.0,
452
  "step": 940,
453
  "total_flos": 0.0,
454
- "train_loss": 0.028548606136377823,
455
- "train_runtime": 1786.5714,
456
- "train_samples_per_second": 33.17,
457
- "train_steps_per_second": 0.526
458
  }
459
  ],
460
  "logging_steps": 25,
 
1
  {
2
+ "best_metric": 0.012980646453797817,
3
  "best_model_checkpoint": "/mnt/ml_drive/kcardenas/limbxy_ext_crop_hands/checkpoint-940",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.5319148936170213,
13
+ "grad_norm": 623358.4375,
14
  "learning_rate": 5e-06,
15
+ "loss": 0.2234,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_loss": 0.15381918847560883,
21
+ "eval_rmse": 0.3922234773635864,
22
+ "eval_runtime": 5.3503,
23
+ "eval_samples_per_second": 97.751,
24
+ "eval_steps_per_second": 1.682,
25
  "step": 47
26
  },
27
  {
28
  "epoch": 1.0638297872340425,
29
+ "grad_norm": 2976699.0,
30
  "learning_rate": 1e-05,
31
+ "loss": 0.1582,
32
  "step": 50
33
  },
34
  {
35
  "epoch": 1.5957446808510638,
36
+ "grad_norm": 3106934.25,
37
  "learning_rate": 1.5e-05,
38
+ "loss": 0.1462,
39
  "step": 75
40
  },
41
  {
42
  "epoch": 2.0,
43
+ "eval_loss": 0.07616347819566727,
44
+ "eval_rmse": 0.2758934795856476,
45
+ "eval_runtime": 4.8381,
46
+ "eval_samples_per_second": 108.101,
47
+ "eval_steps_per_second": 1.86,
48
  "step": 94
49
  },
50
  {
51
  "epoch": 2.127659574468085,
52
+ "grad_norm": 1030784.1875,
53
  "learning_rate": 2e-05,
54
+ "loss": 0.0747,
55
  "step": 100
56
  },
57
  {
58
  "epoch": 2.6595744680851063,
59
+ "grad_norm": 3752779.75,
60
  "learning_rate": 2.5e-05,
61
+ "loss": 0.0446,
62
  "step": 125
63
  },
64
  {
65
  "epoch": 3.0,
66
+ "eval_loss": 0.04878520593047142,
67
+ "eval_rmse": 0.22092542052268982,
68
+ "eval_runtime": 4.8409,
69
+ "eval_samples_per_second": 108.038,
70
+ "eval_steps_per_second": 1.859,
71
  "step": 141
72
  },
73
  {
74
  "epoch": 3.1914893617021276,
75
+ "grad_norm": 4766084.5,
76
  "learning_rate": 3e-05,
77
+ "loss": 0.0437,
78
  "step": 150
79
  },
80
  {
81
  "epoch": 3.723404255319149,
82
+ "grad_norm": 3292715.5,
83
  "learning_rate": 3.5e-05,
84
+ "loss": 0.0436,
85
  "step": 175
86
  },
87
  {
88
  "epoch": 4.0,
89
+ "eval_loss": 0.017620466649532318,
90
+ "eval_rmse": 0.13286800682544708,
91
+ "eval_runtime": 4.9214,
92
+ "eval_samples_per_second": 106.271,
93
+ "eval_steps_per_second": 1.829,
94
  "step": 188
95
  },
96
  {
97
  "epoch": 4.25531914893617,
98
+ "grad_norm": 2775416.75,
99
  "learning_rate": 4e-05,
100
+ "loss": 0.0285,
101
  "step": 200
102
  },
103
  {
104
  "epoch": 4.787234042553192,
105
+ "grad_norm": 379769.03125,
106
  "learning_rate": 4.5e-05,
107
+ "loss": 0.0326,
108
  "step": 225
109
  },
110
  {
111
  "epoch": 5.0,
112
+ "eval_loss": 0.03224412351846695,
113
+ "eval_rmse": 0.1795557141304016,
114
+ "eval_runtime": 4.8651,
115
+ "eval_samples_per_second": 107.5,
116
+ "eval_steps_per_second": 1.85,
117
  "step": 235
118
  },
119
  {
120
  "epoch": 5.319148936170213,
121
+ "grad_norm": 1152975.25,
122
  "learning_rate": 5e-05,
123
+ "loss": 0.0314,
124
  "step": 250
125
  },
126
  {
127
  "epoch": 5.851063829787234,
128
+ "grad_norm": 2960943.0,
129
  "learning_rate": 4.818840579710145e-05,
130
+ "loss": 0.0212,
131
  "step": 275
132
  },
133
  {
134
  "epoch": 6.0,
135
+ "eval_loss": 0.01861540600657463,
136
+ "eval_rmse": 0.13647863268852234,
137
+ "eval_runtime": 5.0083,
138
+ "eval_samples_per_second": 104.427,
139
+ "eval_steps_per_second": 1.797,
140
  "step": 282
141
  },
142
  {
143
  "epoch": 6.382978723404255,
144
+ "grad_norm": 1633797.25,
145
  "learning_rate": 4.63768115942029e-05,
146
+ "loss": 0.0211,
147
  "step": 300
148
  },
149
  {
150
  "epoch": 6.914893617021277,
151
+ "grad_norm": 4293457.5,
152
  "learning_rate": 4.456521739130435e-05,
153
+ "loss": 0.0334,
154
  "step": 325
155
  },
156
  {
157
  "epoch": 7.0,
158
+ "eval_loss": 0.030206123366951942,
159
+ "eval_rmse": 0.1739334762096405,
160
+ "eval_runtime": 4.8758,
161
+ "eval_samples_per_second": 107.264,
162
+ "eval_steps_per_second": 1.846,
163
  "step": 329
164
  },
165
  {
166
  "epoch": 7.446808510638298,
167
+ "grad_norm": 4272689.0,
168
  "learning_rate": 4.27536231884058e-05,
169
+ "loss": 0.027,
170
  "step": 350
171
  },
172
  {
173
  "epoch": 7.9787234042553195,
174
+ "grad_norm": 1988792.5,
175
  "learning_rate": 4.094202898550725e-05,
176
+ "loss": 0.0247,
177
  "step": 375
178
  },
179
  {
180
  "epoch": 8.0,
181
+ "eval_loss": 0.03623066842556,
182
+ "eval_rmse": 0.19043287634849548,
183
+ "eval_runtime": 5.0165,
184
+ "eval_samples_per_second": 104.256,
185
+ "eval_steps_per_second": 1.794,
186
  "step": 376
187
  },
188
  {
189
  "epoch": 8.51063829787234,
190
+ "grad_norm": 1632416.25,
191
  "learning_rate": 3.91304347826087e-05,
192
+ "loss": 0.0238,
193
  "step": 400
194
  },
195
  {
196
  "epoch": 9.0,
197
+ "eval_loss": 0.015264267101883888,
198
+ "eval_rmse": 0.12361017614603043,
199
+ "eval_runtime": 4.9733,
200
+ "eval_samples_per_second": 105.161,
201
+ "eval_steps_per_second": 1.81,
202
  "step": 423
203
  },
204
  {
205
  "epoch": 9.042553191489361,
206
+ "grad_norm": 1837231.75,
207
  "learning_rate": 3.7318840579710144e-05,
208
+ "loss": 0.0182,
209
  "step": 425
210
  },
211
  {
212
  "epoch": 9.574468085106384,
213
+ "grad_norm": 3900810.5,
214
  "learning_rate": 3.5507246376811596e-05,
215
+ "loss": 0.0342,
216
  "step": 450
217
  },
218
  {
219
  "epoch": 10.0,
220
+ "eval_loss": 0.015248410403728485,
221
+ "eval_rmse": 0.1235644593834877,
222
+ "eval_runtime": 4.9635,
223
+ "eval_samples_per_second": 105.368,
224
+ "eval_steps_per_second": 1.813,
225
  "step": 470
226
  },
227
  {
228
  "epoch": 10.106382978723405,
229
+ "grad_norm": 2050042.25,
230
  "learning_rate": 3.369565217391305e-05,
231
+ "loss": 0.0186,
232
  "step": 475
233
  },
234
  {
235
  "epoch": 10.638297872340425,
236
+ "grad_norm": 3730581.0,
237
  "learning_rate": 3.188405797101449e-05,
238
+ "loss": 0.0188,
239
  "step": 500
240
  },
241
  {
242
  "epoch": 11.0,
243
+ "eval_loss": 0.01616613008081913,
244
+ "eval_rmse": 0.12722349166870117,
245
+ "eval_runtime": 4.9524,
246
+ "eval_samples_per_second": 105.606,
247
+ "eval_steps_per_second": 1.817,
248
  "step": 517
249
  },
250
  {
251
  "epoch": 11.170212765957446,
252
+ "grad_norm": 865433.75,
253
  "learning_rate": 3.0072463768115944e-05,
254
+ "loss": 0.0163,
255
  "step": 525
256
  },
257
  {
258
  "epoch": 11.702127659574469,
259
+ "grad_norm": 658995.6875,
260
  "learning_rate": 2.826086956521739e-05,
261
+ "loss": 0.0165,
262
  "step": 550
263
  },
264
  {
265
  "epoch": 12.0,
266
+ "eval_loss": 0.022758642211556435,
267
+ "eval_rmse": 0.15094806253910065,
268
+ "eval_runtime": 4.8205,
269
+ "eval_samples_per_second": 108.496,
270
+ "eval_steps_per_second": 1.867,
271
  "step": 564
272
  },
273
  {
274
  "epoch": 12.23404255319149,
275
+ "grad_norm": 2234811.25,
276
  "learning_rate": 2.6449275362318844e-05,
277
+ "loss": 0.0179,
278
  "step": 575
279
  },
280
  {
281
  "epoch": 12.76595744680851,
282
+ "grad_norm": 1162747.5,
283
  "learning_rate": 2.4637681159420292e-05,
284
+ "loss": 0.0163,
285
  "step": 600
286
  },
287
  {
288
  "epoch": 13.0,
289
+ "eval_loss": 0.014487933367490768,
290
+ "eval_rmse": 0.12045549601316452,
291
+ "eval_runtime": 5.0569,
292
+ "eval_samples_per_second": 103.423,
293
+ "eval_steps_per_second": 1.78,
294
  "step": 611
295
  },
296
  {
297
  "epoch": 13.297872340425531,
298
+ "grad_norm": 4238076.0,
299
  "learning_rate": 2.282608695652174e-05,
300
+ "loss": 0.0176,
301
  "step": 625
302
  },
303
  {
304
  "epoch": 13.829787234042554,
305
+ "grad_norm": 172452.703125,
306
  "learning_rate": 2.101449275362319e-05,
307
+ "loss": 0.0154,
308
  "step": 650
309
  },
310
  {
311
  "epoch": 14.0,
312
+ "eval_loss": 0.013961490243673325,
313
+ "eval_rmse": 0.11822474002838135,
314
+ "eval_runtime": 5.1475,
315
+ "eval_samples_per_second": 101.603,
316
+ "eval_steps_per_second": 1.748,
317
  "step": 658
318
  },
319
  {
320
  "epoch": 14.361702127659575,
321
+ "grad_norm": 1203963.375,
322
  "learning_rate": 1.9202898550724637e-05,
323
+ "loss": 0.0154,
324
  "step": 675
325
  },
326
  {
327
  "epoch": 14.893617021276595,
328
+ "grad_norm": 1234547.5,
329
  "learning_rate": 1.739130434782609e-05,
330
+ "loss": 0.0168,
331
  "step": 700
332
  },
333
  {
334
  "epoch": 15.0,
335
+ "eval_loss": 0.014746125787496567,
336
+ "eval_rmse": 0.12149151414632797,
337
+ "eval_runtime": 4.9322,
338
+ "eval_samples_per_second": 106.038,
339
+ "eval_steps_per_second": 1.825,
340
  "step": 705
341
  },
342
  {
343
  "epoch": 15.425531914893616,
344
+ "grad_norm": 878691.75,
345
  "learning_rate": 1.5579710144927537e-05,
346
+ "loss": 0.0154,
347
  "step": 725
348
  },
349
  {
350
  "epoch": 15.957446808510639,
351
+ "grad_norm": 2425489.25,
352
  "learning_rate": 1.3768115942028985e-05,
353
+ "loss": 0.0156,
354
  "step": 750
355
  },
356
  {
357
  "epoch": 16.0,
358
+ "eval_loss": 0.014588478021323681,
359
+ "eval_rmse": 0.12084110826253891,
360
+ "eval_runtime": 5.2911,
361
+ "eval_samples_per_second": 98.846,
362
+ "eval_steps_per_second": 1.701,
363
  "step": 752
364
  },
365
  {
366
  "epoch": 16.48936170212766,
367
+ "grad_norm": 497963.0,
368
  "learning_rate": 1.1956521739130435e-05,
369
+ "loss": 0.0159,
370
  "step": 775
371
  },
372
  {
373
  "epoch": 17.0,
374
+ "eval_loss": 0.015692666172981262,
375
+ "eval_rmse": 0.12533892691135406,
376
+ "eval_runtime": 4.7367,
377
+ "eval_samples_per_second": 110.415,
378
+ "eval_steps_per_second": 1.9,
379
  "step": 799
380
  },
381
  {
382
  "epoch": 17.02127659574468,
383
+ "grad_norm": 1821127.75,
384
  "learning_rate": 1.0144927536231885e-05,
385
+ "loss": 0.0149,
386
  "step": 800
387
  },
388
  {
389
  "epoch": 17.5531914893617,
390
+ "grad_norm": 772281.125,
391
  "learning_rate": 8.333333333333334e-06,
392
+ "loss": 0.0145,
393
  "step": 825
394
  },
395
  {
396
  "epoch": 18.0,
397
+ "eval_loss": 0.014158854261040688,
398
+ "eval_rmse": 0.11905129253864288,
399
+ "eval_runtime": 4.935,
400
+ "eval_samples_per_second": 105.978,
401
+ "eval_steps_per_second": 1.824,
402
  "step": 846
403
  },
404
  {
405
  "epoch": 18.085106382978722,
406
+ "grad_norm": 480387.40625,
407
  "learning_rate": 6.521739130434783e-06,
408
+ "loss": 0.0133,
409
  "step": 850
410
  },
411
  {
412
  "epoch": 18.617021276595743,
413
+ "grad_norm": 622875.125,
414
  "learning_rate": 4.710144927536232e-06,
415
+ "loss": 0.0133,
416
  "step": 875
417
  },
418
  {
419
  "epoch": 19.0,
420
+ "eval_loss": 0.013214284554123878,
421
+ "eval_rmse": 0.11501393467187881,
422
+ "eval_runtime": 4.9198,
423
+ "eval_samples_per_second": 106.304,
424
+ "eval_steps_per_second": 1.829,
425
  "step": 893
426
  },
427
  {
428
  "epoch": 19.148936170212767,
429
+ "grad_norm": 335946.59375,
430
  "learning_rate": 2.898550724637681e-06,
431
+ "loss": 0.0133,
432
  "step": 900
433
  },
434
  {
435
  "epoch": 19.680851063829788,
436
+ "grad_norm": 479721.125,
437
  "learning_rate": 1.0869565217391306e-06,
438
+ "loss": 0.0127,
439
  "step": 925
440
  },
441
  {
442
  "epoch": 20.0,
443
+ "eval_loss": 0.012980646453797817,
444
+ "eval_rmse": 0.11399345099925995,
445
+ "eval_runtime": 4.9566,
446
+ "eval_samples_per_second": 105.517,
447
+ "eval_steps_per_second": 1.816,
448
  "step": 940
449
  },
450
  {
451
  "epoch": 20.0,
452
  "step": 940,
453
  "total_flos": 0.0,
454
+ "train_loss": 0.03554724605476602,
455
+ "train_runtime": 1780.6867,
456
+ "train_samples_per_second": 33.279,
457
+ "train_steps_per_second": 0.528
458
  }
459
  ],
460
  "logging_steps": 25,