c14kevincardenas commited on
Commit
33b0a22
·
verified ·
1 Parent(s): 567e3e4

Training in progress, epoch 1

Browse files
Files changed (33) hide show
  1. README.md +25 -28
  2. all_results.json +9 -9
  3. eval_results.json +5 -5
  4. model.safetensors +2 -2
  5. runs/Feb19_02-57-14_galactica.ad.cirange.net/events.out.tfevents.1739933839.galactica.ad.cirange.net.2734067.0 +3 -0
  6. runs/Feb19_03-54-03_galactica.ad.cirange.net/events.out.tfevents.1739937248.galactica.ad.cirange.net.2752720.0 +3 -0
  7. runs/Feb19_03-54-03_galactica.ad.cirange.net/events.out.tfevents.1739940943.galactica.ad.cirange.net.2752720.1 +3 -0
  8. runs/Feb19_04-55-54_galactica.ad.cirange.net/events.out.tfevents.1739940960.galactica.ad.cirange.net.2771453.0 +3 -0
  9. runs/Feb19_04-55-54_galactica.ad.cirange.net/events.out.tfevents.1739944602.galactica.ad.cirange.net.2771453.1 +3 -0
  10. runs/Feb19_05-56-53_galactica.ad.cirange.net/events.out.tfevents.1739944619.galactica.ad.cirange.net.2790190.0 +3 -0
  11. runs/Feb19_05-56-53_galactica.ad.cirange.net/events.out.tfevents.1739948266.galactica.ad.cirange.net.2790190.1 +3 -0
  12. runs/Feb19_06-57-57_galactica.ad.cirange.net/events.out.tfevents.1739948282.galactica.ad.cirange.net.2819429.0 +3 -0
  13. runs/Feb19_06-57-57_galactica.ad.cirange.net/events.out.tfevents.1739951967.galactica.ad.cirange.net.2819429.1 +3 -0
  14. runs/Feb19_07-59-38_galactica.ad.cirange.net/events.out.tfevents.1739951984.galactica.ad.cirange.net.2838155.0 +3 -0
  15. runs/Feb19_07-59-38_galactica.ad.cirange.net/events.out.tfevents.1739955603.galactica.ad.cirange.net.2838155.1 +3 -0
  16. runs/Feb19_09-00-14_galactica.ad.cirange.net/events.out.tfevents.1739955625.galactica.ad.cirange.net.2856934.0 +3 -0
  17. runs/Feb19_09-00-14_galactica.ad.cirange.net/events.out.tfevents.1739959252.galactica.ad.cirange.net.2856934.1 +3 -0
  18. runs/Feb19_10-01-04_galactica.ad.cirange.net/events.out.tfevents.1739959269.galactica.ad.cirange.net.2875412.0 +3 -0
  19. runs/Feb19_10-01-04_galactica.ad.cirange.net/events.out.tfevents.1739962891.galactica.ad.cirange.net.2875412.1 +3 -0
  20. runs/Feb19_11-01-42_galactica.ad.cirange.net/events.out.tfevents.1739962907.galactica.ad.cirange.net.2894098.0 +3 -0
  21. runs/Feb19_11-01-42_galactica.ad.cirange.net/events.out.tfevents.1739966536.galactica.ad.cirange.net.2894098.1 +3 -0
  22. runs/Feb19_12-02-27_galactica.ad.cirange.net/events.out.tfevents.1739966552.galactica.ad.cirange.net.2912705.0 +3 -0
  23. runs/Feb19_12-02-27_galactica.ad.cirange.net/events.out.tfevents.1739970145.galactica.ad.cirange.net.2912705.1 +3 -0
  24. runs/Feb19_13-02-36_galactica.ad.cirange.net/events.out.tfevents.1739970161.galactica.ad.cirange.net.2931857.0 +3 -0
  25. runs/Feb19_13-02-36_galactica.ad.cirange.net/events.out.tfevents.1739973785.galactica.ad.cirange.net.2931857.1 +3 -0
  26. runs/Feb19_14-03-16_galactica.ad.cirange.net/events.out.tfevents.1739973802.galactica.ad.cirange.net.2950524.0 +3 -0
  27. runs/Feb19_14-03-16_galactica.ad.cirange.net/events.out.tfevents.1739977526.galactica.ad.cirange.net.2950524.1 +3 -0
  28. runs/Feb19_15-05-38_galactica.ad.cirange.net/events.out.tfevents.1739977543.galactica.ad.cirange.net.2969069.0 +3 -0
  29. runs/Feb19_16-06-47_galactica.ad.cirange.net/events.out.tfevents.1739981213.galactica.ad.cirange.net.2987544.0 +3 -0
  30. runs/Feb19_16-14-54_galactica.ad.cirange.net/events.out.tfevents.1739981699.galactica.ad.cirange.net.2991274.0 +3 -0
  31. train_results.json +4 -4
  32. trainer_state.json +244 -244
  33. training_args.bin +1 -1
README.md CHANGED
@@ -3,24 +3,21 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: c14kevincardenas/beit-large-patch16-384-limb
5
  tags:
6
- - image-regression
7
- - human-movement
8
- - vision
9
  - generated_from_trainer
10
  model-index:
11
- - name: limbxy_pose_2heads_1layers_8embeddim
12
  results: []
13
  ---
14
 
15
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
  should probably proofread and complete it, then remove this comment. -->
17
 
18
- # limbxy_pose_2heads_1layers_8embeddim
19
 
20
- This model is a fine-tuned version of [c14kevincardenas/beit-large-patch16-384-limb](https://huggingface.co/c14kevincardenas/beit-large-patch16-384-limb) on the c14kevincardenas/beta_caller_284_limbxy_pose dataset.
21
  It achieves the following results on the evaluation set:
22
- - Loss: 0.1391
23
- - Rmse: 0.3729
24
 
25
  ## Model description
26
 
@@ -53,26 +50,26 @@ The following hyperparameters were used during training:
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Rmse |
55
  |:-------------:|:-----:|:----:|:---------------:|:------:|
56
- | 0.214 | 1.0 | 89 | 0.1933 | 0.4396 |
57
- | 0.161 | 2.0 | 178 | 0.1474 | 0.3839 |
58
- | 0.1634 | 3.0 | 267 | 0.1541 | 0.3925 |
59
- | 0.1547 | 4.0 | 356 | 0.1495 | 0.3867 |
60
- | 0.1674 | 5.0 | 445 | 0.1680 | 0.4098 |
61
- | 0.1644 | 6.0 | 534 | 0.1473 | 0.3838 |
62
- | 0.1459 | 7.0 | 623 | 0.1405 | 0.3748 |
63
- | 0.1544 | 8.0 | 712 | 0.1514 | 0.3891 |
64
- | 0.1531 | 9.0 | 801 | 0.1419 | 0.3767 |
65
- | 0.1468 | 10.0 | 890 | 0.1422 | 0.3770 |
66
- | 0.143 | 11.0 | 979 | 0.1447 | 0.3804 |
67
- | 0.1467 | 12.0 | 1068 | 0.1597 | 0.3996 |
68
- | 0.149 | 13.0 | 1157 | 0.1394 | 0.3733 |
69
- | 0.139 | 14.0 | 1246 | 0.1412 | 0.3758 |
70
- | 0.1433 | 15.0 | 1335 | 0.1397 | 0.3738 |
71
- | 0.1426 | 16.0 | 1424 | 0.1396 | 0.3736 |
72
- | 0.1403 | 17.0 | 1513 | 0.1396 | 0.3737 |
73
- | 0.1393 | 18.0 | 1602 | 0.1393 | 0.3733 |
74
- | 0.1426 | 19.0 | 1691 | 0.1391 | 0.3730 |
75
- | 0.1387 | 20.0 | 1780 | 0.1391 | 0.3729 |
76
 
77
 
78
  ### Framework versions
 
3
  license: apache-2.0
4
  base_model: c14kevincardenas/beit-large-patch16-384-limb
5
  tags:
 
 
 
6
  - generated_from_trainer
7
  model-index:
8
+ - name: limbxy_pose_8heads_1layers_16embeddim
9
  results: []
10
  ---
11
 
12
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
  should probably proofread and complete it, then remove this comment. -->
14
 
15
+ # limbxy_pose_8heads_1layers_16embeddim
16
 
17
+ This model is a fine-tuned version of [c14kevincardenas/beit-large-patch16-384-limb](https://huggingface.co/c14kevincardenas/beit-large-patch16-384-limb) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.1402
20
+ - Rmse: 0.3745
21
 
22
  ## Model description
23
 
 
50
 
51
  | Training Loss | Epoch | Step | Validation Loss | Rmse |
52
  |:-------------:|:-----:|:----:|:---------------:|:------:|
53
+ | 0.3414 | 1.0 | 89 | 0.3311 | 0.5754 |
54
+ | 0.1834 | 2.0 | 178 | 0.2026 | 0.4501 |
55
+ | 0.1645 | 3.0 | 267 | 0.1491 | 0.3862 |
56
+ | 0.1729 | 4.0 | 356 | 0.2542 | 0.5042 |
57
+ | 0.1612 | 5.0 | 445 | 0.1539 | 0.3923 |
58
+ | 0.1578 | 6.0 | 534 | 0.1464 | 0.3827 |
59
+ | 0.1594 | 7.0 | 623 | 0.1860 | 0.4313 |
60
+ | 0.1546 | 8.0 | 712 | 0.1433 | 0.3785 |
61
+ | 0.1517 | 9.0 | 801 | 0.1416 | 0.3763 |
62
+ | 0.1461 | 10.0 | 890 | 0.1576 | 0.3969 |
63
+ | 0.1519 | 11.0 | 979 | 0.1623 | 0.4029 |
64
+ | 0.1491 | 12.0 | 1068 | 0.1411 | 0.3756 |
65
+ | 0.1489 | 13.0 | 1157 | 0.1416 | 0.3763 |
66
+ | 0.1425 | 14.0 | 1246 | 0.1426 | 0.3776 |
67
+ | 0.145 | 15.0 | 1335 | 0.1407 | 0.3751 |
68
+ | 0.1418 | 16.0 | 1424 | 0.1443 | 0.3799 |
69
+ | 0.1411 | 17.0 | 1513 | 0.1403 | 0.3745 |
70
+ | 0.1398 | 18.0 | 1602 | 0.1403 | 0.3746 |
71
+ | 0.143 | 19.0 | 1691 | 0.1405 | 0.3749 |
72
+ | 0.1395 | 20.0 | 1780 | 0.1402 | 0.3745 |
73
 
74
 
75
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_loss": 0.13906994462013245,
4
- "eval_rmse": 0.37292084097862244,
5
- "eval_runtime": 9.5722,
6
- "eval_samples_per_second": 104.469,
7
- "eval_steps_per_second": 1.672,
8
  "total_flos": 0.0,
9
- "train_loss": 0.15599652267573924,
10
- "train_runtime": 3320.1272,
11
- "train_samples_per_second": 34.107,
12
- "train_steps_per_second": 0.536
13
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_loss": 0.1402168720960617,
4
+ "eval_rmse": 0.37445545196533203,
5
+ "eval_runtime": 9.652,
6
+ "eval_samples_per_second": 103.605,
7
+ "eval_steps_per_second": 1.658,
8
  "total_flos": 0.0,
9
+ "train_loss": 0.16565035152971075,
10
+ "train_runtime": 3332.9842,
11
+ "train_samples_per_second": 33.976,
12
+ "train_steps_per_second": 0.534
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_loss": 0.13906994462013245,
4
- "eval_rmse": 0.37292084097862244,
5
- "eval_runtime": 9.5722,
6
- "eval_samples_per_second": 104.469,
7
- "eval_steps_per_second": 1.672
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_loss": 0.1402168720960617,
4
+ "eval_rmse": 0.37445545196533203,
5
+ "eval_runtime": 9.652,
6
+ "eval_samples_per_second": 103.605,
7
+ "eval_steps_per_second": 1.658
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f930902133dc7897c174a9ecf134fc548403da98da0e5b7ab54e179493ea62cb
3
- size 1216514232
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34ca9281913611c6dabda111c789e7183251607472342d6b77b16908077a9095
3
+ size 1216102672
runs/Feb19_02-57-14_galactica.ad.cirange.net/events.out.tfevents.1739933839.galactica.ad.cirange.net.2734067.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:083c1df90e770553b1cbc1ec4236ed8471b565b24957337db6a35e742cfff98f
3
+ size 27897
runs/Feb19_03-54-03_galactica.ad.cirange.net/events.out.tfevents.1739937248.galactica.ad.cirange.net.2752720.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de08cf834243ea9be39fe68fc8a007ed30f482981045fc8761092f1807b7bbc6
3
+ size 27897
runs/Feb19_03-54-03_galactica.ad.cirange.net/events.out.tfevents.1739940943.galactica.ad.cirange.net.2752720.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9854903d30d50a32d347e9ef99ca01dd10f18b77297bd69b0b42823e335eb920
3
+ size 407
runs/Feb19_04-55-54_galactica.ad.cirange.net/events.out.tfevents.1739940960.galactica.ad.cirange.net.2771453.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52338c6c8014cac3af12abb6c0a089a44ea63a40a7acbf41445aba3dd88f9024
3
+ size 27896
runs/Feb19_04-55-54_galactica.ad.cirange.net/events.out.tfevents.1739944602.galactica.ad.cirange.net.2771453.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:912fb793845466b26a393df3a973dabac10bc45ba6122ce0502f9706eeda81b0
3
+ size 407
runs/Feb19_05-56-53_galactica.ad.cirange.net/events.out.tfevents.1739944619.galactica.ad.cirange.net.2790190.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a89a2ed63ce74c3b38f9924ec3dba25021ba91a8e6be549b75abbdd8ebd8fae
3
+ size 27897
runs/Feb19_05-56-53_galactica.ad.cirange.net/events.out.tfevents.1739948266.galactica.ad.cirange.net.2790190.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7004eb0e5b94605e043bf0cb4e48df317dc0ba2108f2655b1f1a61c95079924
3
+ size 407
runs/Feb19_06-57-57_galactica.ad.cirange.net/events.out.tfevents.1739948282.galactica.ad.cirange.net.2819429.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:024af1f858d12e5f7b12de38c37b32224f1c236e62f059cb7e3b3a0b5ea9e22d
3
+ size 27897
runs/Feb19_06-57-57_galactica.ad.cirange.net/events.out.tfevents.1739951967.galactica.ad.cirange.net.2819429.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4a017c0d7651db70a3ef3ef2b2d482349d014de3c5c8af992d45afcd17093b1
3
+ size 407
runs/Feb19_07-59-38_galactica.ad.cirange.net/events.out.tfevents.1739951984.galactica.ad.cirange.net.2838155.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca2c397d94bd5023766ea9fb0897952fca04089e59466e88d58d6061e3dc5972
3
+ size 27896
runs/Feb19_07-59-38_galactica.ad.cirange.net/events.out.tfevents.1739955603.galactica.ad.cirange.net.2838155.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:820971256c9c39f81b23da63a32e102499bc0271a24128744cfb6ab5469e632a
3
+ size 407
runs/Feb19_09-00-14_galactica.ad.cirange.net/events.out.tfevents.1739955625.galactica.ad.cirange.net.2856934.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3ccbc97c62f6670b269ad0ec2160edfff91cc3e7e2a66c3ad43bc4dabfdece4
3
+ size 27897
runs/Feb19_09-00-14_galactica.ad.cirange.net/events.out.tfevents.1739959252.galactica.ad.cirange.net.2856934.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f60df53da4af8480912dad62101e5f6fb47abdd554c4a863915103fccad085f
3
+ size 407
runs/Feb19_10-01-04_galactica.ad.cirange.net/events.out.tfevents.1739959269.galactica.ad.cirange.net.2875412.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2052a4f66ec9d5f9385ec59efe9233474848655f2650408319e410cef42baca
3
+ size 27897
runs/Feb19_10-01-04_galactica.ad.cirange.net/events.out.tfevents.1739962891.galactica.ad.cirange.net.2875412.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b443b2eb9fe23bffa7386c4fe14044de084a9a8e6dc608864891e99578cf721
3
+ size 407
runs/Feb19_11-01-42_galactica.ad.cirange.net/events.out.tfevents.1739962907.galactica.ad.cirange.net.2894098.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e43d05b2adeaf4bdf820bda8352c467e9cbb287fb81796a0ea6d8834c23833ee
3
+ size 27896
runs/Feb19_11-01-42_galactica.ad.cirange.net/events.out.tfevents.1739966536.galactica.ad.cirange.net.2894098.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e0722843c8b72848a7bdc416c2e6d3b84533c624b79f9f79ca60395366e21ab
3
+ size 407
runs/Feb19_12-02-27_galactica.ad.cirange.net/events.out.tfevents.1739966552.galactica.ad.cirange.net.2912705.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db36a5322e114742b2b0679bc52799905e4c6358f1c1af69202907693cacbd94
3
+ size 27897
runs/Feb19_12-02-27_galactica.ad.cirange.net/events.out.tfevents.1739970145.galactica.ad.cirange.net.2912705.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e777ff322861f267149c32dff760db76541cb1893b1e9d33a58bd479a4295156
3
+ size 407
runs/Feb19_13-02-36_galactica.ad.cirange.net/events.out.tfevents.1739970161.galactica.ad.cirange.net.2931857.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:953fc7534ef31c45d0af9f62aad250ff6f1f60eb5e065df2902cc055159011bf
3
+ size 27897
runs/Feb19_13-02-36_galactica.ad.cirange.net/events.out.tfevents.1739973785.galactica.ad.cirange.net.2931857.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:158ec9fb65261394bdf4f802a4fb820697f3cbd229cb6b3cec7baff10353ef2e
3
+ size 407
runs/Feb19_14-03-16_galactica.ad.cirange.net/events.out.tfevents.1739973802.galactica.ad.cirange.net.2950524.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7eb062f103adab23250585e761302ba65e68add5862e2e2000d667cbb3b9991a
3
+ size 27896
runs/Feb19_14-03-16_galactica.ad.cirange.net/events.out.tfevents.1739977526.galactica.ad.cirange.net.2950524.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:940f8816ecfa3ea1b3551981558a2a0e359c7866badf92870754656c3896988d
3
+ size 407
runs/Feb19_15-05-38_galactica.ad.cirange.net/events.out.tfevents.1739977543.galactica.ad.cirange.net.2969069.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a13e58db687d997262aeb0b120e96e40c6a7637fd2d55b4ca64e3d84d7de7fa
3
+ size 27897
runs/Feb19_16-06-47_galactica.ad.cirange.net/events.out.tfevents.1739981213.galactica.ad.cirange.net.2987544.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9084b9bf6793d0973695fcc11ac85613037bf1b6faf78e484a10a4649e06a57
3
+ size 8719
runs/Feb19_16-14-54_galactica.ad.cirange.net/events.out.tfevents.1739981699.galactica.ad.cirange.net.2991274.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3af983f73ae9542d8906d0eec536dc86745e3f17f2e0b5374e7d4410bbfe171
3
+ size 7141
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.15599652267573924,
5
- "train_runtime": 3320.1272,
6
- "train_samples_per_second": 34.107,
7
- "train_steps_per_second": 0.536
8
  }
 
1
  {
2
  "epoch": 20.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.16565035152971075,
5
+ "train_runtime": 3332.9842,
6
+ "train_samples_per_second": 33.976,
7
+ "train_steps_per_second": 0.534
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.13906994462013245,
3
  "best_model_checkpoint": "limbxy_pose/checkpoint-1780",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
@@ -10,689 +10,689 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.2808988764044944,
13
- "grad_norm": 8.567283167615347e-13,
14
  "learning_rate": 5e-06,
15
  "loss": 0.3282,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 0.5617977528089888,
20
- "grad_norm": 110866.890625,
21
  "learning_rate": 1e-05,
22
- "loss": 0.2853,
23
  "step": 50
24
  },
25
  {
26
  "epoch": 0.8426966292134831,
27
- "grad_norm": 1257591.75,
28
  "learning_rate": 1.5e-05,
29
- "loss": 0.214,
30
  "step": 75
31
  },
32
  {
33
  "epoch": 1.0,
34
- "eval_loss": 0.19327276945114136,
35
- "eval_rmse": 0.4396279752254486,
36
- "eval_runtime": 9.4099,
37
- "eval_samples_per_second": 106.272,
38
- "eval_steps_per_second": 1.7,
39
  "step": 89
40
  },
41
  {
42
  "epoch": 1.1235955056179776,
43
- "grad_norm": 1151241.875,
44
  "learning_rate": 2e-05,
45
- "loss": 0.1918,
46
  "step": 100
47
  },
48
  {
49
  "epoch": 1.404494382022472,
50
- "grad_norm": 2122279.75,
51
  "learning_rate": 2.5e-05,
52
- "loss": 0.1868,
53
  "step": 125
54
  },
55
  {
56
  "epoch": 1.6853932584269664,
57
- "grad_norm": 2307076.25,
58
  "learning_rate": 3e-05,
59
- "loss": 0.1788,
60
  "step": 150
61
  },
62
  {
63
  "epoch": 1.9662921348314608,
64
- "grad_norm": 1163768.75,
65
  "learning_rate": 3.5e-05,
66
- "loss": 0.161,
67
  "step": 175
68
  },
69
  {
70
  "epoch": 2.0,
71
- "eval_loss": 0.14737975597381592,
72
- "eval_rmse": 0.3839007019996643,
73
- "eval_runtime": 9.5512,
74
- "eval_samples_per_second": 104.698,
75
- "eval_steps_per_second": 1.675,
76
  "step": 178
77
  },
78
  {
79
  "epoch": 2.247191011235955,
80
- "grad_norm": 3384581.5,
81
  "learning_rate": 4e-05,
82
- "loss": 0.1542,
83
  "step": 200
84
  },
85
  {
86
  "epoch": 2.5280898876404496,
87
- "grad_norm": 5387857.0,
88
  "learning_rate": 4.5e-05,
89
- "loss": 0.1643,
90
  "step": 225
91
  },
92
  {
93
  "epoch": 2.808988764044944,
94
- "grad_norm": 2503687.0,
95
  "learning_rate": 5e-05,
96
- "loss": 0.1634,
97
  "step": 250
98
  },
99
  {
100
  "epoch": 3.0,
101
- "eval_loss": 0.15408433973789215,
102
- "eval_rmse": 0.3925357758998871,
103
- "eval_runtime": 9.5151,
104
- "eval_samples_per_second": 105.096,
105
- "eval_steps_per_second": 1.682,
106
  "step": 267
107
  },
108
  {
109
  "epoch": 3.0898876404494384,
110
- "grad_norm": 1989428.125,
111
  "learning_rate": 4.918300653594771e-05,
112
- "loss": 0.1626,
113
  "step": 275
114
  },
115
  {
116
  "epoch": 3.370786516853933,
117
- "grad_norm": 2154801.5,
118
  "learning_rate": 4.8366013071895424e-05,
119
- "loss": 0.1588,
120
  "step": 300
121
  },
122
  {
123
  "epoch": 3.6516853932584272,
124
- "grad_norm": 1134807.0,
125
  "learning_rate": 4.7549019607843135e-05,
126
- "loss": 0.1532,
127
  "step": 325
128
  },
129
  {
130
  "epoch": 3.932584269662921,
131
- "grad_norm": 2527826.5,
132
  "learning_rate": 4.673202614379085e-05,
133
- "loss": 0.1547,
134
  "step": 350
135
  },
136
  {
137
  "epoch": 4.0,
138
- "eval_loss": 0.1495393067598343,
139
- "eval_rmse": 0.38670313358306885,
140
- "eval_runtime": 9.5702,
141
- "eval_samples_per_second": 104.491,
142
- "eval_steps_per_second": 1.672,
143
  "step": 356
144
  },
145
  {
146
  "epoch": 4.213483146067416,
147
- "grad_norm": 830469.375,
148
  "learning_rate": 4.5915032679738564e-05,
149
- "loss": 0.1496,
150
  "step": 375
151
  },
152
  {
153
  "epoch": 4.49438202247191,
154
- "grad_norm": 823327.75,
155
  "learning_rate": 4.5098039215686275e-05,
156
- "loss": 0.1554,
157
  "step": 400
158
  },
159
  {
160
  "epoch": 4.775280898876405,
161
- "grad_norm": 1184849.0,
162
  "learning_rate": 4.4281045751633986e-05,
163
- "loss": 0.1674,
164
  "step": 425
165
  },
166
  {
167
  "epoch": 5.0,
168
- "eval_loss": 0.1679653525352478,
169
- "eval_rmse": 0.4098357558250427,
170
- "eval_runtime": 9.3594,
171
- "eval_samples_per_second": 106.845,
172
- "eval_steps_per_second": 1.71,
173
  "step": 445
174
  },
175
  {
176
  "epoch": 5.056179775280899,
177
- "grad_norm": 169527.875,
178
  "learning_rate": 4.3464052287581704e-05,
179
- "loss": 0.1676,
180
  "step": 450
181
  },
182
  {
183
  "epoch": 5.337078651685394,
184
- "grad_norm": 1563091.5,
185
  "learning_rate": 4.2647058823529415e-05,
186
- "loss": 0.1821,
187
  "step": 475
188
  },
189
  {
190
  "epoch": 5.617977528089888,
191
- "grad_norm": 1015313.25,
192
  "learning_rate": 4.1830065359477126e-05,
193
- "loss": 0.176,
194
  "step": 500
195
  },
196
  {
197
  "epoch": 5.898876404494382,
198
- "grad_norm": 2744912.25,
199
  "learning_rate": 4.101307189542484e-05,
200
- "loss": 0.1644,
201
  "step": 525
202
  },
203
  {
204
  "epoch": 6.0,
205
- "eval_loss": 0.14729027450084686,
206
- "eval_rmse": 0.3837841749191284,
207
- "eval_runtime": 9.5398,
208
- "eval_samples_per_second": 104.824,
209
- "eval_steps_per_second": 1.677,
210
  "step": 534
211
  },
212
  {
213
  "epoch": 6.179775280898877,
214
- "grad_norm": 116897.2265625,
215
  "learning_rate": 4.0196078431372555e-05,
216
- "loss": 0.1588,
217
  "step": 550
218
  },
219
  {
220
  "epoch": 6.460674157303371,
221
- "grad_norm": 1905296.75,
222
  "learning_rate": 3.9379084967320266e-05,
223
- "loss": 0.1517,
224
  "step": 575
225
  },
226
  {
227
  "epoch": 6.741573033707866,
228
- "grad_norm": 2599395.75,
229
  "learning_rate": 3.8562091503267977e-05,
230
- "loss": 0.1459,
231
  "step": 600
232
  },
233
  {
234
  "epoch": 7.0,
235
- "eval_loss": 0.1404775083065033,
236
- "eval_rmse": 0.3748033046722412,
237
- "eval_runtime": 9.6349,
238
- "eval_samples_per_second": 103.79,
239
- "eval_steps_per_second": 1.661,
240
  "step": 623
241
  },
242
  {
243
  "epoch": 7.022471910112359,
244
- "grad_norm": 2782315.75,
245
  "learning_rate": 3.774509803921569e-05,
246
- "loss": 0.1479,
247
  "step": 625
248
  },
249
  {
250
  "epoch": 7.303370786516854,
251
- "grad_norm": 1166099.875,
252
  "learning_rate": 3.6928104575163405e-05,
253
- "loss": 0.1553,
254
  "step": 650
255
  },
256
  {
257
  "epoch": 7.584269662921348,
258
- "grad_norm": 2184319.0,
259
  "learning_rate": 3.611111111111111e-05,
260
- "loss": 0.1522,
261
  "step": 675
262
  },
263
  {
264
  "epoch": 7.865168539325842,
265
- "grad_norm": 1279161.625,
266
  "learning_rate": 3.529411764705883e-05,
267
- "loss": 0.1544,
268
  "step": 700
269
  },
270
  {
271
  "epoch": 8.0,
272
- "eval_loss": 0.15137900412082672,
273
- "eval_rmse": 0.38907453417778015,
274
- "eval_runtime": 9.1535,
275
- "eval_samples_per_second": 109.248,
276
- "eval_steps_per_second": 1.748,
277
  "step": 712
278
  },
279
  {
280
  "epoch": 8.146067415730338,
281
- "grad_norm": 2559078.25,
282
  "learning_rate": 3.447712418300654e-05,
283
- "loss": 0.1521,
284
  "step": 725
285
  },
286
  {
287
  "epoch": 8.426966292134832,
288
- "grad_norm": 257444.171875,
289
  "learning_rate": 3.366013071895425e-05,
290
- "loss": 0.143,
291
  "step": 750
292
  },
293
  {
294
  "epoch": 8.707865168539326,
295
- "grad_norm": 927010.125,
296
  "learning_rate": 3.284313725490196e-05,
297
- "loss": 0.1477,
298
  "step": 775
299
  },
300
  {
301
  "epoch": 8.98876404494382,
302
- "grad_norm": 159431.078125,
303
  "learning_rate": 3.202614379084967e-05,
304
- "loss": 0.1531,
305
  "step": 800
306
  },
307
  {
308
  "epoch": 9.0,
309
- "eval_loss": 0.14193063974380493,
310
- "eval_rmse": 0.3767368197441101,
311
- "eval_runtime": 9.5112,
312
- "eval_samples_per_second": 105.139,
313
- "eval_steps_per_second": 1.682,
314
  "step": 801
315
  },
316
  {
317
  "epoch": 9.269662921348315,
318
- "grad_norm": 1847863.875,
319
  "learning_rate": 3.120915032679739e-05,
320
- "loss": 0.1485,
321
  "step": 825
322
  },
323
  {
324
  "epoch": 9.55056179775281,
325
- "grad_norm": 952230.1875,
326
  "learning_rate": 3.0392156862745097e-05,
327
- "loss": 0.154,
328
  "step": 850
329
  },
330
  {
331
  "epoch": 9.831460674157304,
332
- "grad_norm": 88050.4765625,
333
  "learning_rate": 2.957516339869281e-05,
334
- "loss": 0.1468,
335
  "step": 875
336
  },
337
  {
338
  "epoch": 10.0,
339
- "eval_loss": 0.14216481149196625,
340
- "eval_rmse": 0.37704750895500183,
341
- "eval_runtime": 9.4123,
342
- "eval_samples_per_second": 106.244,
343
- "eval_steps_per_second": 1.7,
344
  "step": 890
345
  },
346
  {
347
  "epoch": 10.112359550561798,
348
- "grad_norm": 837362.1875,
349
  "learning_rate": 2.8758169934640522e-05,
350
- "loss": 0.1504,
351
  "step": 900
352
  },
353
  {
354
  "epoch": 10.393258426966293,
355
- "grad_norm": 1949830.125,
356
  "learning_rate": 2.7941176470588236e-05,
357
- "loss": 0.1456,
358
  "step": 925
359
  },
360
  {
361
  "epoch": 10.674157303370787,
362
- "grad_norm": 3423746.0,
363
  "learning_rate": 2.7124183006535947e-05,
364
- "loss": 0.1445,
365
  "step": 950
366
  },
367
  {
368
  "epoch": 10.955056179775282,
369
- "grad_norm": 267509.53125,
370
  "learning_rate": 2.630718954248366e-05,
371
- "loss": 0.143,
372
  "step": 975
373
  },
374
  {
375
  "epoch": 11.0,
376
- "eval_loss": 0.14469632506370544,
377
- "eval_rmse": 0.3803896903991699,
378
- "eval_runtime": 9.2725,
379
- "eval_samples_per_second": 107.846,
380
- "eval_steps_per_second": 1.726,
381
  "step": 979
382
  },
383
  {
384
  "epoch": 11.235955056179776,
385
- "grad_norm": 1893457.375,
386
  "learning_rate": 2.5490196078431373e-05,
387
- "loss": 0.14,
388
  "step": 1000
389
  },
390
  {
391
  "epoch": 11.51685393258427,
392
- "grad_norm": 1374771.875,
393
  "learning_rate": 2.4673202614379087e-05,
394
- "loss": 0.1458,
395
  "step": 1025
396
  },
397
  {
398
  "epoch": 11.797752808988765,
399
- "grad_norm": 2660207.0,
400
  "learning_rate": 2.38562091503268e-05,
401
- "loss": 0.1467,
402
  "step": 1050
403
  },
404
  {
405
  "epoch": 12.0,
406
- "eval_loss": 0.1596774458885193,
407
- "eval_rmse": 0.39959660172462463,
408
- "eval_runtime": 9.2164,
409
- "eval_samples_per_second": 108.502,
410
- "eval_steps_per_second": 1.736,
411
  "step": 1068
412
  },
413
  {
414
  "epoch": 12.07865168539326,
415
- "grad_norm": 4057213.75,
416
  "learning_rate": 2.303921568627451e-05,
417
- "loss": 0.1556,
418
  "step": 1075
419
  },
420
  {
421
  "epoch": 12.359550561797754,
422
- "grad_norm": 551497.625,
423
  "learning_rate": 2.2222222222222223e-05,
424
- "loss": 0.1514,
425
  "step": 1100
426
  },
427
  {
428
  "epoch": 12.640449438202246,
429
- "grad_norm": 61427.015625,
430
  "learning_rate": 2.1405228758169934e-05,
431
- "loss": 0.1446,
432
  "step": 1125
433
  },
434
  {
435
  "epoch": 12.921348314606742,
436
- "grad_norm": 789958.0,
437
  "learning_rate": 2.058823529411765e-05,
438
- "loss": 0.149,
439
  "step": 1150
440
  },
441
  {
442
  "epoch": 13.0,
443
- "eval_loss": 0.1393509805202484,
444
- "eval_rmse": 0.37329748272895813,
445
- "eval_runtime": 9.3848,
446
- "eval_samples_per_second": 106.556,
447
- "eval_steps_per_second": 1.705,
448
  "step": 1157
449
  },
450
  {
451
  "epoch": 13.202247191011235,
452
- "grad_norm": 1956022.25,
453
  "learning_rate": 1.977124183006536e-05,
454
- "loss": 0.1439,
455
  "step": 1175
456
  },
457
  {
458
  "epoch": 13.48314606741573,
459
- "grad_norm": 1201509.0,
460
  "learning_rate": 1.895424836601307e-05,
461
- "loss": 0.1423,
462
  "step": 1200
463
  },
464
  {
465
  "epoch": 13.764044943820224,
466
- "grad_norm": 1603021.5,
467
  "learning_rate": 1.8137254901960785e-05,
468
- "loss": 0.139,
469
  "step": 1225
470
  },
471
  {
472
  "epoch": 14.0,
473
- "eval_loss": 0.14120306074619293,
474
- "eval_rmse": 0.3757699429988861,
475
- "eval_runtime": 9.4961,
476
- "eval_samples_per_second": 105.307,
477
- "eval_steps_per_second": 1.685,
478
  "step": 1246
479
  },
480
  {
481
  "epoch": 14.044943820224718,
482
- "grad_norm": 750973.0625,
483
  "learning_rate": 1.7320261437908496e-05,
484
- "loss": 0.1442,
485
  "step": 1250
486
  },
487
  {
488
  "epoch": 14.325842696629213,
489
- "grad_norm": 380421.21875,
490
  "learning_rate": 1.650326797385621e-05,
491
- "loss": 0.1413,
492
  "step": 1275
493
  },
494
  {
495
  "epoch": 14.606741573033707,
496
- "grad_norm": 451599.53125,
497
  "learning_rate": 1.568627450980392e-05,
498
- "loss": 0.141,
499
  "step": 1300
500
  },
501
  {
502
  "epoch": 14.887640449438202,
503
- "grad_norm": 1830802.625,
504
  "learning_rate": 1.4869281045751634e-05,
505
- "loss": 0.1433,
506
  "step": 1325
507
  },
508
  {
509
  "epoch": 15.0,
510
- "eval_loss": 0.13972964882850647,
511
- "eval_rmse": 0.3738043010234833,
512
- "eval_runtime": 9.4238,
513
- "eval_samples_per_second": 106.114,
514
- "eval_steps_per_second": 1.698,
515
  "step": 1335
516
  },
517
  {
518
  "epoch": 15.168539325842696,
519
- "grad_norm": 241513.265625,
520
  "learning_rate": 1.4052287581699347e-05,
521
- "loss": 0.142,
522
  "step": 1350
523
  },
524
  {
525
  "epoch": 15.44943820224719,
526
- "grad_norm": 1214894.5,
527
  "learning_rate": 1.323529411764706e-05,
528
  "loss": 0.145,
529
  "step": 1375
530
  },
531
  {
532
  "epoch": 15.730337078651685,
533
- "grad_norm": 692089.5625,
534
  "learning_rate": 1.2418300653594772e-05,
535
- "loss": 0.1426,
536
  "step": 1400
537
  },
538
  {
539
  "epoch": 16.0,
540
- "eval_loss": 0.13960076868534088,
541
- "eval_rmse": 0.37363186478614807,
542
- "eval_runtime": 9.48,
543
- "eval_samples_per_second": 105.485,
544
- "eval_steps_per_second": 1.688,
545
  "step": 1424
546
  },
547
  {
548
  "epoch": 16.01123595505618,
549
- "grad_norm": 21341.8984375,
550
  "learning_rate": 1.1601307189542485e-05,
551
- "loss": 0.1419,
552
  "step": 1425
553
  },
554
  {
555
  "epoch": 16.292134831460675,
556
- "grad_norm": 361124.03125,
557
  "learning_rate": 1.0784313725490197e-05,
558
- "loss": 0.1409,
559
  "step": 1450
560
  },
561
  {
562
  "epoch": 16.573033707865168,
563
- "grad_norm": 563769.3125,
564
  "learning_rate": 9.96732026143791e-06,
565
- "loss": 0.1427,
566
  "step": 1475
567
  },
568
  {
569
  "epoch": 16.853932584269664,
570
- "grad_norm": 67665.75,
571
  "learning_rate": 9.150326797385621e-06,
572
- "loss": 0.1403,
573
  "step": 1500
574
  },
575
  {
576
  "epoch": 17.0,
577
- "eval_loss": 0.13963405787944794,
578
- "eval_rmse": 0.3736764192581177,
579
- "eval_runtime": 9.4904,
580
- "eval_samples_per_second": 105.37,
581
- "eval_steps_per_second": 1.686,
582
  "step": 1513
583
  },
584
  {
585
  "epoch": 17.134831460674157,
586
- "grad_norm": 661210.5625,
587
  "learning_rate": 8.333333333333334e-06,
588
  "loss": 0.1424,
589
  "step": 1525
590
  },
591
  {
592
  "epoch": 17.415730337078653,
593
- "grad_norm": 708130.875,
594
  "learning_rate": 7.5163398692810456e-06,
595
- "loss": 0.1424,
596
  "step": 1550
597
  },
598
  {
599
  "epoch": 17.696629213483146,
600
- "grad_norm": 93833.1875,
601
  "learning_rate": 6.699346405228758e-06,
602
- "loss": 0.139,
603
  "step": 1575
604
  },
605
  {
606
  "epoch": 17.97752808988764,
607
- "grad_norm": 154237.1875,
608
  "learning_rate": 5.882352941176471e-06,
609
- "loss": 0.1393,
610
  "step": 1600
611
  },
612
  {
613
  "epoch": 18.0,
614
- "eval_loss": 0.139328271150589,
615
- "eval_rmse": 0.3732670545578003,
616
- "eval_runtime": 9.2376,
617
- "eval_samples_per_second": 108.253,
618
- "eval_steps_per_second": 1.732,
619
  "step": 1602
620
  },
621
  {
622
  "epoch": 18.258426966292134,
623
- "grad_norm": 451971.6875,
624
  "learning_rate": 5.065359477124184e-06,
625
- "loss": 0.1388,
626
  "step": 1625
627
  },
628
  {
629
  "epoch": 18.53932584269663,
630
- "grad_norm": 516973.15625,
631
  "learning_rate": 4.2483660130718954e-06,
632
- "loss": 0.1383,
633
  "step": 1650
634
  },
635
  {
636
  "epoch": 18.820224719101123,
637
- "grad_norm": 111973.78125,
638
  "learning_rate": 3.431372549019608e-06,
639
- "loss": 0.1426,
640
  "step": 1675
641
  },
642
  {
643
  "epoch": 19.0,
644
- "eval_loss": 0.13913311064243317,
645
- "eval_rmse": 0.3730055093765259,
646
- "eval_runtime": 9.3902,
647
- "eval_samples_per_second": 106.494,
648
- "eval_steps_per_second": 1.704,
649
  "step": 1691
650
  },
651
  {
652
  "epoch": 19.10112359550562,
653
- "grad_norm": 390392.875,
654
  "learning_rate": 2.6143790849673204e-06,
655
- "loss": 0.1404,
656
  "step": 1700
657
  },
658
  {
659
  "epoch": 19.382022471910112,
660
- "grad_norm": 167872.421875,
661
  "learning_rate": 1.7973856209150326e-06,
662
- "loss": 0.1402,
663
  "step": 1725
664
  },
665
  {
666
  "epoch": 19.662921348314608,
667
- "grad_norm": 446064.03125,
668
  "learning_rate": 9.80392156862745e-07,
669
- "loss": 0.14,
670
  "step": 1750
671
  },
672
  {
673
  "epoch": 19.9438202247191,
674
- "grad_norm": 133050.875,
675
  "learning_rate": 1.6339869281045752e-07,
676
- "loss": 0.1387,
677
  "step": 1775
678
  },
679
  {
680
  "epoch": 20.0,
681
- "eval_loss": 0.13906994462013245,
682
- "eval_rmse": 0.37292084097862244,
683
- "eval_runtime": 9.3255,
684
- "eval_samples_per_second": 107.233,
685
- "eval_steps_per_second": 1.716,
686
  "step": 1780
687
  },
688
  {
689
  "epoch": 20.0,
690
  "step": 1780,
691
  "total_flos": 0.0,
692
- "train_loss": 0.15599652267573924,
693
- "train_runtime": 3320.1272,
694
- "train_samples_per_second": 34.107,
695
- "train_steps_per_second": 0.536
696
  }
697
  ],
698
  "logging_steps": 25,
 
1
  {
2
+ "best_metric": 0.1402168720960617,
3
  "best_model_checkpoint": "limbxy_pose/checkpoint-1780",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.2808988764044944,
13
+ "grad_norm": 1.4306482431017398e-12,
14
  "learning_rate": 5e-06,
15
  "loss": 0.3282,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 0.5617977528089888,
20
+ "grad_norm": 7.286453206209118e-14,
21
  "learning_rate": 1e-05,
22
+ "loss": 0.3304,
23
  "step": 50
24
  },
25
  {
26
  "epoch": 0.8426966292134831,
27
+ "grad_norm": 1.7020333059614146e-11,
28
  "learning_rate": 1.5e-05,
29
+ "loss": 0.3414,
30
  "step": 75
31
  },
32
  {
33
  "epoch": 1.0,
34
+ "eval_loss": 0.33112141489982605,
35
+ "eval_rmse": 0.5754314661026001,
36
+ "eval_runtime": 9.461,
37
+ "eval_samples_per_second": 105.697,
38
+ "eval_steps_per_second": 1.691,
39
  "step": 89
40
  },
41
  {
42
  "epoch": 1.1235955056179776,
43
+ "grad_norm": 4.0378258462508976e-13,
44
  "learning_rate": 2e-05,
45
+ "loss": 0.3408,
46
  "step": 100
47
  },
48
  {
49
  "epoch": 1.404494382022472,
50
+ "grad_norm": 38181.53515625,
51
  "learning_rate": 2.5e-05,
52
+ "loss": 0.2674,
53
  "step": 125
54
  },
55
  {
56
  "epoch": 1.6853932584269664,
57
+ "grad_norm": 1195767.75,
58
  "learning_rate": 3e-05,
59
+ "loss": 0.1862,
60
  "step": 150
61
  },
62
  {
63
  "epoch": 1.9662921348314608,
64
+ "grad_norm": 4560497.0,
65
  "learning_rate": 3.5e-05,
66
+ "loss": 0.1834,
67
  "step": 175
68
  },
69
  {
70
  "epoch": 2.0,
71
+ "eval_loss": 0.2025543600320816,
72
+ "eval_rmse": 0.4500603973865509,
73
+ "eval_runtime": 9.155,
74
+ "eval_samples_per_second": 109.23,
75
+ "eval_steps_per_second": 1.748,
76
  "step": 178
77
  },
78
  {
79
  "epoch": 2.247191011235955,
80
+ "grad_norm": 2378441.75,
81
  "learning_rate": 4e-05,
82
+ "loss": 0.1947,
83
  "step": 200
84
  },
85
  {
86
  "epoch": 2.5280898876404496,
87
+ "grad_norm": 1214816.375,
88
  "learning_rate": 4.5e-05,
89
+ "loss": 0.1846,
90
  "step": 225
91
  },
92
  {
93
  "epoch": 2.808988764044944,
94
+ "grad_norm": 596520.4375,
95
  "learning_rate": 5e-05,
96
+ "loss": 0.1645,
97
  "step": 250
98
  },
99
  {
100
  "epoch": 3.0,
101
+ "eval_loss": 0.14911922812461853,
102
+ "eval_rmse": 0.38615959882736206,
103
+ "eval_runtime": 9.1063,
104
+ "eval_samples_per_second": 109.815,
105
+ "eval_steps_per_second": 1.757,
106
  "step": 267
107
  },
108
  {
109
  "epoch": 3.0898876404494384,
110
+ "grad_norm": 789416.375,
111
  "learning_rate": 4.918300653594771e-05,
112
+ "loss": 0.1692,
113
  "step": 275
114
  },
115
  {
116
  "epoch": 3.370786516853933,
117
+ "grad_norm": 3581939.75,
118
  "learning_rate": 4.8366013071895424e-05,
119
+ "loss": 0.1954,
120
  "step": 300
121
  },
122
  {
123
  "epoch": 3.6516853932584272,
124
+ "grad_norm": 2062457.125,
125
  "learning_rate": 4.7549019607843135e-05,
126
+ "loss": 0.1924,
127
  "step": 325
128
  },
129
  {
130
  "epoch": 3.932584269662921,
131
+ "grad_norm": 2538814.0,
132
  "learning_rate": 4.673202614379085e-05,
133
+ "loss": 0.1729,
134
  "step": 350
135
  },
136
  {
137
  "epoch": 4.0,
138
+ "eval_loss": 0.2542487680912018,
139
+ "eval_rmse": 0.5042308568954468,
140
+ "eval_runtime": 9.4787,
141
+ "eval_samples_per_second": 105.5,
142
+ "eval_steps_per_second": 1.688,
143
  "step": 356
144
  },
145
  {
146
  "epoch": 4.213483146067416,
147
+ "grad_norm": 5207696.0,
148
  "learning_rate": 4.5915032679738564e-05,
149
+ "loss": 0.1845,
150
  "step": 375
151
  },
152
  {
153
  "epoch": 4.49438202247191,
154
+ "grad_norm": 2629999.75,
155
  "learning_rate": 4.5098039215686275e-05,
156
+ "loss": 0.1854,
157
  "step": 400
158
  },
159
  {
160
  "epoch": 4.775280898876405,
161
+ "grad_norm": 764233.0625,
162
  "learning_rate": 4.4281045751633986e-05,
163
+ "loss": 0.1612,
164
  "step": 425
165
  },
166
  {
167
  "epoch": 5.0,
168
+ "eval_loss": 0.15393178164958954,
169
+ "eval_rmse": 0.39234140515327454,
170
+ "eval_runtime": 9.1662,
171
+ "eval_samples_per_second": 109.097,
172
+ "eval_steps_per_second": 1.746,
173
  "step": 445
174
  },
175
  {
176
  "epoch": 5.056179775280899,
177
+ "grad_norm": 5359111.0,
178
  "learning_rate": 4.3464052287581704e-05,
179
+ "loss": 0.1679,
180
  "step": 450
181
  },
182
  {
183
  "epoch": 5.337078651685394,
184
+ "grad_norm": 5016195.0,
185
  "learning_rate": 4.2647058823529415e-05,
186
+ "loss": 0.1658,
187
  "step": 475
188
  },
189
  {
190
  "epoch": 5.617977528089888,
191
+ "grad_norm": 1101185.75,
192
  "learning_rate": 4.1830065359477126e-05,
193
+ "loss": 0.1567,
194
  "step": 500
195
  },
196
  {
197
  "epoch": 5.898876404494382,
198
+ "grad_norm": 2115526.0,
199
  "learning_rate": 4.101307189542484e-05,
200
+ "loss": 0.1578,
201
  "step": 525
202
  },
203
  {
204
  "epoch": 6.0,
205
+ "eval_loss": 0.14643920958042145,
206
+ "eval_rmse": 0.38267379999160767,
207
+ "eval_runtime": 9.2072,
208
+ "eval_samples_per_second": 108.61,
209
+ "eval_steps_per_second": 1.738,
210
  "step": 534
211
  },
212
  {
213
  "epoch": 6.179775280898877,
214
+ "grad_norm": 1441159.25,
215
  "learning_rate": 4.0196078431372555e-05,
216
+ "loss": 0.1535,
217
  "step": 550
218
  },
219
  {
220
  "epoch": 6.460674157303371,
221
+ "grad_norm": 1516853.375,
222
  "learning_rate": 3.9379084967320266e-05,
223
+ "loss": 0.164,
224
  "step": 575
225
  },
226
  {
227
  "epoch": 6.741573033707866,
228
+ "grad_norm": 668387.4375,
229
  "learning_rate": 3.8562091503267977e-05,
230
+ "loss": 0.1594,
231
  "step": 600
232
  },
233
  {
234
  "epoch": 7.0,
235
+ "eval_loss": 0.18601085245609283,
236
+ "eval_rmse": 0.43128976225852966,
237
+ "eval_runtime": 9.5674,
238
+ "eval_samples_per_second": 104.522,
239
+ "eval_steps_per_second": 1.672,
240
  "step": 623
241
  },
242
  {
243
  "epoch": 7.022471910112359,
244
+ "grad_norm": 3315089.5,
245
  "learning_rate": 3.774509803921569e-05,
246
+ "loss": 0.1673,
247
  "step": 625
248
  },
249
  {
250
  "epoch": 7.303370786516854,
251
+ "grad_norm": 178060.9375,
252
  "learning_rate": 3.6928104575163405e-05,
253
+ "loss": 0.1514,
254
  "step": 650
255
  },
256
  {
257
  "epoch": 7.584269662921348,
258
+ "grad_norm": 924463.8125,
259
  "learning_rate": 3.611111111111111e-05,
260
+ "loss": 0.1454,
261
  "step": 675
262
  },
263
  {
264
  "epoch": 7.865168539325842,
265
+ "grad_norm": 44355.859375,
266
  "learning_rate": 3.529411764705883e-05,
267
+ "loss": 0.1546,
268
  "step": 700
269
  },
270
  {
271
  "epoch": 8.0,
272
+ "eval_loss": 0.14328204095363617,
273
+ "eval_rmse": 0.37852615118026733,
274
+ "eval_runtime": 9.3971,
275
+ "eval_samples_per_second": 106.416,
276
+ "eval_steps_per_second": 1.703,
277
  "step": 712
278
  },
279
  {
280
  "epoch": 8.146067415730338,
281
+ "grad_norm": 2000040.75,
282
  "learning_rate": 3.447712418300654e-05,
283
+ "loss": 0.1503,
284
  "step": 725
285
  },
286
  {
287
  "epoch": 8.426966292134832,
288
+ "grad_norm": 1668223.5,
289
  "learning_rate": 3.366013071895425e-05,
290
+ "loss": 0.1469,
291
  "step": 750
292
  },
293
  {
294
  "epoch": 8.707865168539326,
295
+ "grad_norm": 969523.8125,
296
  "learning_rate": 3.284313725490196e-05,
297
+ "loss": 0.1525,
298
  "step": 775
299
  },
300
  {
301
  "epoch": 8.98876404494382,
302
+ "grad_norm": 399384.6875,
303
  "learning_rate": 3.202614379084967e-05,
304
+ "loss": 0.1517,
305
  "step": 800
306
  },
307
  {
308
  "epoch": 9.0,
309
+ "eval_loss": 0.14162829518318176,
310
+ "eval_rmse": 0.37633535265922546,
311
+ "eval_runtime": 9.2193,
312
+ "eval_samples_per_second": 108.468,
313
+ "eval_steps_per_second": 1.735,
314
  "step": 801
315
  },
316
  {
317
  "epoch": 9.269662921348315,
318
+ "grad_norm": 1699236.625,
319
  "learning_rate": 3.120915032679739e-05,
320
+ "loss": 0.1475,
321
  "step": 825
322
  },
323
  {
324
  "epoch": 9.55056179775281,
325
+ "grad_norm": 200852.296875,
326
  "learning_rate": 3.0392156862745097e-05,
327
+ "loss": 0.1481,
328
  "step": 850
329
  },
330
  {
331
  "epoch": 9.831460674157304,
332
+ "grad_norm": 837677.625,
333
  "learning_rate": 2.957516339869281e-05,
334
+ "loss": 0.1461,
335
  "step": 875
336
  },
337
  {
338
  "epoch": 10.0,
339
+ "eval_loss": 0.15756502747535706,
340
+ "eval_rmse": 0.3969446122646332,
341
+ "eval_runtime": 9.8254,
342
+ "eval_samples_per_second": 101.777,
343
+ "eval_steps_per_second": 1.628,
344
  "step": 890
345
  },
346
  {
347
  "epoch": 10.112359550561798,
348
+ "grad_norm": 1678043.0,
349
  "learning_rate": 2.8758169934640522e-05,
350
+ "loss": 0.1571,
351
  "step": 900
352
  },
353
  {
354
  "epoch": 10.393258426966293,
355
+ "grad_norm": 417715.75,
356
  "learning_rate": 2.7941176470588236e-05,
357
+ "loss": 0.1508,
358
  "step": 925
359
  },
360
  {
361
  "epoch": 10.674157303370787,
362
+ "grad_norm": 296561.8125,
363
  "learning_rate": 2.7124183006535947e-05,
364
+ "loss": 0.1456,
365
  "step": 950
366
  },
367
  {
368
  "epoch": 10.955056179775282,
369
+ "grad_norm": 1708742.5,
370
  "learning_rate": 2.630718954248366e-05,
371
+ "loss": 0.1519,
372
  "step": 975
373
  },
374
  {
375
  "epoch": 11.0,
376
+ "eval_loss": 0.1622958481311798,
377
+ "eval_rmse": 0.4028595983982086,
378
+ "eval_runtime": 9.6192,
379
+ "eval_samples_per_second": 103.958,
380
+ "eval_steps_per_second": 1.663,
381
  "step": 979
382
  },
383
  {
384
  "epoch": 11.235955056179776,
385
+ "grad_norm": 923283.0,
386
  "learning_rate": 2.5490196078431373e-05,
387
+ "loss": 0.1527,
388
  "step": 1000
389
  },
390
  {
391
  "epoch": 11.51685393258427,
392
+ "grad_norm": 78371.8671875,
393
  "learning_rate": 2.4673202614379087e-05,
394
+ "loss": 0.1499,
395
  "step": 1025
396
  },
397
  {
398
  "epoch": 11.797752808988765,
399
+ "grad_norm": 1358486.375,
400
  "learning_rate": 2.38562091503268e-05,
401
+ "loss": 0.1491,
402
  "step": 1050
403
  },
404
  {
405
  "epoch": 12.0,
406
+ "eval_loss": 0.14110486209392548,
407
+ "eval_rmse": 0.37563925981521606,
408
+ "eval_runtime": 9.459,
409
+ "eval_samples_per_second": 105.719,
410
+ "eval_steps_per_second": 1.692,
411
  "step": 1068
412
  },
413
  {
414
  "epoch": 12.07865168539326,
415
+ "grad_norm": 677287.375,
416
  "learning_rate": 2.303921568627451e-05,
417
+ "loss": 0.1458,
418
  "step": 1075
419
  },
420
  {
421
  "epoch": 12.359550561797754,
422
+ "grad_norm": 1671154.75,
423
  "learning_rate": 2.2222222222222223e-05,
424
+ "loss": 0.1446,
425
  "step": 1100
426
  },
427
  {
428
  "epoch": 12.640449438202246,
429
+ "grad_norm": 1566995.875,
430
  "learning_rate": 2.1405228758169934e-05,
431
+ "loss": 0.1449,
432
  "step": 1125
433
  },
434
  {
435
  "epoch": 12.921348314606742,
436
+ "grad_norm": 457053.46875,
437
  "learning_rate": 2.058823529411765e-05,
438
+ "loss": 0.1489,
439
  "step": 1150
440
  },
441
  {
442
  "epoch": 13.0,
443
+ "eval_loss": 0.1416008174419403,
444
+ "eval_rmse": 0.3762988746166229,
445
+ "eval_runtime": 9.5408,
446
+ "eval_samples_per_second": 104.813,
447
+ "eval_steps_per_second": 1.677,
448
  "step": 1157
449
  },
450
  {
451
  "epoch": 13.202247191011235,
452
+ "grad_norm": 1004067.375,
453
  "learning_rate": 1.977124183006536e-05,
454
+ "loss": 0.1459,
455
  "step": 1175
456
  },
457
  {
458
  "epoch": 13.48314606741573,
459
+ "grad_norm": 2169427.0,
460
  "learning_rate": 1.895424836601307e-05,
461
+ "loss": 0.1453,
462
  "step": 1200
463
  },
464
  {
465
  "epoch": 13.764044943820224,
466
+ "grad_norm": 408455.125,
467
  "learning_rate": 1.8137254901960785e-05,
468
+ "loss": 0.1425,
469
  "step": 1225
470
  },
471
  {
472
  "epoch": 14.0,
473
+ "eval_loss": 0.14256992936134338,
474
+ "eval_rmse": 0.3775843381881714,
475
+ "eval_runtime": 9.5439,
476
+ "eval_samples_per_second": 104.779,
477
+ "eval_steps_per_second": 1.676,
478
  "step": 1246
479
  },
480
  {
481
  "epoch": 14.044943820224718,
482
+ "grad_norm": 582970.4375,
483
  "learning_rate": 1.7320261437908496e-05,
484
+ "loss": 0.147,
485
  "step": 1250
486
  },
487
  {
488
  "epoch": 14.325842696629213,
489
+ "grad_norm": 524589.5625,
490
  "learning_rate": 1.650326797385621e-05,
491
+ "loss": 0.1424,
492
  "step": 1275
493
  },
494
  {
495
  "epoch": 14.606741573033707,
496
+ "grad_norm": 1342928.875,
497
  "learning_rate": 1.568627450980392e-05,
498
+ "loss": 0.1433,
499
  "step": 1300
500
  },
501
  {
502
  "epoch": 14.887640449438202,
503
+ "grad_norm": 1165010.25,
504
  "learning_rate": 1.4869281045751634e-05,
505
+ "loss": 0.145,
506
  "step": 1325
507
  },
508
  {
509
  "epoch": 15.0,
510
+ "eval_loss": 0.14070571959018707,
511
+ "eval_rmse": 0.37510761618614197,
512
+ "eval_runtime": 9.3011,
513
+ "eval_samples_per_second": 107.514,
514
+ "eval_steps_per_second": 1.72,
515
  "step": 1335
516
  },
517
  {
518
  "epoch": 15.168539325842696,
519
+ "grad_norm": 1536645.125,
520
  "learning_rate": 1.4052287581699347e-05,
521
+ "loss": 0.1397,
522
  "step": 1350
523
  },
524
  {
525
  "epoch": 15.44943820224719,
526
+ "grad_norm": 121041.3984375,
527
  "learning_rate": 1.323529411764706e-05,
528
  "loss": 0.145,
529
  "step": 1375
530
  },
531
  {
532
  "epoch": 15.730337078651685,
533
+ "grad_norm": 850638.375,
534
  "learning_rate": 1.2418300653594772e-05,
535
+ "loss": 0.1418,
536
  "step": 1400
537
  },
538
  {
539
  "epoch": 16.0,
540
+ "eval_loss": 0.144333153963089,
541
+ "eval_rmse": 0.37991204857826233,
542
+ "eval_runtime": 9.7712,
543
+ "eval_samples_per_second": 102.342,
544
+ "eval_steps_per_second": 1.637,
545
  "step": 1424
546
  },
547
  {
548
  "epoch": 16.01123595505618,
549
+ "grad_norm": 1658046.0,
550
  "learning_rate": 1.1601307189542485e-05,
551
+ "loss": 0.1432,
552
  "step": 1425
553
  },
554
  {
555
  "epoch": 16.292134831460675,
556
+ "grad_norm": 291296.34375,
557
  "learning_rate": 1.0784313725490197e-05,
558
+ "loss": 0.1422,
559
  "step": 1450
560
  },
561
  {
562
  "epoch": 16.573033707865168,
563
+ "grad_norm": 71275.6953125,
564
  "learning_rate": 9.96732026143791e-06,
565
+ "loss": 0.1429,
566
  "step": 1475
567
  },
568
  {
569
  "epoch": 16.853932584269664,
570
+ "grad_norm": 717235.9375,
571
  "learning_rate": 9.150326797385621e-06,
572
+ "loss": 0.1411,
573
  "step": 1500
574
  },
575
  {
576
  "epoch": 17.0,
577
+ "eval_loss": 0.1402604728937149,
578
+ "eval_rmse": 0.3745136559009552,
579
+ "eval_runtime": 9.4156,
580
+ "eval_samples_per_second": 106.207,
581
+ "eval_steps_per_second": 1.699,
582
  "step": 1513
583
  },
584
  {
585
  "epoch": 17.134831460674157,
586
+ "grad_norm": 956427.5625,
587
  "learning_rate": 8.333333333333334e-06,
588
  "loss": 0.1424,
589
  "step": 1525
590
  },
591
  {
592
  "epoch": 17.415730337078653,
593
+ "grad_norm": 391686.90625,
594
  "learning_rate": 7.5163398692810456e-06,
595
+ "loss": 0.1434,
596
  "step": 1550
597
  },
598
  {
599
  "epoch": 17.696629213483146,
600
+ "grad_norm": 178194.640625,
601
  "learning_rate": 6.699346405228758e-06,
602
+ "loss": 0.1396,
603
  "step": 1575
604
  },
605
  {
606
  "epoch": 17.97752808988764,
607
+ "grad_norm": 315803.8125,
608
  "learning_rate": 5.882352941176471e-06,
609
+ "loss": 0.1398,
610
  "step": 1600
611
  },
612
  {
613
  "epoch": 18.0,
614
+ "eval_loss": 0.14033755660057068,
615
+ "eval_rmse": 0.37461650371551514,
616
+ "eval_runtime": 9.5116,
617
+ "eval_samples_per_second": 105.135,
618
+ "eval_steps_per_second": 1.682,
619
  "step": 1602
620
  },
621
  {
622
  "epoch": 18.258426966292134,
623
+ "grad_norm": 201845.0,
624
  "learning_rate": 5.065359477124184e-06,
625
+ "loss": 0.1396,
626
  "step": 1625
627
  },
628
  {
629
  "epoch": 18.53932584269663,
630
+ "grad_norm": 346229.6875,
631
  "learning_rate": 4.2483660130718954e-06,
632
+ "loss": 0.1386,
633
  "step": 1650
634
  },
635
  {
636
  "epoch": 18.820224719101123,
637
+ "grad_norm": 75540.28125,
638
  "learning_rate": 3.431372549019608e-06,
639
+ "loss": 0.143,
640
  "step": 1675
641
  },
642
  {
643
  "epoch": 19.0,
644
+ "eval_loss": 0.14052481949329376,
645
+ "eval_rmse": 0.37486639618873596,
646
+ "eval_runtime": 9.3754,
647
+ "eval_samples_per_second": 106.662,
648
+ "eval_steps_per_second": 1.707,
649
  "step": 1691
650
  },
651
  {
652
  "epoch": 19.10112359550562,
653
+ "grad_norm": 240692.8125,
654
  "learning_rate": 2.6143790849673204e-06,
655
+ "loss": 0.1409,
656
  "step": 1700
657
  },
658
  {
659
  "epoch": 19.382022471910112,
660
+ "grad_norm": 214180.84375,
661
  "learning_rate": 1.7973856209150326e-06,
662
+ "loss": 0.141,
663
  "step": 1725
664
  },
665
  {
666
  "epoch": 19.662921348314608,
667
+ "grad_norm": 108064.1484375,
668
  "learning_rate": 9.80392156862745e-07,
669
+ "loss": 0.1407,
670
  "step": 1750
671
  },
672
  {
673
  "epoch": 19.9438202247191,
674
+ "grad_norm": 161086.03125,
675
  "learning_rate": 1.6339869281045752e-07,
676
+ "loss": 0.1395,
677
  "step": 1775
678
  },
679
  {
680
  "epoch": 20.0,
681
+ "eval_loss": 0.1402168720960617,
682
+ "eval_rmse": 0.37445545196533203,
683
+ "eval_runtime": 9.6855,
684
+ "eval_samples_per_second": 103.247,
685
+ "eval_steps_per_second": 1.652,
686
  "step": 1780
687
  },
688
  {
689
  "epoch": 20.0,
690
  "step": 1780,
691
  "total_flos": 0.0,
692
+ "train_loss": 0.16565035152971075,
693
+ "train_runtime": 3332.9842,
694
+ "train_samples_per_second": 33.976,
695
+ "train_steps_per_second": 0.534
696
  }
697
  ],
698
  "logging_steps": 25,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a833d6b8877da24229a846f77bfc460ab88620d8d896555622b43c3b2361c829
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05c8cfbfdac1a6fbec5f480d224bdc6c1661cce2f8a79aa338338c61cdc82d55
3
  size 5240