SystemAdmin123 commited on
Commit
f198c7c
·
verified ·
1 Parent(s): 1859deb

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63489c1e38f1ac5a2629813f26123fa744473881124f9e98887d7060c3c099d6
3
  size 4990951248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28b34625a8715b00a596425b15ee66b124a4a7c3839e8e8697370b2b5307b014
3
  size 4990951248
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d1646b92d5f7acc41fc74dccc5ebd7e37ff62d6dd42249826fea2808dd90f21
3
  size 559197598
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e102159806ad9af0c0ec6c0199516dcc7b34bd576b8eec9fe119f832253f214a
3
  size 559197598
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:daebfbcaf4226179e95a7a2928839a94f01a317bcb32a0021d250f6a3602bfce
3
  size 5640674860
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa5fe98727f7cf6b7ef710f9d8b2f8806f581b27b46fa2671c2078a23ec08ffa
3
  size 5640674860
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d37ebc224a7a4705cea1613dd8fd1f8f471218f082721f531c7167abec9ee3a5
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4aa11e1920a65bd6f9d3d7705a39b4eb273d4b1d13425f4d93bdc9a32a38cf4
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce78d042223b902aa422256a43e83b469f7892bf3a43f13e3a19d2fa900c9406
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b16a3f4fd0bbb2ef54173b8f8f9b473c05114aa603c01260cd9c7f8be1b6a9da
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f719e5477984d5abea386f4b60be21f4d2ad98970ac0de30ce8a64b3cdb8cb02
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47fb916367dfd73f6e83cfd68c3949b4fc2131be1fd25bfd542ffd71c9e3d87b
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97ed4133b2ea99a43dbced219876975f15888ea761a0db49a0afa3f636f6ca78
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98bbea1742a7fc36512eb73ae38f6fa9b654a9feabba8b189726de9331137352
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:776a540436bd1dedef12a8b7642374faac449931a1c15952fb54741875421490
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:898092e0bf0f6dbc7c89ddc78136fe76ce924561e71751216da91be467d8b5d1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.5806451612903225,
5
  "eval_steps": 20,
6
- "global_step": 80,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -103,6 +103,28 @@
103
  "eval_samples_per_second": 73.94,
104
  "eval_steps_per_second": 3.103,
105
  "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  }
107
  ],
108
  "logging_steps": 10,
@@ -122,7 +144,7 @@
122
  "attributes": {}
123
  }
124
  },
125
- "total_flos": 6.236887688885043e+16,
126
  "train_batch_size": 6,
127
  "trial_name": null,
128
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.225806451612903,
5
  "eval_steps": 20,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
103
  "eval_samples_per_second": 73.94,
104
  "eval_steps_per_second": 3.103,
105
  "step": 80
106
+ },
107
+ {
108
+ "epoch": 2.903225806451613,
109
+ "grad_norm": 32.25,
110
+ "learning_rate": 0.00019090065350491626,
111
+ "loss": 2.4592,
112
+ "step": 90
113
+ },
114
+ {
115
+ "epoch": 3.225806451612903,
116
+ "grad_norm": 61.25,
117
+ "learning_rate": 0.0001879473751206489,
118
+ "loss": 2.3718,
119
+ "step": 100
120
+ },
121
+ {
122
+ "epoch": 3.225806451612903,
123
+ "eval_loss": 2.7737066745758057,
124
+ "eval_runtime": 20.2807,
125
+ "eval_samples_per_second": 74.011,
126
+ "eval_steps_per_second": 3.106,
127
+ "step": 100
128
  }
129
  ],
130
  "logging_steps": 10,
 
144
  "attributes": {}
145
  }
146
  },
147
+ "total_flos": 7.791231950965965e+16,
148
  "train_batch_size": 6,
149
  "trial_name": null,
150
  "trial_params": null