lesso13 committed on
Commit
6000713
·
verified ·
1 Parent(s): c2ff7f0

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3aa94ece7f7c47c5b684fbfb60fe0fd9bfbdf64ed5cb737f3d4618a769438ad
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb4c70d0eb54db30135f9e1181d3f60ef363e7554f594d3aff36f5332327f77
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a420f9495ce9945bd903a46170b45dfbca0a2861bfb304aa84460493f6e3f421
3
  size 341314196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12ce5ff0caf4b3c5b30e1697d042e35909ab085e1846099cf7133e73c04fb15f
3
  size 341314196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b60307a8384f8d4e62ca6a0bde4b4018d9c22875125249ce3404f126cd927cc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fad1d524fc47db40541de4bb31ebd2dbf7fbe2adb11e5083bb2c6cc16b4b3293
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0585a94ca770852d904d0a057ee7d0f13731dc026d439d9add35f155aff77fb2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a29a98d7724cf179d3beb2d49a34c568e23cf47a86cb77f3cd39efdcdbcc2de
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.2177441269159317,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 0.0637890708058686,
5
  "eval_steps": 50,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -144,6 +144,49 @@
144
  "eval_samples_per_second": 13.671,
145
  "eval_steps_per_second": 3.425,
146
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 10,
@@ -172,7 +215,7 @@
172
  "attributes": {}
173
  }
174
  },
175
- "total_flos": 3.3564427127095296e+16,
176
  "train_batch_size": 4,
177
  "trial_name": null,
178
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.20021717250347137,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
+ "epoch": 0.0850520944078248,
5
  "eval_steps": 50,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 13.671,
145
  "eval_steps_per_second": 3.425,
146
  "step": 150
147
+ },
148
+ {
149
+ "epoch": 0.06804167552625984,
150
+ "grad_norm": 0.3854157626628876,
151
+ "learning_rate": 0.00018310968873606635,
152
+ "loss": 0.1658,
153
+ "step": 160
154
+ },
155
+ {
156
+ "epoch": 0.07229428024665108,
157
+ "grad_norm": 0.4845024049282074,
158
+ "learning_rate": 0.0001777624095772184,
159
+ "loss": 0.1831,
160
+ "step": 170
161
+ },
162
+ {
163
+ "epoch": 0.07654688496704232,
164
+ "grad_norm": 0.9749467968940735,
165
+ "learning_rate": 0.0001720679471221826,
166
+ "loss": 0.1933,
167
+ "step": 180
168
+ },
169
+ {
170
+ "epoch": 0.08079948968743356,
171
+ "grad_norm": 0.8443153500556946,
172
+ "learning_rate": 0.00016605404421963453,
173
+ "loss": 0.2047,
174
+ "step": 190
175
+ },
176
+ {
177
+ "epoch": 0.0850520944078248,
178
+ "grad_norm": 1.4500705003738403,
179
+ "learning_rate": 0.00015975,
180
+ "loss": 0.2931,
181
+ "step": 200
182
+ },
183
+ {
184
+ "epoch": 0.0850520944078248,
185
+ "eval_loss": 0.20021717250347137,
186
+ "eval_runtime": 72.6074,
187
+ "eval_samples_per_second": 13.635,
188
+ "eval_steps_per_second": 3.416,
189
+ "step": 200
190
  }
191
  ],
192
  "logging_steps": 10,
 
215
  "attributes": {}
216
  }
217
  },
218
+ "total_flos": 4.47839968690176e+16,
219
  "train_batch_size": 4,
220
  "trial_name": null,
221
  "trial_params": null