lesso05 committed on
Commit
08d03d8
·
verified ·
1 Parent(s): 82e077d

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6eb746b375916d993fdbd038976e286207ed000be26a8b6d441ff024e7356e19
3
  size 295488936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07bee832f06780a5be4e32fbb7f07653d8bf525239d17dbcf3682f3367f4ecad
3
  size 295488936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28e375fef72ce3620c2a48a468e9ae068020b22d3a68f06824f1b0359423810c
3
  size 591208618
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:676fc96f6dd8b3bec153347640f6f09d1d511aea4f6654e3824ce1e9eebdb448
3
  size 591208618
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:985c3c780693d1e6bbb0e8f5ec9df28d42d9a9e6436b616b48b9e067157d0c2b
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47c8baa6870e4a9e07f813aaa5b50fca6773957dad188216f155ee519be011b2
3
  size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e3319cd7470c264bee54329e29b5ec91d9c9a4808c2d538532945e481104091
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:480dd149a7e539c29c101c27ba4934fb8fdb062e60fdebf07dc67a0615db8e80
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.07232455164194107,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-1500",
4
- "epoch": 1.5357051446122343,
5
  "eval_steps": 500,
6
- "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -249,6 +249,84 @@
249
  "eval_samples_per_second": 22.096,
250
  "eval_steps_per_second": 5.534,
251
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  }
253
  ],
254
  "logging_steps": 50,
@@ -272,12 +350,12 @@
272
  "should_evaluate": false,
273
  "should_log": false,
274
  "should_save": true,
275
- "should_training_stop": false
276
  },
277
  "attributes": {}
278
  }
279
  },
280
- "total_flos": 4.535802208242893e+17,
281
  "train_batch_size": 4,
282
  "trial_name": null,
283
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.07142073661088943,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-2000",
4
+ "epoch": 2.047606859482979,
5
  "eval_steps": 500,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
249
  "eval_samples_per_second": 22.096,
250
  "eval_steps_per_second": 5.534,
251
  "step": 1500
252
+ },
253
+ {
254
+ "epoch": 1.586895316099309,
255
+ "grad_norm": 0.05019761994481087,
256
+ "learning_rate": 2.708829915600401e-05,
257
+ "loss": 0.0643,
258
+ "step": 1550
259
+ },
260
+ {
261
+ "epoch": 1.6380854875863835,
262
+ "grad_norm": 0.05420317500829697,
263
+ "learning_rate": 2.161309778686967e-05,
264
+ "loss": 0.0586,
265
+ "step": 1600
266
+ },
267
+ {
268
+ "epoch": 1.689275659073458,
269
+ "grad_norm": 0.05063829943537712,
270
+ "learning_rate": 1.6690435978090822e-05,
271
+ "loss": 0.0623,
272
+ "step": 1650
273
+ },
274
+ {
275
+ "epoch": 1.7404658305605323,
276
+ "grad_norm": 0.05515941604971886,
277
+ "learning_rate": 1.2353940501334881e-05,
278
+ "loss": 0.06,
279
+ "step": 1700
280
+ },
281
+ {
282
+ "epoch": 1.7916560020476069,
283
+ "grad_norm": 0.05263717472553253,
284
+ "learning_rate": 8.633234017856605e-06,
285
+ "loss": 0.0612,
286
+ "step": 1750
287
+ },
288
+ {
289
+ "epoch": 1.8428461735346815,
290
+ "grad_norm": 0.05191710218787193,
291
+ "learning_rate": 5.55373272568495e-06,
292
+ "loss": 0.0592,
293
+ "step": 1800
294
+ },
295
+ {
296
+ "epoch": 1.8940363450217559,
297
+ "grad_norm": 0.055449921637773514,
298
+ "learning_rate": 3.1364727412186366e-06,
299
+ "loss": 0.0603,
300
+ "step": 1850
301
+ },
302
+ {
303
+ "epoch": 1.9452265165088303,
304
+ "grad_norm": 0.0455612912774086,
305
+ "learning_rate": 1.3979664012209619e-06,
306
+ "loss": 0.0621,
307
+ "step": 1900
308
+ },
309
+ {
310
+ "epoch": 1.9964166879959047,
311
+ "grad_norm": 0.09055913984775543,
312
+ "learning_rate": 3.5008946681634066e-07,
313
+ "loss": 0.0642,
314
+ "step": 1950
315
+ },
316
+ {
317
+ "epoch": 2.047606859482979,
318
+ "grad_norm": 0.0834808498620987,
319
+ "learning_rate": 0.0,
320
+ "loss": 0.0524,
321
+ "step": 2000
322
+ },
323
+ {
324
+ "epoch": 2.047606859482979,
325
+ "eval_loss": 0.07142073661088943,
326
+ "eval_runtime": 75.0403,
327
+ "eval_samples_per_second": 21.922,
328
+ "eval_steps_per_second": 5.49,
329
+ "step": 2000
330
  }
331
  ],
332
  "logging_steps": 50,
 
350
  "should_evaluate": false,
351
  "should_log": false,
352
  "should_save": true,
353
+ "should_training_stop": true
354
  },
355
  "attributes": {}
356
  }
357
  },
358
+ "total_flos": 6.036170126603059e+17,
359
  "train_batch_size": 4,
360
  "trial_name": null,
361
  "trial_params": null