lesso03 commited on
Commit
f2ad331
·
verified ·
1 Parent(s): 9906bdc

Training in progress, step 450, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0af1b765e71bd846d26acd0c4fd2bdb0482cb61872789ba0e8a4be3e2dbd5cc1
3
  size 645975704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6a0a5b55534350d56712884ba104a70662f3037e676f23fc4409aeb22caeccd
3
  size 645975704
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c4ad1f6d0ec8f5d9a8bbb869e2e3fdacbcd9fc504fb25bbe145c502c33b3a5a
3
  size 328468852
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa8e96f4a50676171d1e42c954a30811a105697cca5914f838c82970b37e9b2f
3
  size 328468852
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:716ce76fb7afaf5d474ae7ddddef1a9649af0a2191550281b695741b894128e4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e1dcbd4ca11878e182b2a443ac16eb53a385e713b9af55f8a27917cd0d74d93
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:303e83c678e93581d63a3650d04963a2cf914298a9658799e3e1f49cf7c8604f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f03fffc232f1b8a6305d6fa9076c1e1bca28e4d7666df088caa1812431cf9135
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.3000512421131134,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-400",
4
- "epoch": 0.350109409190372,
5
  "eval_steps": 50,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -359,6 +359,49 @@
359
  "eval_samples_per_second": 10.029,
360
  "eval_steps_per_second": 2.523,
361
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
  }
363
  ],
364
  "logging_steps": 10,
@@ -387,7 +430,7 @@
387
  "attributes": {}
388
  }
389
  },
390
- "total_flos": 1.3090262709436416e+17,
391
  "train_batch_size": 4,
392
  "trial_name": null,
393
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.29808175563812256,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-450",
4
+ "epoch": 0.3938730853391685,
5
  "eval_steps": 50,
6
+ "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
359
  "eval_samples_per_second": 10.029,
360
  "eval_steps_per_second": 2.523,
361
  "step": 400
362
+ },
363
+ {
364
+ "epoch": 0.3588621444201313,
365
+ "grad_norm": 0.36653754115104675,
366
+ "learning_rate": 1.9384775070942844e-05,
367
+ "loss": 0.7181,
368
+ "step": 410
369
+ },
370
+ {
371
+ "epoch": 0.3676148796498906,
372
+ "grad_norm": 0.229636549949646,
373
+ "learning_rate": 1.5423118240122765e-05,
374
+ "loss": 0.5265,
375
+ "step": 420
376
+ },
377
+ {
378
+ "epoch": 0.37636761487964987,
379
+ "grad_norm": 0.2803157567977905,
380
+ "learning_rate": 1.188081932481891e-05,
381
+ "loss": 0.1247,
382
+ "step": 430
383
+ },
384
+ {
385
+ "epoch": 0.3851203501094092,
386
+ "grad_norm": 0.01452224887907505,
387
+ "learning_rate": 8.775136049276001e-06,
388
+ "loss": 0.0002,
389
+ "step": 440
390
+ },
391
+ {
392
+ "epoch": 0.3938730853391685,
393
+ "grad_norm": 0.00235812459141016,
394
+ "learning_rate": 6.121198990230306e-06,
395
+ "loss": 0.0002,
396
+ "step": 450
397
+ },
398
+ {
399
+ "epoch": 0.3938730853391685,
400
+ "eval_loss": 0.29808175563812256,
401
+ "eval_runtime": 47.9228,
402
+ "eval_samples_per_second": 10.037,
403
+ "eval_steps_per_second": 2.525,
404
+ "step": 450
405
  }
406
  ],
407
  "logging_steps": 10,
 
430
  "attributes": {}
431
  }
432
  },
433
+ "total_flos": 1.4734321501863936e+17,
434
  "train_batch_size": 4,
435
  "trial_name": null,
436
  "trial_params": null