ardaspear commited on
Commit
613eb3c
·
verified ·
1 Parent(s): 0079a4c

Training in progress, step 153, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a720b6a6c737c244779f324a7a933055b8a6b1988a5156e8fd556928b0ac95dd
3
  size 159967880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcdf10964fbea563f7585add757a1eded783fdea527b5b19c6c581195c5c3eb6
3
  size 159967880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cef617551b0809384d4da27a93c36dcd3801b5789a571da1dce0485f8e7f3302
3
  size 81730196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d9ce8f5d2c229bb84c86a6f32a33598b0f8c59ea40233c2dca6776e522db316
3
  size 81730196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e554c518f56e093745f46f37d2f62ebbef88c44083698dc01d4c782697c6d2b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8376b1a21f60d7891b89de2e0d8b9ab5dc44b12e00ba3ecc733ed71c6230518d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffd0ef2a827b219b75915f5a88a30c53ebe86f536eec93a6252baab983329eb7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d99dc7a150ff6ed818c8735e9e9061e757b4b841b8d74bde2c9d7a2195ff136
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.03490535771575232,
5
  "eval_steps": 17,
6
- "global_step": 136,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -394,6 +394,56 @@
394
  "eval_samples_per_second": 13.281,
395
  "eval_steps_per_second": 1.661,
396
  "step": 136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
  }
398
  ],
399
  "logging_steps": 3,
@@ -413,7 +463,7 @@
413
  "attributes": {}
414
  }
415
  },
416
- "total_flos": 2.004061139364741e+17,
417
  "train_batch_size": 8,
418
  "trial_name": null,
419
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.03926852743022137,
5
  "eval_steps": 17,
6
+ "global_step": 153,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
394
  "eval_samples_per_second": 13.281,
395
  "eval_steps_per_second": 1.661,
396
  "step": 136
397
+ },
398
+ {
399
+ "epoch": 0.03541867179980751,
400
+ "grad_norm": 0.3809681534767151,
401
+ "learning_rate": 2.405152131093926e-05,
402
+ "loss": 2.3106,
403
+ "step": 138
404
+ },
405
+ {
406
+ "epoch": 0.03618864292589028,
407
+ "grad_norm": 0.3716019093990326,
408
+ "learning_rate": 2.196424713241637e-05,
409
+ "loss": 2.3156,
410
+ "step": 141
411
+ },
412
+ {
413
+ "epoch": 0.03695861405197305,
414
+ "grad_norm": 0.3578968048095703,
415
+ "learning_rate": 1.9945942635848748e-05,
416
+ "loss": 2.2621,
417
+ "step": 144
418
+ },
419
+ {
420
+ "epoch": 0.03772858517805582,
421
+ "grad_norm": 0.37218406796455383,
422
+ "learning_rate": 1.800157297483417e-05,
423
+ "loss": 2.3245,
424
+ "step": 147
425
+ },
426
+ {
427
+ "epoch": 0.03849855630413859,
428
+ "grad_norm": 0.3655867278575897,
429
+ "learning_rate": 1.6135921418712956e-05,
430
+ "loss": 2.2425,
431
+ "step": 150
432
+ },
433
+ {
434
+ "epoch": 0.03926852743022137,
435
+ "grad_norm": 0.33499404788017273,
436
+ "learning_rate": 1.435357758543015e-05,
437
+ "loss": 2.2394,
438
+ "step": 153
439
+ },
440
+ {
441
+ "epoch": 0.03926852743022137,
442
+ "eval_loss": 2.2859363555908203,
443
+ "eval_runtime": 494.1942,
444
+ "eval_samples_per_second": 13.28,
445
+ "eval_steps_per_second": 1.661,
446
+ "step": 153
447
  }
448
  ],
449
  "logging_steps": 3,
 
463
  "attributes": {}
464
  }
465
  },
466
+ "total_flos": 2.2578649349750784e+17,
467
  "train_batch_size": 8,
468
  "trial_name": null,
469
  "trial_params": null