ardaspear commited on
Commit
7bf2982
·
verified ·
1 Parent(s): 4ec8462

Training in progress, step 136, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c51bb1bd493fedab904030a7482e3c21bba78bd2533b6b08b9c73123b0a35d3a
3
  size 159967880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a720b6a6c737c244779f324a7a933055b8a6b1988a5156e8fd556928b0ac95dd
3
  size 159967880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b99ee001fbe78e978d9a8949cfacde0ed77f2b37b4f24cd265a5eb26ee84e83d
3
  size 81730196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cef617551b0809384d4da27a93c36dcd3801b5789a571da1dce0485f8e7f3302
3
  size 81730196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5232f719bd209899927dac4f0c85d3dc677af3c55e9d27e5ace57effddf642d1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e554c518f56e093745f46f37d2f62ebbef88c44083698dc01d4c782697c6d2b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c91934808157be4b4581cbac88c1dcb8ab73e7092f7b8aa05c4fbac8ab77615f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffd0ef2a827b219b75915f5a88a30c53ebe86f536eec93a6252baab983329eb7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.030542188001283284,
5
  "eval_steps": 17,
6
- "global_step": 119,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -344,6 +344,56 @@
344
  "eval_samples_per_second": 13.282,
345
  "eval_steps_per_second": 1.662,
346
  "step": 119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  }
348
  ],
349
  "logging_steps": 3,
@@ -363,7 +413,7 @@
363
  "attributes": {}
364
  }
365
  },
366
- "total_flos": 1.7469611905646592e+17,
367
  "train_batch_size": 8,
368
  "trial_name": null,
369
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.03490535771575232,
5
  "eval_steps": 17,
6
+ "global_step": 136,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
344
  "eval_samples_per_second": 13.282,
345
  "eval_steps_per_second": 1.662,
346
  "step": 119
347
+ },
348
+ {
349
+ "epoch": 0.030798845043310877,
350
+ "grad_norm": 0.41164785623550415,
351
+ "learning_rate": 3.772572564296005e-05,
352
+ "loss": 2.2888,
353
+ "step": 120
354
+ },
355
+ {
356
+ "epoch": 0.031568816169393644,
357
+ "grad_norm": 0.3949745297431946,
358
+ "learning_rate": 3.533749813077677e-05,
359
+ "loss": 2.2659,
360
+ "step": 123
361
+ },
362
+ {
363
+ "epoch": 0.03233878729547642,
364
+ "grad_norm": 0.31058645248413086,
365
+ "learning_rate": 3.298534127791785e-05,
366
+ "loss": 2.3101,
367
+ "step": 126
368
+ },
369
+ {
370
+ "epoch": 0.03310875842155919,
371
+ "grad_norm": 0.3842872977256775,
372
+ "learning_rate": 3.0675041535377405e-05,
373
+ "loss": 2.1922,
374
+ "step": 129
375
+ },
376
+ {
377
+ "epoch": 0.03387872954764196,
378
+ "grad_norm": 0.37319129705429077,
379
+ "learning_rate": 2.8412282383075363e-05,
380
+ "loss": 2.251,
381
+ "step": 132
382
+ },
383
+ {
384
+ "epoch": 0.03464870067372473,
385
+ "grad_norm": 0.3240547180175781,
386
+ "learning_rate": 2.6202630348146324e-05,
387
+ "loss": 2.3064,
388
+ "step": 135
389
+ },
390
+ {
391
+ "epoch": 0.03490535771575232,
392
+ "eval_loss": 2.2887308597564697,
393
+ "eval_runtime": 494.1479,
394
+ "eval_samples_per_second": 13.281,
395
+ "eval_steps_per_second": 1.661,
396
+ "step": 136
397
  }
398
  ],
399
  "logging_steps": 3,
 
413
  "attributes": {}
414
  }
415
  },
416
+ "total_flos": 2.004061139364741e+17,
417
  "train_batch_size": 8,
418
  "trial_name": null,
419
  "trial_params": null