leixa commited on
Commit
154c2f4
·
verified ·
1 Parent(s): 8aab5c3

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:038346555883593068d400099f443ffbe19927b96a76193c9f52144a8eeaf1ea
3
  size 95402480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84cb9b4e3cc2701b2275671820e05cc4b29e6a7942990b4f01dc83ab2b9b20ad
3
  size 95402480
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bc08782f957537aa75152c9aaa95fbeba86f1175ac2bc7ade44bed1d68365ef
3
  size 48843572
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fcb6169f724813efc0836f537d87a9c6d31737ad8f1e2b3646c99a21b1e3f8b
3
  size 48843572
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:001ebc33bb12515ad9bf608baa754af5e1565c46e663d6cda25210150092d5bf
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2a7f225c9ae6999207d2502cf8d9088baf71b692579a31c0187571fc4d1e4ee
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:431c6c53bdba57f808cd9c2e6f738bda5a26247416d68b6f96cb4f3eef6f54ca
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1320cc0c91c904e1b37627facbc7bdd09ad072c5e9a4fe376e5569a5cdf3a73
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.48639455782312924,
5
  "eval_steps": 13,
6
- "global_step": 143,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -432,6 +432,27 @@
432
  "eval_samples_per_second": 22.549,
433
  "eval_steps_per_second": 2.824,
434
  "step": 143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
  }
436
  ],
437
  "logging_steps": 3,
@@ -446,12 +467,12 @@
446
  "should_evaluate": false,
447
  "should_log": false,
448
  "should_save": true,
449
- "should_training_stop": false
450
  },
451
  "attributes": {}
452
  }
453
  },
454
- "total_flos": 1.051690368195625e+17,
455
  "train_batch_size": 8,
456
  "trial_name": null,
457
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5102040816326531,
5
  "eval_steps": 13,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
432
  "eval_samples_per_second": 22.549,
433
  "eval_steps_per_second": 2.824,
434
  "step": 143
435
+ },
436
+ {
437
+ "epoch": 0.4897959183673469,
438
+ "grad_norm": 6.776644706726074,
439
+ "learning_rate": 2.262559558016325e-07,
440
+ "loss": 11.1286,
441
+ "step": 144
442
+ },
443
+ {
444
+ "epoch": 0.5,
445
+ "grad_norm": 11.629470825195312,
446
+ "learning_rate": 5.662812383859795e-08,
447
+ "loss": 11.9387,
448
+ "step": 147
449
+ },
450
+ {
451
+ "epoch": 0.5102040816326531,
452
+ "grad_norm": 8.106785774230957,
453
+ "learning_rate": 0.0,
454
+ "loss": 11.5558,
455
+ "step": 150
456
  }
457
  ],
458
  "logging_steps": 3,
 
467
  "should_evaluate": false,
468
  "should_log": false,
469
  "should_save": true,
470
+ "should_training_stop": true
471
  },
472
  "attributes": {}
473
  }
474
  },
475
+ "total_flos": 1.1030619465606758e+17,
476
  "train_batch_size": 8,
477
  "trial_name": null,
478
  "trial_params": null