DuongTrongChi committed on
Commit
ac94efe
·
verified ·
1 Parent(s): 62832c2

Training in progress, step 89, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f243f6b3616c8433386fc88ddf6327a577ba6287c4ed295e830c228fda35beb6
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4214ef0ee7f7d582fe2add79fbe93b96823ee44d17df992afcac821c8420d811
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4f72706d984d21bee0f467c65e8399002645accd78908dfaafc210e8643a0d2
3
  size 37430836
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26d1dcdcb23846ee16e878f1b8dc7af0f4f4ec206a90a4b73dde0ee17fe5e626
3
  size 37430836
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55d1c8adee9796452e76469ace59653b3f2ebccea698756c2275e8b6cf74ba43
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf4df7a6f01855e97e0632ba1247237d498e93e227d9db04c0584a2eb317fae9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.23045267489711935,
5
  "eval_steps": 500,
6
- "global_step": 84,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -595,6 +595,41 @@
595
  "learning_rate": 0.000168,
596
  "loss": 1.1982,
597
  "step": 84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
598
  }
599
  ],
600
  "logging_steps": 1,
@@ -614,7 +649,7 @@
614
  "attributes": {}
615
  }
616
  },
617
- "total_flos": 1.0405766029849805e+17,
618
  "train_batch_size": 4,
619
  "trial_name": null,
620
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.24417009602194786,
5
  "eval_steps": 500,
6
+ "global_step": 89,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
595
  "learning_rate": 0.000168,
596
  "loss": 1.1982,
597
  "step": 84
598
+ },
599
+ {
600
+ "epoch": 0.23319615912208505,
601
+ "grad_norm": 0.10137173533439636,
602
+ "learning_rate": 0.00017,
603
+ "loss": 1.2697,
604
+ "step": 85
605
+ },
606
+ {
607
+ "epoch": 0.23593964334705075,
608
+ "grad_norm": 0.09000196307897568,
609
+ "learning_rate": 0.000172,
610
+ "loss": 1.3122,
611
+ "step": 86
612
+ },
613
+ {
614
+ "epoch": 0.23868312757201646,
615
+ "grad_norm": 0.12272510677576065,
616
+ "learning_rate": 0.000174,
617
+ "loss": 1.3238,
618
+ "step": 87
619
+ },
620
+ {
621
+ "epoch": 0.24142661179698216,
622
+ "grad_norm": 0.10101604461669922,
623
+ "learning_rate": 0.00017600000000000002,
624
+ "loss": 1.3637,
625
+ "step": 88
626
+ },
627
+ {
628
+ "epoch": 0.24417009602194786,
629
+ "grad_norm": 0.10622192174196243,
630
+ "learning_rate": 0.00017800000000000002,
631
+ "loss": 1.2577,
632
+ "step": 89
633
  }
634
  ],
635
  "logging_steps": 1,
 
649
  "attributes": {}
650
  }
651
  },
652
+ "total_flos": 1.1049531277218202e+17,
653
  "train_batch_size": 4,
654
  "trial_name": null,
655
  "trial_params": null