DuongTrongChi commited on
Commit
7beec86
·
verified ·
1 Parent(s): 71feb52

Training in progress, step 77, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c7e9d3c3e74148fcbe3d1504ece756be6c3c6fe0d872ef0635623b925b7b3fb
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7b95e136cd383ff83a7fda7bce3f8d53c1f6228c6ca6113f38ec12331a42d32
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:214e3967dd76a65f1ee4e67fa3feb439083185eb59fbb68630ee9333c94d3e8d
3
  size 37430836
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dea7e4397d798a0d3605b4fe8bdc6a65d3a5f84c31a429d5720772de4b001d2
3
  size 37430836
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84d461e68a163c787fff150b87639893e5b81a899f6f8be260318c9e2277a5f4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:725ee499aaa0bc04b490ac3af0c734c514c976dd8cd2f204b00fdb43d2a90bf8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.20027434842249658,
5
  "eval_steps": 500,
6
- "global_step": 73,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -518,6 +518,34 @@
518
  "learning_rate": 0.000146,
519
  "loss": 1.3128,
520
  "step": 73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
521
  }
522
  ],
523
  "logging_steps": 1,
@@ -537,7 +565,7 @@
537
  "attributes": {}
538
  }
539
  },
540
- "total_flos": 9.034931398791168e+16,
541
  "train_batch_size": 4,
542
  "trial_name": null,
543
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2112482853223594,
5
  "eval_steps": 500,
6
+ "global_step": 77,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
518
  "learning_rate": 0.000146,
519
  "loss": 1.3128,
520
  "step": 73
521
+ },
522
+ {
523
+ "epoch": 0.2030178326474623,
524
+ "grad_norm": 0.08815225213766098,
525
+ "learning_rate": 0.000148,
526
+ "loss": 1.3321,
527
+ "step": 74
528
+ },
529
+ {
530
+ "epoch": 0.205761316872428,
531
+ "grad_norm": 0.09394700825214386,
532
+ "learning_rate": 0.00015000000000000001,
533
+ "loss": 1.3341,
534
+ "step": 75
535
+ },
536
+ {
537
+ "epoch": 0.2085048010973937,
538
+ "grad_norm": 0.10041660070419312,
539
+ "learning_rate": 0.000152,
540
+ "loss": 1.2944,
541
+ "step": 76
542
+ },
543
+ {
544
+ "epoch": 0.2112482853223594,
545
+ "grad_norm": 0.09344102442264557,
546
+ "learning_rate": 0.000154,
547
+ "loss": 1.3226,
548
+ "step": 77
549
  }
550
  ],
551
  "logging_steps": 1,
 
565
  "attributes": {}
566
  }
567
  },
568
+ "total_flos": 9.536105939440435e+16,
569
  "train_batch_size": 4,
570
  "trial_name": null,
571
  "trial_params": null