DuongTrongChi commited on
Commit
8654b33
·
verified ·
1 Parent(s): 2335a02

Training in progress, step 357, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a375c24028700bd557f667d40973690c740ce729b6073aa79b5a7971a2f2cc78
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc2624a8ffcf1841a9421ca1f17277f4c8d9b072e238a8603e30cdfdae2cb8ca
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22f7b1a71febb4b763b6acf5823fda57ab9eca3184ec8cbb2a84cd959eb1b39b
3
  size 37431220
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:210d9ceabe7e26d81c6de62edaa8bf5373afa8e36c75a23d9b951a596eda2c05
3
  size 37431220
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77b94402f5dd54b877f7ef15b329b04f5ba33d08b944e588160c2e2559f18f1f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56085f5a51542351a94b27a3e2abb809af51b595632f72dc4fabbe92e8070a60
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9711934156378601,
5
  "eval_steps": 500,
6
- "global_step": 354,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2485,6 +2485,27 @@
2485
  "learning_rate": 7.5757575757575764e-06,
2486
  "loss": 1.175,
2487
  "step": 354
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2488
  }
2489
  ],
2490
  "logging_steps": 1,
@@ -2504,7 +2525,7 @@
2504
  "attributes": {}
2505
  }
2506
  },
2507
- "total_flos": 4.38308805541675e+17,
2508
  "train_batch_size": 4,
2509
  "trial_name": null,
2510
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9794238683127572,
5
  "eval_steps": 500,
6
+ "global_step": 357,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2485
  "learning_rate": 7.5757575757575764e-06,
2486
  "loss": 1.175,
2487
  "step": 354
2488
+ },
2489
+ {
2490
+ "epoch": 0.9739368998628258,
2491
+ "grad_norm": 0.16342799365520477,
2492
+ "learning_rate": 6.818181818181818e-06,
2493
+ "loss": 1.2179,
2494
+ "step": 355
2495
+ },
2496
+ {
2497
+ "epoch": 0.9766803840877915,
2498
+ "grad_norm": 0.1597851812839508,
2499
+ "learning_rate": 6.060606060606061e-06,
2500
+ "loss": 1.1208,
2501
+ "step": 356
2502
+ },
2503
+ {
2504
+ "epoch": 0.9794238683127572,
2505
+ "grad_norm": 0.16428720951080322,
2506
+ "learning_rate": 5.303030303030304e-06,
2507
+ "loss": 1.1197,
2508
+ "step": 357
2509
  }
2510
  ],
2511
  "logging_steps": 1,
 
2525
  "attributes": {}
2526
  }
2527
  },
2528
+ "total_flos": 4.4192512511902925e+17,
2529
  "train_batch_size": 4,
2530
  "trial_name": null,
2531
  "trial_params": null