DuongTrongChi commited on
Commit
50b703f
·
verified ·
1 Parent(s): 60ba539

Training in progress, step 337, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e0972e809f636f714cabaf7760abced4e8ac62b9b0d7d1e5af1957fd879d856
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fa01b3abb7656551224fc4fbeb611a0886c86685343fa6406fe831c1db71f04
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39e62a3caea9d1213a790ce42d8942f8d073a999f93a21cc68ae51d4b4cda784
3
  size 37431220
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88fab551c9bf6c469ad35ab513b4416c08bfdf970747a1f23bd5b53582b807c8
3
  size 37431220
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72ad2ac46faf761aed288b5b7843e4fc844c5885618fe90eebd496352649b23c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b12f765a366db773a3e801ac0d1a41ae53b843966dba1e15b6484a9f4d83233f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9080932784636488,
5
  "eval_steps": 500,
6
- "global_step": 331,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2324,6 +2324,48 @@
2324
  "learning_rate": 2.5e-05,
2325
  "loss": 1.1528,
2326
  "step": 331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2327
  }
2328
  ],
2329
  "logging_steps": 1,
@@ -2343,7 +2385,7 @@
2343
  "attributes": {}
2344
  }
2345
  },
2346
- "total_flos": 4.1040816234142925e+17,
2347
  "train_batch_size": 4,
2348
  "trial_name": null,
2349
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9245541838134431,
5
  "eval_steps": 500,
6
+ "global_step": 337,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2324
  "learning_rate": 2.5e-05,
2325
  "loss": 1.1528,
2326
  "step": 331
2327
+ },
2328
+ {
2329
+ "epoch": 0.9108367626886146,
2330
+ "grad_norm": 0.16983263194561005,
2331
+ "learning_rate": 2.4242424242424244e-05,
2332
+ "loss": 1.1711,
2333
+ "step": 332
2334
+ },
2335
+ {
2336
+ "epoch": 0.9135802469135802,
2337
+ "grad_norm": 0.15927070379257202,
2338
+ "learning_rate": 2.3484848484848487e-05,
2339
+ "loss": 1.1595,
2340
+ "step": 333
2341
+ },
2342
+ {
2343
+ "epoch": 0.9163237311385459,
2344
+ "grad_norm": 0.16879941523075104,
2345
+ "learning_rate": 2.272727272727273e-05,
2346
+ "loss": 1.1557,
2347
+ "step": 334
2348
+ },
2349
+ {
2350
+ "epoch": 0.9190672153635117,
2351
+ "grad_norm": 0.15948091447353363,
2352
+ "learning_rate": 2.1969696969696972e-05,
2353
+ "loss": 1.149,
2354
+ "step": 335
2355
+ },
2356
+ {
2357
+ "epoch": 0.9218106995884774,
2358
+ "grad_norm": 0.16563957929611206,
2359
+ "learning_rate": 2.1212121212121215e-05,
2360
+ "loss": 1.1512,
2361
+ "step": 336
2362
+ },
2363
+ {
2364
+ "epoch": 0.9245541838134431,
2365
+ "grad_norm": 0.16133056581020355,
2366
+ "learning_rate": 2.0454545454545457e-05,
2367
+ "loss": 1.1601,
2368
+ "step": 337
2369
  }
2370
  ],
2371
  "logging_steps": 1,
 
2385
  "attributes": {}
2386
  }
2387
  },
2388
+ "total_flos": 4.1769332820525466e+17,
2389
  "train_batch_size": 4,
2390
  "trial_name": null,
2391
  "trial_params": null