DuongTrongChi commited on
Commit
0ce6e2b
·
verified ·
1 Parent(s): 7959aea

Training in progress, step 349, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29f690d14438f6e3cdf833fa2f4ab04a5fb0513845d4e9725f23ef543afb0ec1
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6778c35d7237cda41f150fe226d7ad2b42a18571675e22e19b37130222805000
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb96b39f96671416517824843ba7f65a0cc13e6fd2e4a1ff0f867425e9967b36
3
  size 37431220
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0a4ae9caa1f7c350f9c7e85a253ed14dc3b50d5c7f44e3ac1655fbd5849f58f
3
  size 37431220
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:248bdad94673d0171b6613a2054004ce1fbd7cc6609011663f62eb8bd70a3480
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ca002fadce970e2797672d78741b69114f9a264197843fa3e3305d06f80f89f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9465020576131687,
5
  "eval_steps": 500,
6
- "global_step": 345,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2422,6 +2422,34 @@
2422
  "learning_rate": 1.4393939393939396e-05,
2423
  "loss": 1.1708,
2424
  "step": 345
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2425
  }
2426
  ],
2427
  "logging_steps": 1,
@@ -2441,7 +2469,7 @@
2441
  "attributes": {}
2442
  }
2443
  },
2444
- "total_flos": 4.277394643660677e+17,
2445
  "train_batch_size": 4,
2446
  "trial_name": null,
2447
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9574759945130316,
5
  "eval_steps": 500,
6
+ "global_step": 349,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2422
  "learning_rate": 1.4393939393939396e-05,
2423
  "loss": 1.1708,
2424
  "step": 345
2425
+ },
2426
+ {
2427
+ "epoch": 0.9492455418381345,
2428
+ "grad_norm": 0.15814326703548431,
2429
+ "learning_rate": 1.3636363636363637e-05,
2430
+ "loss": 1.1695,
2431
+ "step": 346
2432
+ },
2433
+ {
2434
+ "epoch": 0.9519890260631001,
2435
+ "grad_norm": 0.15282317996025085,
2436
+ "learning_rate": 1.287878787878788e-05,
2437
+ "loss": 1.2195,
2438
+ "step": 347
2439
+ },
2440
+ {
2441
+ "epoch": 0.9547325102880658,
2442
+ "grad_norm": 0.16384905576705933,
2443
+ "learning_rate": 1.2121212121212122e-05,
2444
+ "loss": 1.1989,
2445
+ "step": 348
2446
+ },
2447
+ {
2448
+ "epoch": 0.9574759945130316,
2449
+ "grad_norm": 0.16359828412532806,
2450
+ "learning_rate": 1.1363636363636365e-05,
2451
+ "loss": 1.0967,
2452
+ "step": 349
2453
  }
2454
  ],
2455
  "logging_steps": 1,
 
2469
  "attributes": {}
2470
  }
2471
  },
2472
+ "total_flos": 4.323960827284808e+17,
2473
  "train_batch_size": 4,
2474
  "trial_name": null,
2475
  "trial_params": null