MohamedAhmedAE committed on
Commit
1a2ed40
·
verified ·
1 Parent(s): 810896c

Training in progress, step 800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bff897368000117674ab917c547b982f586c6107309cfbb2c96448dc97e48098
3
  size 389081912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfda392f7a94c935387f9bf12a827b16f6fffd7d46c14248e603312888236d00
3
  size 389081912
last-checkpoint/global_step800/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0dabd7e346b6aceb3fd0e119cf55b4c38dc05a6fb22da69f8a6186be397daea
3
+ size 1167094117
last-checkpoint/global_step800/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed8f1bb9ff29c9f4cf6775190b74624fec599750d4793a75a4aef36014f2c490
3
+ size 1167094245
last-checkpoint/global_step800/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e90d3b640128be11fec02d7f42f6fcf9fd8023afb3081c6b5b5278004fb84ef
3
+ size 1222740115
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step600
 
1
+ global_step800
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e9d16aa7e6166e2439dbd61297667a7a16a2094ce8dd6fa6e0711599b36bc4e
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4ef64e1574edf5b383c0a9ec160d79a8b77abcef4ff433f7cbbf54c1c492691
3
  size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3cd81cc8241929aa48fca6f3fadc52245b3ac08a33a6d1e16a6ef3e1b487ea1
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:988a2f6ca79ecc74ec145b907cf959d1629de03ec03686b8f87073791df23729
3
  size 14917
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.08276432857438444,
6
  "eval_steps": 500,
7
- "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -35,6 +35,15 @@
35
  "mean_token_accuracy": 0.5912152025103569,
36
  "num_tokens": 39065992.0,
37
  "step": 600
 
 
 
 
 
 
 
 
 
38
  }
39
  ],
40
  "logging_steps": 200,
@@ -54,7 +63,7 @@
54
  "attributes": {}
55
  }
56
  },
57
- "total_flos": 7.062955348973322e+17,
58
  "train_batch_size": 4,
59
  "trial_name": null,
60
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.11035243809917926,
6
  "eval_steps": 500,
7
+ "global_step": 800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
35
  "mean_token_accuracy": 0.5912152025103569,
36
  "num_tokens": 39065992.0,
37
  "step": 600
38
+ },
39
+ {
40
+ "epoch": 0.11035243809917926,
41
+ "grad_norm": 0.35369324684143066,
42
+ "learning_rate": 1.9561870602841773e-05,
43
+ "loss": 1.8443,
44
+ "mean_token_accuracy": 0.5940310730040074,
45
+ "num_tokens": 52073767.0,
46
+ "step": 800
47
  }
48
  ],
49
  "logging_steps": 200,
 
63
  "attributes": {}
64
  }
65
  },
66
+ "total_flos": 9.414702487610327e+17,
67
  "train_batch_size": 4,
68
  "trial_name": null,
69
  "trial_params": null