Pranay17 committed
Commit 205e908 · verified · 1 Parent(s): 9cc2749

Training in progress, step 1500, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04a023d8eae2b5ccc492ba729567d59e031019da2e3ca4bafb6d61d0d3388d86
+oid sha256:7f6c9491adb6089056fda920e27d5868f9c470a97da0823064432f6788c44756
 size 42002584
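
Both versions of adapter_model.safetensors are Git LFS pointer files: the repository tracks only the object's sha256 and byte size, while the ~42 MB adapter weights live in LFS storage. A minimal sketch, assuming the file has already been fetched locally (e.g. via `git lfs pull`), of checking the download against the new pointer:

```python
import hashlib
from pathlib import Path

# Assumed local path inside a clone of this repo after `git lfs pull`.
adapter_path = Path("last-checkpoint/adapter_model.safetensors")

# Values copied from the updated LFS pointer above.
expected_oid = "7f6c9491adb6089056fda920e27d5868f9c470a97da0823064432f6788c44756"
expected_size = 42002584

data = adapter_path.read_bytes()
assert len(data) == expected_size, f"size mismatch: {len(data)} != {expected_size}"
assert hashlib.sha256(data).hexdigest() == expected_oid, "sha256 mismatch"
print("adapter_model.safetensors matches its LFS pointer")
```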
last-checkpoint/global_step1500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4d6134158e7c13a72c0bffc0be795bbfef434634617e8cf04f155ed2264f145
+size 251710672
last-checkpoint/global_step1500/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:daf5aaaab0959575e566af04b3c87174d7001e7459c813c42a628f90466e3b99
+size 47955328
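
The two files added under global_step1500/ are the DeepSpeed ZeRO shards for this step: the rank-0 bf16 optimizer/partition states and the rank-0 model states. A hedged sketch of collapsing them into a single fp32 state dict with DeepSpeed's bundled zero_to_fp32 helper; the local paths are assumptions about a checked-out copy of this repo:

```python
# Requires the deepspeed package that produced these shards (pip install deepspeed).
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# "last-checkpoint" is the checkpoint root committed here; "global_step1500" is the
# tag this commit writes to last-checkpoint/latest.
state_dict = get_fp32_state_dict_from_zero_checkpoint(
    "last-checkpoint", tag="global_step1500"
)
print(f"reconstructed {len(state_dict)} fp32 tensors")
```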
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-global_step1000
+global_step1500
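
`latest` is the tag file DeepSpeed consults when restoring a checkpoint without an explicit tag; this commit advances it from global_step1000 to global_step1500. A small sketch, assuming a local clone, of reading the tag and confirming the matching step directory exists:

```python
from pathlib import Path

ckpt_root = Path("last-checkpoint")              # assumed local clone path
tag = (ckpt_root / "latest").read_text().strip()
print(tag)                                       # "global_step1500" after this commit
assert (ckpt_root / tag).is_dir(), f"missing DeepSpeed step directory: {tag}"
```

A `transformers.Trainer` configured with the same model and DeepSpeed settings as the original run would typically pick this tag up on its own when resuming with `trainer.train(resume_from_checkpoint="last-checkpoint")`.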
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c98879aba5b00d81dca96ce732b312683376f3923af7bda8eb2a40d4da8d98d
+oid sha256:721c0a2243fa57a1e6ca760a9478078a0f6bb44a899a4293351badcaa44577ca
 size 14244
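
rng_state.pth stores the RNG snapshots the Trainer uses to make a resumed run reproducible. A minimal sketch of inspecting it; the path is an assumption, and `weights_only=False` is needed because the file holds non-tensor RNG objects:

```python
import torch

# Assumed local path; the file is small (~14 KB per the pointer above).
rng = torch.load("last-checkpoint/rng_state.pth", weights_only=False)
print(sorted(rng.keys()))  # typically includes python, numpy, cpu and cuda states
```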
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 5.698005698005698,
+  "epoch": 8.547008547008547,
   "eval_steps": 1000,
-  "global_step": 1000,
+  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -154,6 +154,76 @@
       "learning_rate": 0.00015025025025025026,
       "loss": 0.0876,
       "step": 1000
+    },
+    {
+      "epoch": 5.982905982905983,
+      "grad_norm": 1.0592001676559448,
+      "learning_rate": 0.00014774774774774775,
+      "loss": 0.0909,
+      "step": 1050
+    },
+    {
+      "epoch": 6.267806267806268,
+      "grad_norm": 1.4785758256912231,
+      "learning_rate": 0.00014524524524524526,
+      "loss": 0.0838,
+      "step": 1100
+    },
+    {
+      "epoch": 6.552706552706553,
+      "grad_norm": 0.6601145267486572,
+      "learning_rate": 0.00014274274274274275,
+      "loss": 0.0827,
+      "step": 1150
+    },
+    {
+      "epoch": 6.837606837606837,
+      "grad_norm": 1.1280301809310913,
+      "learning_rate": 0.00014024024024024024,
+      "loss": 0.0762,
+      "step": 1200
+    },
+    {
+      "epoch": 7.122507122507122,
+      "grad_norm": 1.467143177986145,
+      "learning_rate": 0.00013773773773773776,
+      "loss": 0.0799,
+      "step": 1250
+    },
+    {
+      "epoch": 7.407407407407407,
+      "grad_norm": 1.773697018623352,
+      "learning_rate": 0.00013523523523523525,
+      "loss": 0.0765,
+      "step": 1300
+    },
+    {
+      "epoch": 7.6923076923076925,
+      "grad_norm": 3.1821281909942627,
+      "learning_rate": 0.00013273273273273274,
+      "loss": 0.0787,
+      "step": 1350
+    },
+    {
+      "epoch": 7.977207977207978,
+      "grad_norm": 0.9911046028137207,
+      "learning_rate": 0.00013023023023023023,
+      "loss": 0.0731,
+      "step": 1400
+    },
+    {
+      "epoch": 8.262108262108262,
+      "grad_norm": 2.560380458831787,
+      "learning_rate": 0.00012772772772772775,
+      "loss": 0.069,
+      "step": 1450
+    },
+    {
+      "epoch": 8.547008547008547,
+      "grad_norm": 1.88752281665802,
+      "learning_rate": 0.00012522522522522524,
+      "loss": 0.0708,
+      "step": 1500
     }
   ],
   "logging_steps": 50,
@@ -173,7 +243,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.7161049585156096e+16,
+  "total_flos": 4.082014482962842e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null