lesso17 commited on
Commit
f1261cc
·
verified ·
1 Parent(s): 30a5fbe

Training in progress, step 350, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36ca7af37aa73a2ce754fb4321387f7903cf5422390968a44f25bd510f4fd463
3
  size 60576160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2072adb64bf4f50b48418c52096cf79779fcaaf6e8413cf8be2acc91f7095217
3
  size 60576160
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce0ab8dff3ab63e4a1635efb311da442dc371b438ea1a2e49349f70aa62728f8
3
  size 30896058
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a6315e6f80a70dcc3ec3fc68841684f4ce6815fbd126520b2393416bfe5b361
3
  size 30896058
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffd4231aa69ac0041a75bebdb3bf0e58964679bc60ec5990f41dc7abe2258772
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d232fa33de0a3f36fb8c3451d877ed11305baa6ced4ec91f5259002aadce72f2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c06b275438271ccceb2a71be30d8208b5cee039dd2f8805e8f89da23f2373c89
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bfba9f6f71d6bf8d4b22bdc118ed4ab52735b4c3d041fc8533613a4cc45feba
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 8.317304611206055,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
- "epoch": 0.7662835249042146,
5
  "eval_steps": 50,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -273,6 +273,49 @@
273
  "eval_samples_per_second": 123.555,
274
  "eval_steps_per_second": 31.45,
275
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  }
277
  ],
278
  "logging_steps": 10,
@@ -301,7 +344,7 @@
301
  "attributes": {}
302
  }
303
  },
304
- "total_flos": 3097030142459904.0,
305
  "train_batch_size": 4,
306
  "trial_name": null,
307
  "trial_params": null
 
1
  {
2
+ "best_metric": 8.305614471435547,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-350",
4
+ "epoch": 0.8939974457215837,
5
  "eval_steps": 50,
6
+ "global_step": 350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
273
  "eval_samples_per_second": 123.555,
274
  "eval_steps_per_second": 31.45,
275
  "step": 300
276
+ },
277
+ {
278
+ "epoch": 0.7918263090676884,
279
+ "grad_norm": 0.7110276222229004,
280
+ "learning_rate": 2.9352343615079657e-05,
281
+ "loss": 8.3509,
282
+ "step": 310
283
+ },
284
+ {
285
+ "epoch": 0.8173690932311622,
286
+ "grad_norm": 0.5742895007133484,
287
+ "learning_rate": 2.287825473049131e-05,
288
+ "loss": 8.2672,
289
+ "step": 320
290
+ },
291
+ {
292
+ "epoch": 0.842911877394636,
293
+ "grad_norm": 0.6707843542098999,
294
+ "learning_rate": 1.7126147564349132e-05,
295
+ "loss": 8.2678,
296
+ "step": 330
297
+ },
298
+ {
299
+ "epoch": 0.8684546615581098,
300
+ "grad_norm": 0.69368976354599,
301
+ "learning_rate": 1.214452517161218e-05,
302
+ "loss": 8.2203,
303
+ "step": 340
304
+ },
305
+ {
306
+ "epoch": 0.8939974457215837,
307
+ "grad_norm": 1.245832920074463,
308
+ "learning_rate": 7.975393707194009e-06,
309
+ "loss": 8.2846,
310
+ "step": 350
311
+ },
312
+ {
313
+ "epoch": 0.8939974457215837,
314
+ "eval_loss": 8.305614471435547,
315
+ "eval_runtime": 1.3367,
316
+ "eval_samples_per_second": 123.435,
317
+ "eval_steps_per_second": 31.42,
318
+ "step": 350
319
  }
320
  ],
321
  "logging_steps": 10,
 
344
  "attributes": {}
345
  }
346
  },
347
+ "total_flos": 3615550202707968.0,
348
  "train_batch_size": 4,
349
  "trial_name": null,
350
  "trial_params": null