lesso01 commited on
Commit
cbb02bb
·
verified ·
1 Parent(s): 5778833

Training in progress, step 400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41013d67c2894365b1744d3213536ab9f340e0dd6210e2adb0c7f32b73160d37
3
  size 645975704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7aaa39214ca050f54ac1bc3faa35b4e17f32d60fdfd1448d3fa427972caeab53
3
  size 645975704
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a77c813b8ebeac5d9bb131be6ba2116acd5558040a69c763b3f56d09ef6ede2c
3
  size 328468852
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:408fc7e1cfcccbd01a534e8797503f14942d3b639b37e0e778195ae8124318fe
3
  size 328468852
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c8cc9090b5cc5d4b4246c49707d356f87bdf6b8f409039134d01740469d1b72
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e224c14664601a8f0eb60590a28a54e325cd887941e1d119e02e8c408128fff3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4c15c93e093cba090b2f3583592294b602741c9988a1980b16636e603079f25
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c0c9560ea4ec378bff0927b20fa3ef4645adc76e93a49024ae06865cbf33e40
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.359244704246521,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-350",
4
- "epoch": 0.12874747103181902,
5
  "eval_steps": 50,
6
- "global_step": 350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -316,6 +316,49 @@
316
  "eval_samples_per_second": 17.547,
317
  "eval_steps_per_second": 4.398,
318
  "step": 350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  }
320
  ],
321
  "logging_steps": 10,
@@ -344,7 +387,7 @@
344
  "attributes": {}
345
  }
346
  },
347
- "total_flos": 6.2207629983744e+16,
348
  "train_batch_size": 4,
349
  "trial_name": null,
350
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.332943081855774,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-400",
4
+ "epoch": 0.14713996689350745,
5
  "eval_steps": 50,
6
+ "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
316
  "eval_samples_per_second": 17.547,
317
  "eval_steps_per_second": 4.398,
318
  "step": 350
319
+ },
320
+ {
321
+ "epoch": 0.1324259702041567,
322
+ "grad_norm": 0.766905665397644,
323
+ "learning_rate": 4.430111320118996e-05,
324
+ "loss": 1.451,
325
+ "step": 360
326
+ },
327
+ {
328
+ "epoch": 0.1361044693764944,
329
+ "grad_norm": 0.7716265916824341,
330
+ "learning_rate": 3.862602172977134e-05,
331
+ "loss": 1.4025,
332
+ "step": 370
333
+ },
334
+ {
335
+ "epoch": 0.13978296854883207,
336
+ "grad_norm": 0.7445082664489746,
337
+ "learning_rate": 3.325237406093478e-05,
338
+ "loss": 1.2896,
339
+ "step": 380
340
+ },
341
+ {
342
+ "epoch": 0.14346146772116977,
343
+ "grad_norm": 0.8825977444648743,
344
+ "learning_rate": 2.820635006596558e-05,
345
+ "loss": 1.3365,
346
+ "step": 390
347
+ },
348
+ {
349
+ "epoch": 0.14713996689350745,
350
+ "grad_norm": 1.0006340742111206,
351
+ "learning_rate": 2.351253346654272e-05,
352
+ "loss": 1.2482,
353
+ "step": 400
354
+ },
355
+ {
356
+ "epoch": 0.14713996689350745,
357
+ "eval_loss": 1.332943081855774,
358
+ "eval_runtime": 65.309,
359
+ "eval_samples_per_second": 17.532,
360
+ "eval_steps_per_second": 4.394,
361
+ "step": 400
362
  }
363
  ],
364
  "logging_steps": 10,
 
387
  "attributes": {}
388
  }
389
  },
390
+ "total_flos": 7.1094434267136e+16,
391
  "train_batch_size": 4,
392
  "trial_name": null,
393
  "trial_params": null