lesso10 committed on
Commit
ee799bd
·
verified ·
1 Parent(s): bb6da05

Training in progress, step 48, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b512b8f8dd84c82511e13703546d65352fbeedf85b00778aa962eb94247306a5
3
  size 56662952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b797e4c04a27bb4f820646affd379009423f611f8b84c4da5a6a21b2cb569e7
3
  size 56662952
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e281b50eec9dde9a8aee6b26d7f7ae25a259241e928a0d8f1d1433692fcea73
3
  size 113488058
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe9c3285f18a6121809a78555dfb7758167ad6bd78d4b52a0e1935eefab90007
3
  size 113488058
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05876e4112429c9cb34f012e95c06bfada72620e7e36eec7151452095fdb183a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c0377efdbadccebd5ac05073acd31f5b44130e94c311bd33e5ebbfe5cf20c4f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bca862ccc612a682aa6409083a4ab9232ac8edbd3669bc8f7ff49ceb68afa56
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba6fd801ed573bc507b53e922ba337a0ba782f238e16ef13f7ef24143336ab21
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.86515212059021,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-40",
4
- "epoch": 0.8421052631578947,
5
  "eval_steps": 5,
6
- "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -359,6 +359,70 @@
359
  "eval_samples_per_second": 14.639,
360
  "eval_steps_per_second": 3.927,
361
  "step": 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
  }
363
  ],
364
  "logging_steps": 1,
@@ -373,7 +437,7 @@
373
  "early_stopping_threshold": 0.0
374
  },
375
  "attributes": {
376
- "early_stopping_patience_counter": 0
377
  }
378
  },
379
  "TrainerControl": {
@@ -382,12 +446,12 @@
382
  "should_evaluate": false,
383
  "should_log": false,
384
  "should_save": true,
385
- "should_training_stop": false
386
  },
387
  "attributes": {}
388
  }
389
  },
390
- "total_flos": 4808081670144000.0,
391
  "train_batch_size": 4,
392
  "trial_name": null,
393
  "trial_params": null
 
1
  {
2
  "best_metric": 0.86515212059021,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-40",
4
+ "epoch": 1.0105263157894737,
5
  "eval_steps": 5,
6
+ "global_step": 48,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
359
  "eval_samples_per_second": 14.639,
360
  "eval_steps_per_second": 3.927,
361
  "step": 40
362
+ },
363
+ {
364
+ "epoch": 0.8631578947368421,
365
+ "grad_norm": 6.8620100021362305,
366
+ "learning_rate": 6.397529592809614e-06,
367
+ "loss": 3.6803,
368
+ "step": 41
369
+ },
370
+ {
371
+ "epoch": 0.8842105263157894,
372
+ "grad_norm": 8.952465057373047,
373
+ "learning_rate": 4.727588125342669e-06,
374
+ "loss": 3.8928,
375
+ "step": 42
376
+ },
377
+ {
378
+ "epoch": 0.9052631578947369,
379
+ "grad_norm": 7.425169944763184,
380
+ "learning_rate": 3.299194563372604e-06,
381
+ "loss": 3.8333,
382
+ "step": 43
383
+ },
384
+ {
385
+ "epoch": 0.9263157894736842,
386
+ "grad_norm": 6.198794364929199,
387
+ "learning_rate": 2.1199700045797077e-06,
388
+ "loss": 2.8467,
389
+ "step": 44
390
+ },
391
+ {
392
+ "epoch": 0.9473684210526315,
393
+ "grad_norm": 7.5470170974731445,
394
+ "learning_rate": 1.196206122203647e-06,
395
+ "loss": 3.5179,
396
+ "step": 45
397
+ },
398
+ {
399
+ "epoch": 0.9473684210526315,
400
+ "eval_loss": 0.8671186566352844,
401
+ "eval_runtime": 2.7976,
402
+ "eval_samples_per_second": 14.655,
403
+ "eval_steps_per_second": 3.932,
404
+ "step": 45
405
+ },
406
+ {
407
+ "epoch": 0.968421052631579,
408
+ "grad_norm": 6.172430038452148,
409
+ "learning_rate": 5.328315962444874e-07,
410
+ "loss": 2.8701,
411
+ "step": 46
412
+ },
413
+ {
414
+ "epoch": 0.9894736842105263,
415
+ "grad_norm": 6.897263526916504,
416
+ "learning_rate": 1.333858168224178e-07,
417
+ "loss": 3.4665,
418
+ "step": 47
419
+ },
420
+ {
421
+ "epoch": 1.0105263157894737,
422
+ "grad_norm": 7.529054641723633,
423
+ "learning_rate": 0.0,
424
+ "loss": 4.1975,
425
+ "step": 48
426
  }
427
  ],
428
  "logging_steps": 1,
 
437
  "early_stopping_threshold": 0.0
438
  },
439
  "attributes": {
440
+ "early_stopping_patience_counter": 1
441
  }
442
  },
443
  "TrainerControl": {
 
446
  "should_evaluate": false,
447
  "should_log": false,
448
  "should_save": true,
449
+ "should_training_stop": true
450
  },
451
  "attributes": {}
452
  }
453
  },
454
+ "total_flos": 5769698004172800.0,
455
  "train_batch_size": 4,
456
  "trial_name": null,
457
  "trial_params": null