Training in progress, step 550, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e4f480d1202a174c70b1202204e7b19ce30e680e9aa677d4a6aa9b51470f4816
 size 323014168

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d702064a54a83cfe7011e94312c6740c68631ec38198bb112e73283b5b9325d
 size 323014168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0bed13abc506cd54f99481dbc28a31b45e3993ee70918e050ddaa7b4666bf34a
 size 165484738

 version https://git-lfs.github.com/spec/v1
+oid sha256:2354310a4c79f7892941438ca7a0bf30a918f8b24d806f087ecb08bdf61012e2
 size 165484738

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de2a2fb86838d3020d5803839893bd1dcef4db60ee5326a49eb5f9bfb377bf78
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:42b5bf42e6137ca7b21462d382307354d023593dbf4c25759316f573752c36b8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd4448a479fe2c3c13bb81ad3c5c2101e846d955cf940ee0558a49a098dd9051
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d1caf05e3c7d3f6b37ac8a69117422ba2bf5b941e02ad9725bc89f907b4e6ef
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.372147798538208,
-  "best_model_checkpoint": "miner_id_24/checkpoint-500",
-  "epoch": 0.8487163165711861,
   "eval_steps": 50,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3595,6 +3595,364 @@
       "eval_samples_per_second": 2.925,
       "eval_steps_per_second": 2.925,
       "step": 500
     }
   ],
   "logging_steps": 1,
@@ -3623,7 +3981,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.2402043214390886e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.37203362584114075,
+  "best_model_checkpoint": "miner_id_24/checkpoint-550",
+  "epoch": 0.9335879482283047,
   "eval_steps": 50,
+  "global_step": 550,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 2.925,
       "eval_steps_per_second": 2.925,
       "step": 500
+    },
+    {
+      "epoch": 0.8504137492043284,
+      "grad_norm": 0.23917846381664276,
+      "learning_rate": 0.00015190394441942843,
+      "loss": 0.8684,
+      "step": 501
+    },
+    {
+      "epoch": 0.8521111818374708,
+      "grad_norm": 0.27708154916763306,
+      "learning_rate": 0.0001514279750909365,
+      "loss": 1.0004,
+      "step": 502
+    },
+    {
+      "epoch": 0.8538086144706132,
+      "grad_norm": 0.25657930970191956,
+      "learning_rate": 0.00015095199138275128,
+      "loss": 0.7568,
+      "step": 503
+    },
+    {
+      "epoch": 0.8555060471037556,
+      "grad_norm": 0.2313452512025833,
+      "learning_rate": 0.00015047599808802332,
+      "loss": 0.8288,
+      "step": 504
+    },
+    {
+      "epoch": 0.8572034797368979,
+      "grad_norm": 0.2528156042098999,
+      "learning_rate": 0.00015,
+      "loss": 1.011,
+      "step": 505
+    },
+    {
+      "epoch": 0.8589009123700403,
+      "grad_norm": 0.1938907355070114,
+      "learning_rate": 0.00014952400191197665,
+      "loss": 0.5598,
+      "step": 506
+    },
+    {
+      "epoch": 0.8605983450031827,
+      "grad_norm": 0.23120371997356415,
+      "learning_rate": 0.00014904800861724872,
+      "loss": 0.6959,
+      "step": 507
+    },
+    {
+      "epoch": 0.862295777636325,
+      "grad_norm": 0.23072639107704163,
+      "learning_rate": 0.00014857202490906347,
+      "loss": 0.7868,
+      "step": 508
+    },
+    {
+      "epoch": 0.8639932102694674,
+      "grad_norm": 0.21651454269886017,
+      "learning_rate": 0.00014809605558057157,
+      "loss": 0.6725,
+      "step": 509
+    },
+    {
+      "epoch": 0.8656906429026098,
+      "grad_norm": 0.1905306875705719,
+      "learning_rate": 0.0001476201054247788,
+      "loss": 0.5756,
+      "step": 510
+    },
+    {
+      "epoch": 0.8673880755357521,
+      "grad_norm": 0.35889434814453125,
+      "learning_rate": 0.00014714417923449797,
+      "loss": 0.6115,
+      "step": 511
+    },
+    {
+      "epoch": 0.8690855081688945,
+      "grad_norm": 0.19174250960350037,
+      "learning_rate": 0.00014666828180230057,
+      "loss": 0.4859,
+      "step": 512
+    },
+    {
+      "epoch": 0.8707829408020369,
+      "grad_norm": 0.2138870805501938,
+      "learning_rate": 0.0001461924179204684,
+      "loss": 0.6636,
+      "step": 513
+    },
+    {
+      "epoch": 0.8724803734351793,
+      "grad_norm": 0.17262116074562073,
+      "learning_rate": 0.00014571659238094556,
+      "loss": 0.42,
+      "step": 514
+    },
+    {
+      "epoch": 0.8741778060683216,
+      "grad_norm": 0.22339358925819397,
+      "learning_rate": 0.00014524080997528987,
+      "loss": 0.6612,
+      "step": 515
+    },
+    {
+      "epoch": 0.875875238701464,
+      "grad_norm": 0.1979471892118454,
+      "learning_rate": 0.0001447650754946249,
+      "loss": 0.5441,
+      "step": 516
+    },
+    {
+      "epoch": 0.8775726713346064,
+      "grad_norm": 0.20259279012680054,
+      "learning_rate": 0.00014428939372959152,
+      "loss": 0.5254,
+      "step": 517
+    },
+    {
+      "epoch": 0.8792701039677487,
+      "grad_norm": 0.12251409888267517,
+      "learning_rate": 0.0001438137694702999,
+      "loss": 0.2171,
+      "step": 518
+    },
+    {
+      "epoch": 0.8809675366008911,
+      "grad_norm": 0.16714578866958618,
+      "learning_rate": 0.00014333820750628105,
+      "loss": 0.311,
+      "step": 519
+    },
+    {
+      "epoch": 0.8826649692340335,
+      "grad_norm": 0.24203087389469147,
+      "learning_rate": 0.00014286271262643866,
+      "loss": 0.6175,
+      "step": 520
+    },
+    {
+      "epoch": 0.8843624018671759,
+      "grad_norm": 0.1858789026737213,
+      "learning_rate": 0.00014238728961900088,
+      "loss": 0.3565,
+      "step": 521
+    },
+    {
+      "epoch": 0.8860598345003182,
+      "grad_norm": 0.09111540019512177,
+      "learning_rate": 0.00014191194327147212,
+      "loss": 0.1199,
+      "step": 522
+    },
+    {
+      "epoch": 0.8877572671334606,
+      "grad_norm": 0.13533198833465576,
+      "learning_rate": 0.00014143667837058477,
+      "loss": 0.2471,
+      "step": 523
+    },
+    {
+      "epoch": 0.889454699766603,
+      "grad_norm": 0.17338241636753082,
+      "learning_rate": 0.00014096149970225122,
+      "loss": 0.3255,
+      "step": 524
+    },
+    {
+      "epoch": 0.8911521323997453,
+      "grad_norm": 0.05573137849569321,
+      "learning_rate": 0.00014048641205151533,
+      "loss": 0.0455,
+      "step": 525
+    },
+    {
+      "epoch": 0.8928495650328877,
+      "grad_norm": 0.007357200141996145,
+      "learning_rate": 0.0001400114202025044,
+      "loss": 0.0004,
+      "step": 526
+    },
+    {
+      "epoch": 0.8945469976660301,
+      "grad_norm": 0.00043303659185767174,
+      "learning_rate": 0.00013953652893838119,
+      "loss": 0.0,
+      "step": 527
+    },
+    {
+      "epoch": 0.8962444302991726,
+      "grad_norm": 0.02541971206665039,
+      "learning_rate": 0.0001390617430412954,
+      "loss": 0.0028,
+      "step": 528
+    },
+    {
+      "epoch": 0.8979418629323149,
+      "grad_norm": 0.010525004006922245,
+      "learning_rate": 0.0001385870672923357,
+      "loss": 0.0005,
+      "step": 529
+    },
+    {
+      "epoch": 0.8996392955654573,
+      "grad_norm": 0.03903070092201233,
+      "learning_rate": 0.0001381125064714817,
+      "loss": 0.0003,
+      "step": 530
+    },
+    {
+      "epoch": 0.9013367281985997,
+      "grad_norm": 0.010076366364955902,
+      "learning_rate": 0.00013763806535755562,
+      "loss": 0.0002,
+      "step": 531
+    },
+    {
+      "epoch": 0.903034160831742,
+      "grad_norm": 0.0008758578333072364,
+      "learning_rate": 0.00013716374872817407,
+      "loss": 0.0,
+      "step": 532
+    },
+    {
+      "epoch": 0.9047315934648844,
+      "grad_norm": 0.0009034467511810362,
+      "learning_rate": 0.0001366895613597003,
+      "loss": 0.0,
+      "step": 533
+    },
+    {
+      "epoch": 0.9064290260980268,
+      "grad_norm": 0.0004988125874660909,
+      "learning_rate": 0.00013621550802719588,
+      "loss": 0.0,
+      "step": 534
+    },
+    {
+      "epoch": 0.9081264587311692,
+      "grad_norm": 0.012061301618814468,
+      "learning_rate": 0.00013574159350437261,
+      "loss": 0.0006,
+      "step": 535
+    },
+    {
+      "epoch": 0.9098238913643115,
+      "grad_norm": 0.0005069606122560799,
+      "learning_rate": 0.0001352678225635444,
+      "loss": 0.0,
+      "step": 536
+    },
+    {
+      "epoch": 0.9115213239974539,
+      "grad_norm": 0.003097748151049018,
+      "learning_rate": 0.00013479419997557948,
+      "loss": 0.0001,
+      "step": 537
+    },
+    {
+      "epoch": 0.9132187566305963,
+      "grad_norm": 0.010489325039088726,
+      "learning_rate": 0.000134320730509852,
+      "loss": 0.0002,
+      "step": 538
+    },
+    {
+      "epoch": 0.9149161892637386,
+      "grad_norm": 0.00030282657826319337,
+      "learning_rate": 0.00013384741893419415,
+      "loss": 0.0,
+      "step": 539
+    },
+    {
+      "epoch": 0.916613621896881,
+      "grad_norm": 0.0403389073908329,
+      "learning_rate": 0.00013337427001484836,
+      "loss": 0.0005,
+      "step": 540
+    },
+    {
+      "epoch": 0.9183110545300234,
+      "grad_norm": 0.003200069535523653,
+      "learning_rate": 0.0001329012885164189,
+      "loss": 0.0001,
+      "step": 541
+    },
+    {
+      "epoch": 0.9200084871631657,
+      "grad_norm": 0.007805091328918934,
+      "learning_rate": 0.00013242847920182424,
+      "loss": 0.0002,
+      "step": 542
+    },
+    {
+      "epoch": 0.9217059197963081,
+      "grad_norm": 0.004255454055964947,
+      "learning_rate": 0.000131955846832249,
+      "loss": 0.0001,
+      "step": 543
+    },
+    {
+      "epoch": 0.9234033524294505,
+      "grad_norm": 0.0008626742055639625,
+      "learning_rate": 0.00013148339616709577,
+      "loss": 0.0,
+      "step": 544
+    },
+    {
+      "epoch": 0.9251007850625929,
+      "grad_norm": 0.005825830157846212,
+      "learning_rate": 0.00013101113196393758,
+      "loss": 0.0002,
+      "step": 545
+    },
+    {
+      "epoch": 0.9267982176957352,
+      "grad_norm": 0.00038926751585677266,
+      "learning_rate": 0.00013053905897846972,
+      "loss": 0.0,
+      "step": 546
+    },
+    {
+      "epoch": 0.9284956503288776,
+      "grad_norm": 0.025748664513230324,
+      "learning_rate": 0.00013006718196446188,
+      "loss": 0.0007,
+      "step": 547
+    },
+    {
+      "epoch": 0.93019308296202,
+      "grad_norm": 0.000722411903552711,
+      "learning_rate": 0.0001295955056737104,
+      "loss": 0.0,
+      "step": 548
+    },
+    {
+      "epoch": 0.9318905155951623,
+      "grad_norm": 0.013827555812895298,
+      "learning_rate": 0.0001291240348559902,
+      "loss": 0.0003,
+      "step": 549
+    },
+    {
+      "epoch": 0.9335879482283047,
+      "grad_norm": 0.0006142717902548611,
+      "learning_rate": 0.00012865277425900724,
+      "loss": 0.0,
+      "step": 550
+    },
+    {
+      "epoch": 0.9335879482283047,
+      "eval_loss": 0.37203362584114075,
+      "eval_runtime": 65.9103,
+      "eval_samples_per_second": 2.928,
+      "eval_steps_per_second": 2.928,
+      "step": 550
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.562049824924631e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null