error577 committed on
Commit 37ad8f5 · verified · 1 Parent(s): e4e7fa4

Training in progress, step 400, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:aba9407221c1f891085692796698f7a341f8458f3b8a9dbe7e6741501bb3713a
+ oid sha256:78753ab537273541f6cd45d76e5c1871d220cbd5009e6b6482631c3c20a93b0b
  size 323014168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:832eaca613f0bbac9047e6faee88be5b5faa5cb1cac23ff7616bd970e065616e
+ oid sha256:b5d1a9cd9ded4881a65505bdf5c118594bd0f7af5436648ab7f5f7c4a3fd4217
  size 164465012
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e8ce40886c9babbebea3e7d91df4df2be439bdea45ab11f237bc0e4cdaf7a98b
+ oid sha256:37553bbc12e5a59786b13569ace17196557ba6326aba9cbcada11c4ecab2bcdf
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cc4359ecb90a50bbea116b9a12b6a99effac889cc2d40f6093f2c443d61fb593
+ oid sha256:acaca55091cad13358c11632689bee2ba722202048435717d3d6f988abffbf55
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.39342138171195984,
- "best_model_checkpoint": "miner_id_24/checkpoint-250",
- "epoch": 0.5941014215998303,
+ "best_metric": 0.3837679922580719,
+ "best_model_checkpoint": "miner_id_24/checkpoint-400",
+ "epoch": 0.6789730532569489,
  "eval_steps": 50,
- "global_step": 350,
+ "global_step": 400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2521,6 +2521,364 @@
  "eval_samples_per_second": 2.943,
  "eval_steps_per_second": 2.943,
  "step": 350
+ },
+ {
+ "epoch": 0.5957988542329726,
+ "grad_norm": 0.4255245625972748,
+ "learning_rate": 0.00022042073441788358,
+ "loss": 1.1602,
+ "step": 351
+ },
+ {
+ "epoch": 0.597496286866115,
+ "grad_norm": 1.3056901693344116,
+ "learning_rate": 0.00022000009848385105,
+ "loss": 0.9796,
+ "step": 352
+ },
+ {
+ "epoch": 0.5991937194992574,
+ "grad_norm": 0.21184608340263367,
+ "learning_rate": 0.0002195787576496039,
+ "loss": 0.6262,
+ "step": 353
+ },
+ {
+ "epoch": 0.6008911521323997,
+ "grad_norm": 0.24157075583934784,
+ "learning_rate": 0.00021915671615803966,
+ "loss": 0.8544,
+ "step": 354
+ },
+ {
+ "epoch": 0.6025885847655421,
+ "grad_norm": 0.20036348700523376,
+ "learning_rate": 0.00021873397825911153,
+ "loss": 0.6267,
+ "step": 355
+ },
+ {
+ "epoch": 0.6042860173986845,
+ "grad_norm": 0.20190277695655823,
+ "learning_rate": 0.00021831054820978544,
+ "loss": 0.6421,
+ "step": 356
+ },
+ {
+ "epoch": 0.6059834500318269,
+ "grad_norm": 0.2218364179134369,
+ "learning_rate": 0.00021788643027399724,
+ "loss": 0.7318,
+ "step": 357
+ },
+ {
+ "epoch": 0.6076808826649692,
+ "grad_norm": 0.2376060038805008,
+ "learning_rate": 0.00021746162872260985,
+ "loss": 0.8077,
+ "step": 358
+ },
+ {
+ "epoch": 0.6093783152981116,
+ "grad_norm": 0.19997930526733398,
+ "learning_rate": 0.0002170361478333702,
+ "loss": 0.6109,
+ "step": 359
+ },
+ {
+ "epoch": 0.611075747931254,
+ "grad_norm": 0.25473812222480774,
+ "learning_rate": 0.0002166099918908661,
+ "loss": 0.7628,
+ "step": 360
+ },
+ {
+ "epoch": 0.6127731805643963,
+ "grad_norm": 0.16967284679412842,
+ "learning_rate": 0.00021618316518648317,
+ "loss": 0.3639,
+ "step": 361
+ },
+ {
+ "epoch": 0.6144706131975387,
+ "grad_norm": 0.2045927196741104,
+ "learning_rate": 0.0002157556720183616,
+ "loss": 0.5688,
+ "step": 362
+ },
+ {
+ "epoch": 0.6161680458306811,
+ "grad_norm": 0.30063769221305847,
+ "learning_rate": 0.00021532751669135284,
+ "loss": 0.8787,
+ "step": 363
+ },
+ {
+ "epoch": 0.6178654784638234,
+ "grad_norm": 0.22912342846393585,
+ "learning_rate": 0.00021489870351697622,
+ "loss": 0.5724,
+ "step": 364
+ },
+ {
+ "epoch": 0.6195629110969658,
+ "grad_norm": 0.1712283343076706,
+ "learning_rate": 0.00021446923681337575,
+ "loss": 0.3771,
+ "step": 365
+ },
+ {
+ "epoch": 0.6212603437301082,
+ "grad_norm": 0.22983159124851227,
+ "learning_rate": 0.00021403912090527623,
+ "loss": 0.6274,
+ "step": 366
+ },
+ {
+ "epoch": 0.6229577763632506,
+ "grad_norm": 0.2075144350528717,
+ "learning_rate": 0.00021360836012394025,
+ "loss": 0.5276,
+ "step": 367
+ },
+ {
+ "epoch": 0.6246552089963929,
+ "grad_norm": 0.1731417328119278,
+ "learning_rate": 0.00021317695880712398,
+ "loss": 0.314,
+ "step": 368
+ },
+ {
+ "epoch": 0.6263526416295353,
+ "grad_norm": 0.1949385702610016,
+ "learning_rate": 0.0002127449212990339,
+ "loss": 0.4633,
+ "step": 369
+ },
+ {
+ "epoch": 0.6280500742626777,
+ "grad_norm": 0.20679379999637604,
+ "learning_rate": 0.00021231225195028297,
+ "loss": 0.4547,
+ "step": 370
+ },
+ {
+ "epoch": 0.62974750689582,
+ "grad_norm": 0.1853644698858261,
+ "learning_rate": 0.00021187895511784666,
+ "loss": 0.3758,
+ "step": 371
+ },
+ {
+ "epoch": 0.6314449395289624,
+ "grad_norm": 0.2339005470275879,
+ "learning_rate": 0.00021144503516501927,
+ "loss": 0.4251,
+ "step": 372
+ },
+ {
+ "epoch": 0.6331423721621048,
+ "grad_norm": 0.12531976401805878,
+ "learning_rate": 0.00021101049646137003,
+ "loss": 0.1716,
+ "step": 373
+ },
+ {
+ "epoch": 0.6348398047952472,
+ "grad_norm": 0.12999360263347626,
+ "learning_rate": 0.00021057534338269872,
+ "loss": 0.2032,
+ "step": 374
+ },
+ {
+ "epoch": 0.6365372374283895,
+ "grad_norm": 0.1318761557340622,
+ "learning_rate": 0.00021013958031099205,
+ "loss": 0.1968,
+ "step": 375
+ },
+ {
+ "epoch": 0.6382346700615319,
+ "grad_norm": 0.06346186250448227,
+ "learning_rate": 0.00020970321163437934,
+ "loss": 0.05,
+ "step": 376
+ },
+ {
+ "epoch": 0.6399321026946743,
+ "grad_norm": 0.12222940474748611,
+ "learning_rate": 0.00020926624174708827,
+ "loss": 0.1675,
+ "step": 377
+ },
+ {
+ "epoch": 0.6416295353278166,
+ "grad_norm": 0.12370602786540985,
+ "learning_rate": 0.0002088286750494008,
+ "loss": 0.0724,
+ "step": 378
+ },
+ {
+ "epoch": 0.643326967960959,
+ "grad_norm": 0.0037796611431986094,
+ "learning_rate": 0.00020839051594760872,
+ "loss": 0.0002,
+ "step": 379
+ },
+ {
+ "epoch": 0.6450244005941014,
+ "grad_norm": 0.08905645459890366,
+ "learning_rate": 0.00020795176885396926,
+ "loss": 0.0305,
+ "step": 380
+ },
+ {
+ "epoch": 0.6467218332272437,
+ "grad_norm": 0.11877533793449402,
+ "learning_rate": 0.00020751243818666087,
+ "loss": 0.1818,
+ "step": 381
+ },
+ {
+ "epoch": 0.6484192658603861,
+ "grad_norm": 0.0044077117927372456,
+ "learning_rate": 0.00020707252836973844,
+ "loss": 0.0001,
+ "step": 382
+ },
+ {
+ "epoch": 0.6501166984935285,
+ "grad_norm": 0.0014336027670651674,
+ "learning_rate": 0.00020663204383308898,
+ "loss": 0.0001,
+ "step": 383
+ },
+ {
+ "epoch": 0.6518141311266709,
+ "grad_norm": 0.025872627273201942,
+ "learning_rate": 0.0002061909890123868,
+ "loss": 0.0031,
+ "step": 384
+ },
+ {
+ "epoch": 0.6535115637598132,
+ "grad_norm": 0.007007645908743143,
+ "learning_rate": 0.0002057493683490491,
+ "loss": 0.0002,
+ "step": 385
+ },
+ {
+ "epoch": 0.6552089963929556,
+ "grad_norm": 0.01107161957770586,
+ "learning_rate": 0.0002053071862901911,
+ "loss": 0.0003,
+ "step": 386
+ },
+ {
+ "epoch": 0.6569064290260981,
+ "grad_norm": 0.014977892860770226,
+ "learning_rate": 0.00020486444728858117,
+ "loss": 0.0003,
+ "step": 387
+ },
+ {
+ "epoch": 0.6586038616592405,
+ "grad_norm": 0.011495725251734257,
+ "learning_rate": 0.00020442115580259613,
+ "loss": 0.0002,
+ "step": 388
+ },
+ {
+ "epoch": 0.6603012942923828,
+ "grad_norm": 0.0011856303317472339,
+ "learning_rate": 0.00020397731629617636,
+ "loss": 0.0001,
+ "step": 389
+ },
+ {
+ "epoch": 0.6619987269255252,
+ "grad_norm": 0.000914178614038974,
+ "learning_rate": 0.00020353293323878074,
+ "loss": 0.0,
+ "step": 390
+ },
+ {
+ "epoch": 0.6636961595586676,
+ "grad_norm": 0.01322512049227953,
+ "learning_rate": 0.00020308801110534178,
+ "loss": 0.0003,
+ "step": 391
+ },
+ {
+ "epoch": 0.6653935921918099,
+ "grad_norm": 0.07302884012460709,
+ "learning_rate": 0.00020264255437622036,
+ "loss": 0.0008,
+ "step": 392
+ },
+ {
+ "epoch": 0.6670910248249523,
+ "grad_norm": 0.002658440498635173,
+ "learning_rate": 0.00020219656753716074,
+ "loss": 0.0001,
+ "step": 393
+ },
+ {
+ "epoch": 0.6687884574580947,
+ "grad_norm": 0.0038816186133772135,
+ "learning_rate": 0.00020175005507924558,
+ "loss": 0.0001,
+ "step": 394
+ },
+ {
+ "epoch": 0.670485890091237,
+ "grad_norm": 0.0019201135728508234,
+ "learning_rate": 0.00020130302149885031,
+ "loss": 0.0,
+ "step": 395
+ },
+ {
+ "epoch": 0.6721833227243794,
+ "grad_norm": 0.005636914633214474,
+ "learning_rate": 0.00020085547129759806,
+ "loss": 0.0002,
+ "step": 396
+ },
+ {
+ "epoch": 0.6738807553575218,
+ "grad_norm": 0.003746110713109374,
+ "learning_rate": 0.00020040740898231448,
+ "loss": 0.0001,
+ "step": 397
+ },
+ {
+ "epoch": 0.6755781879906642,
+ "grad_norm": 0.004497275687754154,
+ "learning_rate": 0.0001999588390649821,
+ "loss": 0.0001,
+ "step": 398
+ },
+ {
+ "epoch": 0.6772756206238065,
+ "grad_norm": 0.004322202410548925,
+ "learning_rate": 0.00019950976606269497,
+ "loss": 0.0001,
+ "step": 399
+ },
+ {
+ "epoch": 0.6789730532569489,
+ "grad_norm": 0.025630857795476913,
+ "learning_rate": 0.00019906019449761325,
+ "loss": 0.0003,
+ "step": 400
+ },
+ {
+ "epoch": 0.6789730532569489,
+ "eval_loss": 0.3837679922580719,
+ "eval_runtime": 65.6798,
+ "eval_samples_per_second": 2.938,
+ "eval_steps_per_second": 2.938,
+ "step": 400
  }
  ],
  "logging_steps": 1,
@@ -2535,7 +2893,7 @@
  "early_stopping_threshold": 0.0
  },
  "attributes": {
- "early_stopping_patience_counter": 2
+ "early_stopping_patience_counter": 0
  }
  },
  "TrainerControl": {
@@ -2549,7 +2907,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2.2671983590244352e+17,
+ "total_flos": 2.5934376577794048e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null