Training in progress, step 500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d60909b29ceb3b539e9fd3f0399ddb3186fe912a2f8f72272826a3db71984951
 size 323014168

 version https://git-lfs.github.com/spec/v1
+oid sha256:e4f480d1202a174c70b1202204e7b19ce30e680e9aa677d4a6aa9b51470f4816
 size 323014168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ea12b2d96783b23a448e8905f6a84734ead74e60030d27eead274abc81ed259
 size 165484738

 version https://git-lfs.github.com/spec/v1
+oid sha256:0bed13abc506cd54f99481dbc28a31b45e3993ee70918e050ddaa7b4666bf34a
 size 165484738

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:39249c7fc00247911b7de0a38a23b7b2480ce69929410038d29eeeca928212de
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:de2a2fb86838d3020d5803839893bd1dcef4db60ee5326a49eb5f9bfb377bf78
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:efd8621281655b616461c780336136f3bf393ae00286c357e0aaf5646c2af9f4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd4448a479fe2c3c13bb81ad3c5c2101e846d955cf940ee0558a49a098dd9051
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.38282719254493713,
-  "best_model_checkpoint": "miner_id_24/checkpoint-450",
-  "epoch": 0.7638446849140674,
   "eval_steps": 50,
-  "global_step": 450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3237,6 +3237,364 @@
       "eval_samples_per_second": 2.928,
       "eval_steps_per_second": 2.928,
       "step": 450
     }
   ],
   "logging_steps": 1,
@@ -3265,7 +3623,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.919457266770903e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.372147798538208,
+  "best_model_checkpoint": "miner_id_24/checkpoint-500",
+  "epoch": 0.8487163165711861,
   "eval_steps": 50,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 2.928,
       "eval_steps_per_second": 2.928,
       "step": 450
+    },
+    {
+      "epoch": 0.7655421175472098,
+      "grad_norm": 0.35101303458213806,
+      "learning_rate": 0.00017557832889489357,
+      "loss": 1.093,
+      "step": 451
+    },
+    {
+      "epoch": 0.7672395501803522,
+      "grad_norm": 0.23341582715511322,
+      "learning_rate": 0.0001751091735662596,
+      "loss": 0.7267,
+      "step": 452
+    },
+    {
+      "epoch": 0.7689369828134945,
+      "grad_norm": 0.25353920459747314,
+      "learning_rate": 0.00017463976538852654,
+      "loss": 0.8581,
+      "step": 453
+    },
+    {
+      "epoch": 0.7706344154466369,
+      "grad_norm": 0.21722197532653809,
+      "learning_rate": 0.00017417010908862962,
+      "loss": 0.7378,
+      "step": 454
+    },
+    {
+      "epoch": 0.7723318480797793,
+      "grad_norm": 0.22062261402606964,
+      "learning_rate": 0.00017370020939600248,
+      "loss": 0.7099,
+      "step": 455
+    },
+    {
+      "epoch": 0.7740292807129217,
+      "grad_norm": 0.1780662089586258,
+      "learning_rate": 0.00017323007104252984,
+      "loss": 0.4919,
+      "step": 456
+    },
+    {
+      "epoch": 0.775726713346064,
+      "grad_norm": 0.20236726105213165,
+      "learning_rate": 0.00017275969876249974,
+      "loss": 0.5919,
+      "step": 457
+    },
+    {
+      "epoch": 0.7774241459792065,
+      "grad_norm": 0.22523203492164612,
+      "learning_rate": 0.00017228909729255574,
+      "loss": 0.7686,
+      "step": 458
+    },
+    {
+      "epoch": 0.7791215786123489,
+      "grad_norm": 0.23676562309265137,
+      "learning_rate": 0.00017181827137164953,
+      "loss": 0.8026,
+      "step": 459
+    },
+    {
+      "epoch": 0.7808190112454912,
+      "grad_norm": 0.21583965420722961,
+      "learning_rate": 0.00017134722574099276,
+      "loss": 0.7097,
+      "step": 460
+    },
+    {
+      "epoch": 0.7825164438786336,
+      "grad_norm": 0.2776244282722473,
+      "learning_rate": 0.0001708759651440098,
+      "loss": 0.9476,
+      "step": 461
+    },
+    {
+      "epoch": 0.784213876511776,
+      "grad_norm": 0.2028312236070633,
+      "learning_rate": 0.00017040449432628962,
+      "loss": 0.6013,
+      "step": 462
+    },
+    {
+      "epoch": 0.7859113091449184,
+      "grad_norm": 0.2275046855211258,
+      "learning_rate": 0.0001699328180355381,
+      "loss": 0.7551,
+      "step": 463
+    },
+    {
+      "epoch": 0.7876087417780607,
+      "grad_norm": 0.20202623307704926,
+      "learning_rate": 0.00016946094102153025,
+      "loss": 0.4759,
+      "step": 464
+    },
+    {
+      "epoch": 0.7893061744112031,
+      "grad_norm": 0.17477299273014069,
+      "learning_rate": 0.00016898886803606237,
+      "loss": 0.4537,
+      "step": 465
+    },
+    {
+      "epoch": 0.7910036070443455,
+      "grad_norm": 0.08333531022071838,
+      "learning_rate": 0.0001685166038329042,
+      "loss": 0.1224,
+      "step": 466
+    },
+    {
+      "epoch": 0.7927010396774878,
+      "grad_norm": 0.21820639073848724,
+      "learning_rate": 0.000168044153167751,
+      "loss": 0.5658,
+      "step": 467
+    },
+    {
+      "epoch": 0.7943984723106302,
+      "grad_norm": 0.14012931287288666,
+      "learning_rate": 0.00016757152079817573,
+      "loss": 0.2818,
+      "step": 468
+    },
+    {
+      "epoch": 0.7960959049437726,
+      "grad_norm": 0.1741451919078827,
+      "learning_rate": 0.00016709871148358108,
+      "loss": 0.3492,
+      "step": 469
+    },
+    {
+      "epoch": 0.797793337576915,
+      "grad_norm": 0.1527792066335678,
+      "learning_rate": 0.00016662572998515164,
+      "loss": 0.2187,
+      "step": 470
+    },
+    {
+      "epoch": 0.7994907702100573,
+      "grad_norm": 0.1383330523967743,
+      "learning_rate": 0.00016615258106580585,
+      "loss": 0.2405,
+      "step": 471
+    },
+    {
+      "epoch": 0.8011882028431997,
+      "grad_norm": 0.13245010375976562,
+      "learning_rate": 0.000165679269490148,
+      "loss": 0.2295,
+      "step": 472
+    },
+    {
+      "epoch": 0.8028856354763421,
+      "grad_norm": 0.13676372170448303,
+      "learning_rate": 0.0001652058000244205,
+      "loss": 0.2516,
+      "step": 473
+    },
+    {
+      "epoch": 0.8045830681094844,
+      "grad_norm": 0.07976588606834412,
+      "learning_rate": 0.00016473217743645556,
+      "loss": 0.0916,
+      "step": 474
+    },
+    {
+      "epoch": 0.8062805007426268,
+      "grad_norm": 0.11341172456741333,
+      "learning_rate": 0.00016425840649562736,
+      "loss": 0.152,
+      "step": 475
+    },
+    {
+      "epoch": 0.8079779333757692,
+      "grad_norm": 0.12954847514629364,
+      "learning_rate": 0.00016378449197280412,
+      "loss": 0.1525,
+      "step": 476
+    },
+    {
+      "epoch": 0.8096753660089115,
+      "grad_norm": 0.08243954181671143,
+      "learning_rate": 0.0001633104386402997,
+      "loss": 0.0708,
+      "step": 477
+    },
+    {
+      "epoch": 0.8113727986420539,
+      "grad_norm": 0.0030563257168978453,
+      "learning_rate": 0.00016283625127182596,
+      "loss": 0.0001,
+      "step": 478
+    },
+    {
+      "epoch": 0.8130702312751963,
+      "grad_norm": 0.0008315025479532778,
+      "learning_rate": 0.00016236193464244444,
+      "loss": 0.0,
+      "step": 479
+    },
+    {
+      "epoch": 0.8147676639083387,
+      "grad_norm": 0.006789859849959612,
+      "learning_rate": 0.00016188749352851825,
+      "loss": 0.0002,
+      "step": 480
+    },
+    {
+      "epoch": 0.816465096541481,
+      "grad_norm": 0.009863720275461674,
+      "learning_rate": 0.00016141293270766424,
+      "loss": 0.0002,
+      "step": 481
+    },
+    {
+      "epoch": 0.8181625291746234,
+      "grad_norm": 0.059226732701063156,
+      "learning_rate": 0.00016093825695870462,
+      "loss": 0.0008,
+      "step": 482
+    },
+    {
+      "epoch": 0.8198599618077658,
+      "grad_norm": 0.0056890202686190605,
+      "learning_rate": 0.00016046347106161876,
+      "loss": 0.0003,
+      "step": 483
+    },
+    {
+      "epoch": 0.8215573944409081,
+      "grad_norm": 0.0004136976203881204,
+      "learning_rate": 0.0001599885797974956,
+      "loss": 0.0,
+      "step": 484
+    },
+    {
+      "epoch": 0.8232548270740505,
+      "grad_norm": 0.016998767852783203,
+      "learning_rate": 0.00015951358794848465,
+      "loss": 0.0004,
+      "step": 485
+    },
+    {
+      "epoch": 0.8249522597071929,
+      "grad_norm": 0.0017255417769774795,
+      "learning_rate": 0.00015903850029774878,
+      "loss": 0.0001,
+      "step": 486
+    },
+    {
+      "epoch": 0.8266496923403353,
+      "grad_norm": 0.0012270875740796328,
+      "learning_rate": 0.0001585633216294152,
+      "loss": 0.0001,
+      "step": 487
+    },
+    {
+      "epoch": 0.8283471249734776,
+      "grad_norm": 0.0009051132365129888,
+      "learning_rate": 0.0001580880567285279,
+      "loss": 0.0,
+      "step": 488
+    },
+    {
+      "epoch": 0.83004455760662,
+      "grad_norm": 0.0009166182717308402,
+      "learning_rate": 0.00015761271038099912,
+      "loss": 0.0001,
+      "step": 489
+    },
+    {
+      "epoch": 0.8317419902397624,
+      "grad_norm": 0.002149962354451418,
+      "learning_rate": 0.00015713728737356137,
+      "loss": 0.0001,
+      "step": 490
+    },
+    {
+      "epoch": 0.8334394228729047,
+      "grad_norm": 0.008419407531619072,
+      "learning_rate": 0.00015666179249371892,
+      "loss": 0.0004,
+      "step": 491
+    },
+    {
+      "epoch": 0.8351368555060471,
+      "grad_norm": 0.0006536226137541234,
+      "learning_rate": 0.00015618623052970006,
+      "loss": 0.0,
+      "step": 492
+    },
+    {
+      "epoch": 0.8368342881391895,
+      "grad_norm": 0.035275768488645554,
+      "learning_rate": 0.0001557106062704085,
+      "loss": 0.0064,
+      "step": 493
+    },
+    {
+      "epoch": 0.8385317207723318,
+      "grad_norm": 0.002518226159736514,
+      "learning_rate": 0.00015523492450537517,
+      "loss": 0.0001,
+      "step": 494
+    },
+    {
+      "epoch": 0.8402291534054742,
+      "grad_norm": 0.00048825182602740824,
+      "learning_rate": 0.00015475919002471016,
+      "loss": 0.0,
+      "step": 495
+    },
+    {
+      "epoch": 0.8419265860386166,
+      "grad_norm": 0.007141390815377235,
+      "learning_rate": 0.0001542834076190544,
+      "loss": 0.0001,
+      "step": 496
+    },
+    {
+      "epoch": 0.843624018671759,
+      "grad_norm": 0.048018842935562134,
+      "learning_rate": 0.00015380758207953155,
+      "loss": 0.0004,
+      "step": 497
+    },
+    {
+      "epoch": 0.8453214513049013,
+      "grad_norm": 0.0009412519866600633,
+      "learning_rate": 0.0001533317181976994,
+      "loss": 0.0,
+      "step": 498
+    },
+    {
+      "epoch": 0.8470188839380437,
+      "grad_norm": 0.031883303076028824,
+      "learning_rate": 0.00015285582076550198,
+      "loss": 0.0037,
+      "step": 499
+    },
+    {
+      "epoch": 0.8487163165711861,
+      "grad_norm": 0.016174526885151863,
+      "learning_rate": 0.00015237989457522118,
+      "loss": 0.0007,
+      "step": 500
+    },
+    {
+      "epoch": 0.8487163165711861,
+      "eval_loss": 0.372147798538208,
+      "eval_runtime": 65.9897,
+      "eval_samples_per_second": 2.925,
+      "eval_steps_per_second": 2.925,
+      "step": 500
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.2402043214390886e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null