Training in progress, step 300, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +362 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b81c89bf5a89dea220e294a4652d890349bf12553800a49024591d26ca2757a4
 size 323014168

 version https://git-lfs.github.com/spec/v1
+oid sha256:628a3a2f90570264e045b0c66a72c37a82feda1da057e5fc7746cb522fe7b00b
 size 323014168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4536e6d49005fda0a95151faebf86801ba2e3b8da30b27d77d184347ee694363
-size 164464564

 version https://git-lfs.github.com/spec/v1
+oid sha256:0f6898c2c71088c4ef94a63a3c700b35c379e61100088540569424e9dffecf07
+size 164465012

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:356dcfdc03c399d2e663c95cf1133f32813c707adc33ad61bc750dcc5222213f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3459ab28cb7371e035b392fdf49bc74d305172fd2f97aa43d33e13d80b0a5de7
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:517cde929c0b918b0e53e0ffd764ecc43637194fb41b83640993bbae7c21d100
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:83f30fc7a303c581db8790c0d4f8638955c788c0b8516a8046c52d38b258639f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.39342138171195984,
   "best_model_checkpoint": "miner_id_24/checkpoint-250",
-  "epoch": 0.42435815828559303,
   "eval_steps": 50,
-  "global_step": 250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1805,6 +1805,364 @@
       "eval_samples_per_second": 2.941,
       "eval_steps_per_second": 2.941,
       "step": 250
     }
   ],
   "logging_steps": 1,
@@ -1819,7 +2177,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -1833,7 +2191,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.6186741772569805e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.39342138171195984,
   "best_model_checkpoint": "miner_id_24/checkpoint-250",
+  "epoch": 0.5092297899427116,
   "eval_steps": 50,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 2.941,
       "eval_steps_per_second": 2.941,
       "step": 250
+    },
+    {
+      "epoch": 0.4260555909187354,
+      "grad_norm": 0.3402771055698395,
+      "learning_rate": 0.0002582310828261803,
+      "loss": 0.955,
+      "step": 251
+    },
+    {
+      "epoch": 0.4277530235518778,
+      "grad_norm": 0.2694092392921448,
+      "learning_rate": 0.00025790097005079764,
+      "loss": 0.7843,
+      "step": 252
+    },
+    {
+      "epoch": 0.42945045618502015,
+      "grad_norm": 0.22484031319618225,
+      "learning_rate": 0.00025756977071384455,
+      "loss": 0.6626,
+      "step": 253
+    },
+    {
+      "epoch": 0.4311478888181625,
+      "grad_norm": 0.25034627318382263,
+      "learning_rate": 0.0002572374881504945,
+      "loss": 0.8865,
+      "step": 254
+    },
+    {
+      "epoch": 0.4328453214513049,
+      "grad_norm": 0.25369909405708313,
+      "learning_rate": 0.00025690412570682946,
+      "loss": 0.7099,
+      "step": 255
+    },
+    {
+      "epoch": 0.43454275408444726,
+      "grad_norm": 0.22795934975147247,
+      "learning_rate": 0.0002565696867398053,
+      "loss": 0.7818,
+      "step": 256
+    },
+    {
+      "epoch": 0.43624018671758963,
+      "grad_norm": 0.2158069759607315,
+      "learning_rate": 0.00025623417461721884,
+      "loss": 0.6434,
+      "step": 257
+    },
+    {
+      "epoch": 0.437937619350732,
+      "grad_norm": 0.2332068681716919,
+      "learning_rate": 0.00025589759271767344,
+      "loss": 0.8126,
+      "step": 258
+    },
+    {
+      "epoch": 0.43963505198387437,
+      "grad_norm": 0.21993213891983032,
+      "learning_rate": 0.00025555994443054504,
+      "loss": 0.6689,
+      "step": 259
+    },
+    {
+      "epoch": 0.44133248461701674,
+      "grad_norm": 0.26037323474884033,
+      "learning_rate": 0.0002552212331559482,
+      "loss": 0.992,
+      "step": 260
+    },
+    {
+      "epoch": 0.4430299172501591,
+      "grad_norm": 0.2357717603445053,
+      "learning_rate": 0.00025488146230470156,
+      "loss": 0.7212,
+      "step": 261
+    },
+    {
+      "epoch": 0.4447273498833015,
+      "grad_norm": 0.22752051055431366,
+      "learning_rate": 0.00025454063529829405,
+      "loss": 0.7759,
+      "step": 262
+    },
+    {
+      "epoch": 0.44642478251644385,
+      "grad_norm": 0.20641978085041046,
+      "learning_rate": 0.0002541987555688496,
+      "loss": 0.6029,
+      "step": 263
+    },
+    {
+      "epoch": 0.4481222151495863,
+      "grad_norm": 1.728589415550232,
+      "learning_rate": 0.0002538558265590934,
+      "loss": 0.8527,
+      "step": 264
+    },
+    {
+      "epoch": 0.44981964778272865,
+      "grad_norm": 0.3176920711994171,
+      "learning_rate": 0.0002535118517223168,
+      "loss": 1.0045,
+      "step": 265
+    },
+    {
+      "epoch": 0.451517080415871,
+      "grad_norm": 0.22813205420970917,
+      "learning_rate": 0.00025316683452234254,
+      "loss": 0.5755,
+      "step": 266
+    },
+    {
+      "epoch": 0.4532145130490134,
+      "grad_norm": 0.27417638897895813,
+      "learning_rate": 0.00025282077843349,
+      "loss": 0.6442,
+      "step": 267
+    },
+    {
+      "epoch": 0.45491194568215576,
+      "grad_norm": 0.23180553317070007,
+      "learning_rate": 0.00025247368694054017,
+      "loss": 0.3961,
+      "step": 268
+    },
+    {
+      "epoch": 0.45660937831529813,
+      "grad_norm": 0.21716707944869995,
+      "learning_rate": 0.0002521255635387005,
+      "loss": 0.5498,
+      "step": 269
+    },
+    {
+      "epoch": 0.4583068109484405,
+      "grad_norm": 0.1608789563179016,
+      "learning_rate": 0.0002517764117335698,
+      "loss": 0.3229,
+      "step": 270
+    },
+    {
+      "epoch": 0.4600042435815829,
+      "grad_norm": 0.06968680769205093,
+      "learning_rate": 0.00025142623504110286,
+      "loss": 0.0545,
+      "step": 271
+    },
+    {
+      "epoch": 0.46170167621472524,
+      "grad_norm": 0.17753787338733673,
+      "learning_rate": 0.0002510750369875752,
+      "loss": 0.3944,
+      "step": 272
+    },
+    {
+      "epoch": 0.4633991088478676,
+      "grad_norm": 0.1846492886543274,
+      "learning_rate": 0.0002507228211095471,
+      "loss": 0.4219,
+      "step": 273
+    },
+    {
+      "epoch": 0.46509654148101,
+      "grad_norm": 0.12200163304805756,
+      "learning_rate": 0.0002503695909538287,
+      "loss": 0.1832,
+      "step": 274
+    },
+    {
+      "epoch": 0.46679397411415235,
+      "grad_norm": 0.08617426455020905,
+      "learning_rate": 0.00025001535007744373,
+      "loss": 0.0833,
+      "step": 275
+    },
+    {
+      "epoch": 0.4684914067472947,
+      "grad_norm": 0.10826346278190613,
+      "learning_rate": 0.0002496601020475938,
+      "loss": 0.1379,
+      "step": 276
+    },
+    {
+      "epoch": 0.4701888393804371,
+      "grad_norm": 0.09130895137786865,
+      "learning_rate": 0.00024930385044162276,
+      "loss": 0.0909,
+      "step": 277
+    },
+    {
+      "epoch": 0.47188627201357947,
+      "grad_norm": 0.01284122746437788,
+      "learning_rate": 0.0002489465988469802,
+      "loss": 0.0011,
+      "step": 278
+    },
+    {
+      "epoch": 0.47358370464672184,
+      "grad_norm": 0.03364328294992447,
+      "learning_rate": 0.0002485883508611858,
+      "loss": 0.0151,
+      "step": 279
+    },
+    {
+      "epoch": 0.4752811372798642,
+      "grad_norm": 0.005700011737644672,
+      "learning_rate": 0.00024822911009179276,
+      "loss": 0.0004,
+      "step": 280
+    },
+    {
+      "epoch": 0.4769785699130066,
+      "grad_norm": 0.026785628870129585,
+      "learning_rate": 0.0002478688801563516,
+      "loss": 0.0022,
+      "step": 281
+    },
+    {
+      "epoch": 0.47867600254614895,
+      "grad_norm": 0.014533424749970436,
+      "learning_rate": 0.00024750766468237387,
+      "loss": 0.0009,
+      "step": 282
+    },
+    {
+      "epoch": 0.4803734351792913,
+      "grad_norm": 0.02565724588930607,
+      "learning_rate": 0.0002471454673072953,
+      "loss": 0.0015,
+      "step": 283
+    },
+    {
+      "epoch": 0.4820708678124337,
+      "grad_norm": 0.01476586889475584,
+      "learning_rate": 0.0002467822916784394,
+      "loss": 0.0007,
+      "step": 284
+    },
+    {
+      "epoch": 0.48376830044557606,
+      "grad_norm": 0.0030139784794300795,
+      "learning_rate": 0.0002464181414529809,
+      "loss": 0.0001,
+      "step": 285
+    },
+    {
+      "epoch": 0.48546573307871843,
+      "grad_norm": 0.013711950741708279,
+      "learning_rate": 0.00024605302029790836,
+      "loss": 0.0002,
+      "step": 286
+    },
+    {
+      "epoch": 0.4871631657118608,
+      "grad_norm": 0.0045742918737232685,
+      "learning_rate": 0.00024568693188998776,
+      "loss": 0.0002,
+      "step": 287
+    },
+    {
+      "epoch": 0.48886059834500317,
+      "grad_norm": 0.01617550477385521,
+      "learning_rate": 0.00024531987991572543,
+      "loss": 0.0008,
+      "step": 288
+    },
+    {
+      "epoch": 0.49055803097814554,
+      "grad_norm": 0.0015017741825431585,
+      "learning_rate": 0.00024495186807133056,
+      "loss": 0.0001,
+      "step": 289
+    },
+    {
+      "epoch": 0.4922554636112879,
+      "grad_norm": 0.04141293093562126,
+      "learning_rate": 0.00024458290006267833,
+      "loss": 0.0021,
+      "step": 290
+    },
+    {
+      "epoch": 0.4939528962444303,
+      "grad_norm": 0.03005625680088997,
+      "learning_rate": 0.0002442129796052726,
+      "loss": 0.0015,
+      "step": 291
+    },
+    {
+      "epoch": 0.49565032887757265,
+      "grad_norm": 0.0004941977094858885,
+      "learning_rate": 0.00024384211042420822,
+      "loss": 0.0,
+      "step": 292
+    },
+    {
+      "epoch": 0.497347761510715,
+      "grad_norm": 0.0018400073749944568,
+      "learning_rate": 0.00024347029625413364,
+      "loss": 0.0001,
+      "step": 293
+    },
+    {
+      "epoch": 0.4990451941438574,
+      "grad_norm": 0.023964567109942436,
+      "learning_rate": 0.00024309754083921354,
+      "loss": 0.0008,
+      "step": 294
+    },
+    {
+      "epoch": 0.5007426267769998,
+      "grad_norm": 0.00808011181652546,
+      "learning_rate": 0.00024272384793309077,
+      "loss": 0.0003,
+      "step": 295
+    },
+    {
+      "epoch": 0.5024400594101421,
+      "grad_norm": 0.026506319642066956,
+      "learning_rate": 0.0002423492212988487,
+      "loss": 0.0011,
+      "step": 296
+    },
+    {
+      "epoch": 0.5041374920432845,
+      "grad_norm": 0.0033183887135237455,
+      "learning_rate": 0.0002419736647089735,
+      "loss": 0.0001,
+      "step": 297
+    },
+    {
+      "epoch": 0.5058349246764269,
+      "grad_norm": 0.0012288711732253432,
+      "learning_rate": 0.00024159718194531572,
+      "loss": 0.0001,
+      "step": 298
+    },
+    {
+      "epoch": 0.5075323573095692,
+      "grad_norm": 0.001825229381211102,
+      "learning_rate": 0.00024121977679905266,
+      "loss": 0.0001,
+      "step": 299
+    },
+    {
+      "epoch": 0.5092297899427116,
+      "grad_norm": 0.0013848728267475963,
+      "learning_rate": 0.00024084145307064997,
+      "loss": 0.0001,
+      "step": 300
+    },
+    {
+      "epoch": 0.5092297899427116,
+      "eval_loss": 0.408105731010437,
+      "eval_runtime": 65.5854,
+      "eval_samples_per_second": 2.943,
+      "eval_steps_per_second": 2.943,
+      "step": 300
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.9420575090868224e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null