error577 committed
Commit 7d19dbc · verified · 1 Parent(s): 048c42a

Training in progress, step 800, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e2d3e565906909995f0316ab1b42c5b8582f202c5b46bdb3debada08aad9f23
+oid sha256:2316cbf7807803c10504f0552a3df41c6728ec9769dcfef615a415ea7c583af0
 size 323014168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65d3780db8a939784f55f6ed16bff2c90230ce0f04e95f25dd2d77d968c0cf27
+oid sha256:833cdbd9a1b69a3478278dfc0e64ccecf27f27d112d125d68306b87027ce427a
 size 165484738
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:324895416f240bdb5ec1dbc24af23e6a273244e76245fac846a838380db560e5
+oid sha256:e7b315b29c598de44532516d3ac125be85a14a2f551aba8e3d144136258f1dba
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:512b8e162e341e64fd4843838fdd946779531701440bd44036ec363f546a8e5f
+oid sha256:d2c17f8131c6c5ea7f0a3b5c0825855397d8dc22a4373cf0983d7dd31b7657c7
 size 1064
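
Note: the four files above are Git LFS pointer files, so only the sha256 oid changes between checkpoints while the recorded byte size stays the same. As a minimal sketch (not part of this repo), one might confirm that a locally pulled artifact matches the oid in its pointer; the file path below is an assumption for illustration.

# Minimal sketch: compare a downloaded file's sha256 against the oid
# recorded in its Git LFS pointer. Path and expected value are examples
# taken from the adapter_model.safetensors pointer above.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "2316cbf7807803c10504f0552a3df41c6728ec9769dcfef615a415ea7c583af0"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")  # assumed local path
print("match" if actual == expected else f"mismatch: {actual}")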
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.3638736605644226,
   "best_model_checkpoint": "miner_id_24/checkpoint-650",
-  "epoch": 1.273074474856779,
+  "epoch": 1.3579461065138978,
   "eval_steps": 50,
-  "global_step": 750,
+  "global_step": 800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5385,6 +5385,364 @@
       "eval_samples_per_second": 2.928,
       "eval_steps_per_second": 2.928,
       "step": 750
+    },
+    {
+      "epoch": 1.2747719074899215,
+      "grad_norm": 0.2629989981651306,
+      "learning_rate": 4.4440055569454936e-05,
+      "loss": 0.5687,
+      "step": 751
+    },
+    {
+      "epoch": 1.2764693401230638,
+      "grad_norm": 0.29386061429977417,
+      "learning_rate": 4.410240728232653e-05,
+      "loss": 0.4559,
+      "step": 752
+    },
+    {
+      "epoch": 1.2781667727562063,
+      "grad_norm": 0.32051095366477966,
+      "learning_rate": 4.376582538278114e-05,
+      "loss": 0.7155,
+      "step": 753
+    },
+    {
+      "epoch": 1.2798642053893485,
+      "grad_norm": 0.2592981159687042,
+      "learning_rate": 4.3430313260194697e-05,
+      "loss": 0.4972,
+      "step": 754
+    },
+    {
+      "epoch": 1.281561638022491,
+      "grad_norm": 0.23039335012435913,
+      "learning_rate": 4.309587429317061e-05,
+      "loss": 0.4049,
+      "step": 755
+    },
+    {
+      "epoch": 1.2832590706556333,
+      "grad_norm": 0.2293672114610672,
+      "learning_rate": 4.2762511849505476e-05,
+      "loss": 0.4085,
+      "step": 756
+    },
+    {
+      "epoch": 1.2849565032887758,
+      "grad_norm": 0.2180144041776657,
+      "learning_rate": 4.2430229286155484e-05,
+      "loss": 0.3829,
+      "step": 757
+    },
+    {
+      "epoch": 1.286653935921918,
+      "grad_norm": 0.22328434884548187,
+      "learning_rate": 4.209902994920235e-05,
+      "loss": 0.381,
+      "step": 758
+    },
+    {
+      "epoch": 1.2883513685550605,
+      "grad_norm": 0.19186857342720032,
+      "learning_rate": 4.176891717381967e-05,
+      "loss": 0.2676,
+      "step": 759
+    },
+    {
+      "epoch": 1.290048801188203,
+      "grad_norm": 0.19644580781459808,
+      "learning_rate": 4.143989428423947e-05,
+      "loss": 0.2604,
+      "step": 760
+    },
+    {
+      "epoch": 1.2917462338213452,
+      "grad_norm": 0.1470167487859726,
+      "learning_rate": 4.111196459371862e-05,
+      "loss": 0.1697,
+      "step": 761
+    },
+    {
+      "epoch": 1.2934436664544875,
+      "grad_norm": 0.10344719886779785,
+      "learning_rate": 4.0785131404505376e-05,
+      "loss": 0.0742,
+      "step": 762
+    },
+    {
+      "epoch": 1.29514109908763,
+      "grad_norm": 0.08508791774511337,
+      "learning_rate": 4.045939800780639e-05,
+      "loss": 0.0492,
+      "step": 763
+    },
+    {
+      "epoch": 1.2968385317207725,
+      "grad_norm": 0.08410584181547165,
+      "learning_rate": 4.0134767683753385e-05,
+      "loss": 0.049,
+      "step": 764
+    },
+    {
+      "epoch": 1.2985359643539147,
+      "grad_norm": 0.001906770863570273,
+      "learning_rate": 3.981124370137001e-05,
+      "loss": 0.0001,
+      "step": 765
+    },
+    {
+      "epoch": 1.300233396987057,
+      "grad_norm": 0.14595600962638855,
+      "learning_rate": 3.948882931853924e-05,
+      "loss": 0.119,
+      "step": 766
+    },
+    {
+      "epoch": 1.3019308296201995,
+      "grad_norm": 0.00104467140045017,
+      "learning_rate": 3.916752778197039e-05,
+      "loss": 0.0001,
+      "step": 767
+    },
+    {
+      "epoch": 1.303628262253342,
+      "grad_norm": 0.00029723646002821624,
+      "learning_rate": 3.8847342327166244e-05,
+      "loss": 0.0,
+      "step": 768
+    },
+    {
+      "epoch": 1.3053256948864842,
+      "grad_norm": 0.07592643052339554,
+      "learning_rate": 3.852827617839084e-05,
+      "loss": 0.026,
+      "step": 769
+    },
+    {
+      "epoch": 1.3070231275196265,
+      "grad_norm": 0.00041246655746363103,
+      "learning_rate": 3.8210332548636796e-05,
+      "loss": 0.0,
+      "step": 770
+    },
+    {
+      "epoch": 1.308720560152769,
+      "grad_norm": 0.002659448655322194,
+      "learning_rate": 3.7893514639592895e-05,
+      "loss": 0.0001,
+      "step": 771
+    },
+    {
+      "epoch": 1.3104179927859114,
+      "grad_norm": 0.00021023498266004026,
+      "learning_rate": 3.757782564161191e-05,
+      "loss": 0.0,
+      "step": 772
+    },
+    {
+      "epoch": 1.3121154254190537,
+      "grad_norm": 0.0010800276650115848,
+      "learning_rate": 3.7263268733678606e-05,
+      "loss": 0.0,
+      "step": 773
+    },
+    {
+      "epoch": 1.313812858052196,
+      "grad_norm": 0.0006105787470005453,
+      "learning_rate": 3.694984708337756e-05,
+      "loss": 0.0,
+      "step": 774
+    },
+    {
+      "epoch": 1.3155102906853384,
+      "grad_norm": 0.0007423445931635797,
+      "learning_rate": 3.663756384686127e-05,
+      "loss": 0.0,
+      "step": 775
+    },
+    {
+      "epoch": 1.317207723318481,
+      "grad_norm": 0.0007892303401604295,
+      "learning_rate": 3.632642216881847e-05,
+      "loss": 0.0,
+      "step": 776
+    },
+    {
+      "epoch": 1.3189051559516232,
+      "grad_norm": 0.0015005484456196427,
+      "learning_rate": 3.601642518244247e-05,
+      "loss": 0.0,
+      "step": 777
+    },
+    {
+      "epoch": 1.3206025885847654,
+      "grad_norm": 0.0008817919879220426,
+      "learning_rate": 3.570757600939939e-05,
+      "loss": 0.0,
+      "step": 778
+    },
+    {
+      "epoch": 1.322300021217908,
+      "grad_norm": 0.0003715140337590128,
+      "learning_rate": 3.5399877759797e-05,
+      "loss": 0.0,
+      "step": 779
+    },
+    {
+      "epoch": 1.3239974538510504,
+      "grad_norm": 0.001832145731896162,
+      "learning_rate": 3.509333353215331e-05,
+      "loss": 0.0001,
+      "step": 780
+    },
+    {
+      "epoch": 1.3256948864841926,
+      "grad_norm": 0.0006237781490199268,
+      "learning_rate": 3.47879464133652e-05,
+      "loss": 0.0,
+      "step": 781
+    },
+    {
+      "epoch": 1.327392319117335,
+      "grad_norm": 0.0015418545808643103,
+      "learning_rate": 3.448371947867763e-05,
+      "loss": 0.0001,
+      "step": 782
+    },
+    {
+      "epoch": 1.3290897517504774,
+      "grad_norm": 0.0035702355671674013,
+      "learning_rate": 3.4180655791652476e-05,
+      "loss": 0.0001,
+      "step": 783
+    },
+    {
+      "epoch": 1.3307871843836199,
+      "grad_norm": 0.0013561249943450093,
+      "learning_rate": 3.3878758404137624e-05,
+      "loss": 0.0001,
+      "step": 784
+    },
+    {
+      "epoch": 1.3324846170167621,
+      "grad_norm": 0.001489428337663412,
+      "learning_rate": 3.3578030356236455e-05,
+      "loss": 0.0001,
+      "step": 785
+    },
+    {
+      "epoch": 1.3341820496499044,
+      "grad_norm": 0.0011818065540865064,
+      "learning_rate": 3.3278474676277114e-05,
+      "loss": 0.0,
+      "step": 786
+    },
+    {
+      "epoch": 1.3358794822830469,
+      "grad_norm": 0.0076867276802659035,
+      "learning_rate": 3.298009438078194e-05,
+      "loss": 0.0003,
+      "step": 787
+    },
+    {
+      "epoch": 1.3375769149161894,
+      "grad_norm": 0.002071813913062215,
+      "learning_rate": 3.268289247443713e-05,
+      "loss": 0.0001,
+      "step": 788
+    },
+    {
+      "epoch": 1.3392743475493316,
+      "grad_norm": 0.002192431129515171,
+      "learning_rate": 3.238687195006264e-05,
+      "loss": 0.0001,
+      "step": 789
+    },
+    {
+      "epoch": 1.3409717801824739,
+      "grad_norm": 0.24951210618019104,
+      "learning_rate": 3.209203578858191e-05,
+      "loss": 0.6115,
+      "step": 790
+    },
+    {
+      "epoch": 1.3426692128156164,
+      "grad_norm": 0.29264548420906067,
+      "learning_rate": 3.1798386958991714e-05,
+      "loss": 0.71,
+      "step": 791
+    },
+    {
+      "epoch": 1.3443666454487588,
+      "grad_norm": 0.2815876603126526,
+      "learning_rate": 3.1505928418332574e-05,
+      "loss": 0.6586,
+      "step": 792
+    },
+    {
+      "epoch": 1.346064078081901,
+      "grad_norm": 0.25446027517318726,
+      "learning_rate": 3.121466311165875e-05,
+      "loss": 0.5623,
+      "step": 793
+    },
+    {
+      "epoch": 1.3477615107150436,
+      "grad_norm": 0.2643551230430603,
+      "learning_rate": 3.092459397200861e-05,
+      "loss": 0.6298,
+      "step": 794
+    },
+    {
+      "epoch": 1.3494589433481858,
+      "grad_norm": 0.24555166065692902,
+      "learning_rate": 3.0635723920375164e-05,
+      "loss": 0.5237,
+      "step": 795
+    },
+    {
+      "epoch": 1.3511563759813283,
+      "grad_norm": 0.2268795222043991,
+      "learning_rate": 3.0348055865676707e-05,
+      "loss": 0.3814,
+      "step": 796
+    },
+    {
+      "epoch": 1.3528538086144706,
+      "grad_norm": 0.25621020793914795,
+      "learning_rate": 3.0061592704727257e-05,
+      "loss": 0.5183,
+      "step": 797
+    },
+    {
+      "epoch": 1.354551241247613,
+      "grad_norm": 0.2662016451358795,
+      "learning_rate": 2.9776337322207687e-05,
+      "loss": 0.5679,
+      "step": 798
+    },
+    {
+      "epoch": 1.3562486738807553,
+      "grad_norm": 0.20577247440814972,
+      "learning_rate": 2.9492292590636613e-05,
+      "loss": 0.3231,
+      "step": 799
+    },
+    {
+      "epoch": 1.3579461065138978,
+      "grad_norm": 0.23013556003570557,
+      "learning_rate": 2.9209461370341204e-05,
+      "loss": 0.4158,
+      "step": 800
+    },
+    {
+      "epoch": 1.3579461065138978,
+      "eval_loss": 0.36441436409950256,
+      "eval_runtime": 65.9028,
+      "eval_samples_per_second": 2.929,
+      "eval_steps_per_second": 2.929,
+      "step": 800
     }
   ],
   "logging_steps": 1,
@@ -5399,7 +5757,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
+        "early_stopping_patience_counter": 3
      }
     },
     "TrainerControl": {
@@ -5408,12 +5766,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.861295086094254e+17,
+  "total_flos": 5.185996556504924e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null