Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c88362cfe50973d14e9c9cf971dba4d9f238bd2c8d4d4f48cd5917c81b19f673
 size 912336848

 version https://git-lfs.github.com/spec/v1
+oid sha256:a5ce9388ff5c0d016e84e5463befac62b67968957fff89127716a439a91513cd
 size 912336848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97a26c84d45812d78efe031ffabe77785f437ad0fe565afd9812d0af27f5825a
 size 463916180

 version https://git-lfs.github.com/spec/v1
+oid sha256:125c09852ff0efdf055c0b05d8491ab176667896b227314f13bcf828c1716814
 size 463916180

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de688329c46c6b609a55be32c0d60fda95d166295363c68e32bec4af3b0f287a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:73802fadfe7a3f3281ac6f7349f29b843350a9fb51f5dee1bb39ff24037f453b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed340328d62f63f2d9d00f6904395875dd09851f050e03ae91b4b798d852ce41
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:99ca67f9a35cd1c50df3d93bedf5a6642db2c7847c021a0a8d3f44c1bf4993d3
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8847057223320007,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.03170577045022194,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 8.924,
       "eval_steps_per_second": 2.231,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.1662573078315008e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.8155827522277832,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.06341154090044387,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 8.924,
       "eval_steps_per_second": 2.231,
       "step": 50
+    },
+    {
+      "epoch": 0.032339885859226376,
+      "grad_norm": 3.129869222640991,
+      "learning_rate": 9.729774092143627e-05,
+      "loss": 3.7664,
+      "step": 51
+    },
+    {
+      "epoch": 0.032974001268230815,
+      "grad_norm": 2.0350043773651123,
+      "learning_rate": 9.716559066288715e-05,
+      "loss": 3.6251,
+      "step": 52
+    },
+    {
+      "epoch": 0.033608116677235254,
+      "grad_norm": 1.3122864961624146,
+      "learning_rate": 9.703037989675087e-05,
+      "loss": 3.5988,
+      "step": 53
+    },
+    {
+      "epoch": 0.03424223208623969,
+      "grad_norm": 1.3547205924987793,
+      "learning_rate": 9.689211739666023e-05,
+      "loss": 3.4852,
+      "step": 54
+    },
+    {
+      "epoch": 0.03487634749524413,
+      "grad_norm": 1.131834864616394,
+      "learning_rate": 9.675081213427076e-05,
+      "loss": 3.4682,
+      "step": 55
+    },
+    {
+      "epoch": 0.03551046290424857,
+      "grad_norm": 1.118656039237976,
+      "learning_rate": 9.66064732786784e-05,
+      "loss": 3.4695,
+      "step": 56
+    },
+    {
+      "epoch": 0.03614457831325301,
+      "grad_norm": 1.095979928970337,
+      "learning_rate": 9.645911019582467e-05,
+      "loss": 3.3906,
+      "step": 57
+    },
+    {
+      "epoch": 0.03677869372225745,
+      "grad_norm": 1.330224633216858,
+      "learning_rate": 9.630873244788883e-05,
+      "loss": 3.5858,
+      "step": 58
+    },
+    {
+      "epoch": 0.03741280913126189,
+      "grad_norm": 1.1612094640731812,
+      "learning_rate": 9.615534979266745e-05,
+      "loss": 3.5082,
+      "step": 59
+    },
+    {
+      "epoch": 0.03804692454026633,
+      "grad_norm": 1.1439632177352905,
+      "learning_rate": 9.599897218294122e-05,
+      "loss": 3.4478,
+      "step": 60
+    },
+    {
+      "epoch": 0.03868103994927077,
+      "grad_norm": 1.480470895767212,
+      "learning_rate": 9.583960976582913e-05,
+      "loss": 3.3747,
+      "step": 61
+    },
+    {
+      "epoch": 0.039315155358275206,
+      "grad_norm": 1.1436293125152588,
+      "learning_rate": 9.567727288213005e-05,
+      "loss": 3.3507,
+      "step": 62
+    },
+    {
+      "epoch": 0.039949270767279645,
+      "grad_norm": 1.1501903533935547,
+      "learning_rate": 9.551197206565173e-05,
+      "loss": 3.3903,
+      "step": 63
+    },
+    {
+      "epoch": 0.040583386176284084,
+      "grad_norm": 1.1135419607162476,
+      "learning_rate": 9.534371804252728e-05,
+      "loss": 3.2974,
+      "step": 64
+    },
+    {
+      "epoch": 0.04121750158528852,
+      "grad_norm": 1.1008535623550415,
+      "learning_rate": 9.517252173051911e-05,
+      "loss": 3.3343,
+      "step": 65
+    },
+    {
+      "epoch": 0.04185161699429296,
+      "grad_norm": 1.1229270696640015,
+      "learning_rate": 9.49983942383106e-05,
+      "loss": 3.3202,
+      "step": 66
+    },
+    {
+      "epoch": 0.0424857324032974,
+      "grad_norm": 1.129658579826355,
+      "learning_rate": 9.482134686478519e-05,
+      "loss": 3.4031,
+      "step": 67
+    },
+    {
+      "epoch": 0.04311984781230184,
+      "grad_norm": 1.1309914588928223,
+      "learning_rate": 9.464139109829321e-05,
+      "loss": 3.3727,
+      "step": 68
+    },
+    {
+      "epoch": 0.04375396322130628,
+      "grad_norm": 1.1195210218429565,
+      "learning_rate": 9.445853861590647e-05,
+      "loss": 3.3391,
+      "step": 69
+    },
+    {
+      "epoch": 0.04438807863031072,
+      "grad_norm": 1.14125657081604,
+      "learning_rate": 9.42728012826605e-05,
+      "loss": 3.2907,
+      "step": 70
+    },
+    {
+      "epoch": 0.04502219403931516,
+      "grad_norm": 1.1160163879394531,
+      "learning_rate": 9.408419115078471e-05,
+      "loss": 3.2616,
+      "step": 71
+    },
+    {
+      "epoch": 0.045656309448319596,
+      "grad_norm": 1.1449788808822632,
+      "learning_rate": 9.389272045892024e-05,
+      "loss": 3.3349,
+      "step": 72
+    },
+    {
+      "epoch": 0.046290424857324035,
+      "grad_norm": 1.096792221069336,
+      "learning_rate": 9.36984016313259e-05,
+      "loss": 3.2215,
+      "step": 73
+    },
+    {
+      "epoch": 0.046924540266328474,
+      "grad_norm": 1.1410120725631714,
+      "learning_rate": 9.350124727707197e-05,
+      "loss": 3.3399,
+      "step": 74
+    },
+    {
+      "epoch": 0.04755865567533291,
+      "grad_norm": 1.1690126657485962,
+      "learning_rate": 9.330127018922194e-05,
+      "loss": 3.354,
+      "step": 75
+    },
+    {
+      "epoch": 0.04819277108433735,
+      "grad_norm": 1.2746645212173462,
+      "learning_rate": 9.309848334400246e-05,
+      "loss": 3.3572,
+      "step": 76
+    },
+    {
+      "epoch": 0.04882688649334179,
+      "grad_norm": 1.1271394491195679,
+      "learning_rate": 9.289289989996133e-05,
+      "loss": 3.3658,
+      "step": 77
+    },
+    {
+      "epoch": 0.04946100190234623,
+      "grad_norm": 1.2954553365707397,
+      "learning_rate": 9.268453319711363e-05,
+      "loss": 3.2274,
+      "step": 78
+    },
+    {
+      "epoch": 0.05009511731135067,
+      "grad_norm": 1.1216834783554077,
+      "learning_rate": 9.247339675607605e-05,
+      "loss": 3.267,
+      "step": 79
+    },
+    {
+      "epoch": 0.05072923272035511,
+      "grad_norm": 1.1674860715866089,
+      "learning_rate": 9.225950427718975e-05,
+      "loss": 3.3223,
+      "step": 80
+    },
+    {
+      "epoch": 0.05136334812935954,
+      "grad_norm": 1.0981123447418213,
+      "learning_rate": 9.204286963963111e-05,
+      "loss": 3.1553,
+      "step": 81
+    },
+    {
+      "epoch": 0.05199746353836398,
+      "grad_norm": 1.2098571062088013,
+      "learning_rate": 9.182350690051133e-05,
+      "loss": 3.287,
+      "step": 82
+    },
+    {
+      "epoch": 0.05263157894736842,
+      "grad_norm": 1.128381371498108,
+      "learning_rate": 9.160143029396422e-05,
+      "loss": 3.26,
+      "step": 83
+    },
+    {
+      "epoch": 0.05326569435637286,
+      "grad_norm": 1.2328784465789795,
+      "learning_rate": 9.13766542302225e-05,
+      "loss": 3.3351,
+      "step": 84
+    },
+    {
+      "epoch": 0.053899809765377296,
+      "grad_norm": 1.1560747623443604,
+      "learning_rate": 9.114919329468282e-05,
+      "loss": 3.268,
+      "step": 85
+    },
+    {
+      "epoch": 0.054533925174381735,
+      "grad_norm": 1.191244125366211,
+      "learning_rate": 9.091906224695935e-05,
+      "loss": 3.1744,
+      "step": 86
+    },
+    {
+      "epoch": 0.055168040583386174,
+      "grad_norm": 1.1172856092453003,
+      "learning_rate": 9.068627601992598e-05,
+      "loss": 3.1647,
+      "step": 87
+    },
+    {
+      "epoch": 0.05580215599239061,
+      "grad_norm": 1.1811506748199463,
+      "learning_rate": 9.045084971874738e-05,
+      "loss": 3.1507,
+      "step": 88
+    },
+    {
+      "epoch": 0.05643627140139505,
+      "grad_norm": 1.2562776803970337,
+      "learning_rate": 9.021279861989885e-05,
+      "loss": 3.1243,
+      "step": 89
+    },
+    {
+      "epoch": 0.05707038681039949,
+      "grad_norm": 1.2073371410369873,
+      "learning_rate": 8.997213817017507e-05,
+      "loss": 3.2071,
+      "step": 90
+    },
+    {
+      "epoch": 0.05770450221940393,
+      "grad_norm": 1.179689884185791,
+      "learning_rate": 8.972888398568772e-05,
+      "loss": 3.1118,
+      "step": 91
+    },
+    {
+      "epoch": 0.05833861762840837,
+      "grad_norm": 1.1589559316635132,
+      "learning_rate": 8.948305185085225e-05,
+      "loss": 3.0971,
+      "step": 92
+    },
+    {
+      "epoch": 0.05897273303741281,
+      "grad_norm": 1.5334442853927612,
+      "learning_rate": 8.92346577173636e-05,
+      "loss": 2.9583,
+      "step": 93
+    },
+    {
+      "epoch": 0.05960684844641725,
+      "grad_norm": 1.1919718980789185,
+      "learning_rate": 8.898371770316111e-05,
+      "loss": 2.9499,
+      "step": 94
+    },
+    {
+      "epoch": 0.060240963855421686,
+      "grad_norm": 1.2710996866226196,
+      "learning_rate": 8.873024809138272e-05,
+      "loss": 3.0539,
+      "step": 95
+    },
+    {
+      "epoch": 0.060875079264426125,
+      "grad_norm": 1.2094289064407349,
+      "learning_rate": 8.847426532930831e-05,
+      "loss": 2.9798,
+      "step": 96
+    },
+    {
+      "epoch": 0.061509194673430564,
+      "grad_norm": 1.2845017910003662,
+      "learning_rate": 8.821578602729242e-05,
+      "loss": 2.7427,
+      "step": 97
+    },
+    {
+      "epoch": 0.062143310082435003,
+      "grad_norm": 1.26568603515625,
+      "learning_rate": 8.795482695768658e-05,
+      "loss": 2.7497,
+      "step": 98
+    },
+    {
+      "epoch": 0.06277742549143944,
+      "grad_norm": 1.4030022621154785,
+      "learning_rate": 8.769140505375085e-05,
+      "loss": 2.6945,
+      "step": 99
+    },
+    {
+      "epoch": 0.06341154090044387,
+      "grad_norm": 1.5023269653320312,
+      "learning_rate": 8.742553740855506e-05,
+      "loss": 2.8778,
+      "step": 100
+    },
+    {
+      "epoch": 0.06341154090044387,
+      "eval_loss": 0.8155827522277832,
+      "eval_runtime": 297.7632,
+      "eval_samples_per_second": 8.92,
+      "eval_steps_per_second": 2.23,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.3383168908263424e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null