beast33 committed on
Commit
0336946
·
verified ·
1 Parent(s): f145216

Training in progress, step 309, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a518a594168876a8907e142a5ec101420b772f7d4a4d1033df583177b4e92cd3
3
  size 639691872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b1bd90d6e372f45800ceac53c536142c3ee30347ead05d2e23a2b902be42689
3
  size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dffda1d4ace02ebb6c8b05443e8e249ede4e48325ec8a300481437db3ac4314c
3
  size 325340244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0ad20e2f09232e22dff7b31a82c489a47c66c3f569e80a704ef610687fd1984
3
  size 325340244
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51cf95d40bcc1d1c9a2583de6235211d4a201114723d69666b6435f8fbdee579
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa0d68e5dd2d58bdc87345576f2f74dc9651c98d7b5f3d0e037c2696583bab96
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1661baebacd5f03a5441403c0632d52750084c359c58314f3911ce2808755a25
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:979dde1f0d281eb2dc3958f83b783edeadae178d67f76589e69c962081c16466
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.4889930784702301,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
- "epoch": 0.9732360097323601,
5
  "eval_steps": 100,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2139,6 +2139,69 @@
2139
  "eval_samples_per_second": 7.196,
2140
  "eval_steps_per_second": 1.803,
2141
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2142
  }
2143
  ],
2144
  "logging_steps": 1,
@@ -2162,12 +2225,12 @@
2162
  "should_evaluate": false,
2163
  "should_log": false,
2164
  "should_save": true,
2165
- "should_training_stop": false
2166
  },
2167
  "attributes": {}
2168
  }
2169
  },
2170
- "total_flos": 4.011444759397663e+17,
2171
  "train_batch_size": 8,
2172
  "trial_name": null,
2173
  "trial_params": null
 
1
  {
2
  "best_metric": 0.4889930784702301,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
+ "epoch": 1.002433090024331,
5
  "eval_steps": 100,
6
+ "global_step": 309,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2139
  "eval_samples_per_second": 7.196,
2140
  "eval_steps_per_second": 1.803,
2141
  "step": 300
2142
+ },
2143
+ {
2144
+ "epoch": 0.9764801297648013,
2145
+ "grad_norm": 1.423581600189209,
2146
+ "learning_rate": 1.889514885109689e-07,
2147
+ "loss": 0.5293,
2148
+ "step": 301
2149
+ },
2150
+ {
2151
+ "epoch": 0.9797242497972425,
2152
+ "grad_norm": 0.757242739200592,
2153
+ "learning_rate": 1.4468735616587904e-07,
2154
+ "loss": 0.4023,
2155
+ "step": 302
2156
+ },
2157
+ {
2158
+ "epoch": 0.9829683698296837,
2159
+ "grad_norm": 1.0456199645996094,
2160
+ "learning_rate": 1.0631452502237737e-07,
2161
+ "loss": 0.7101,
2162
+ "step": 303
2163
+ },
2164
+ {
2165
+ "epoch": 0.986212489862125,
2166
+ "grad_norm": 0.8465937376022339,
2167
+ "learning_rate": 7.383752952010992e-08,
2168
+ "loss": 0.333,
2169
+ "step": 304
2170
+ },
2171
+ {
2172
+ "epoch": 0.9894566098945661,
2173
+ "grad_norm": 0.9582859873771667,
2174
+ "learning_rate": 4.7260207399774105e-08,
2175
+ "loss": 0.5266,
2176
+ "step": 305
2177
+ },
2178
+ {
2179
+ "epoch": 0.9927007299270073,
2180
+ "grad_norm": 0.9226927757263184,
2181
+ "learning_rate": 2.6585699249642716e-08,
2182
+ "loss": 0.3913,
2183
+ "step": 306
2184
+ },
2185
+ {
2186
+ "epoch": 0.9959448499594485,
2187
+ "grad_norm": 0.7818783521652222,
2188
+ "learning_rate": 1.181644813441074e-08,
2189
+ "loss": 0.4817,
2190
+ "step": 307
2191
+ },
2192
+ {
2193
+ "epoch": 0.9991889699918897,
2194
+ "grad_norm": 0.7689955234527588,
2195
+ "learning_rate": 2.9541993065373976e-09,
2196
+ "loss": 0.3477,
2197
+ "step": 308
2198
+ },
2199
+ {
2200
+ "epoch": 1.002433090024331,
2201
+ "grad_norm": 1.9308133125305176,
2202
+ "learning_rate": 0.0,
2203
+ "loss": 1.2261,
2204
+ "step": 309
2205
  }
2206
  ],
2207
  "logging_steps": 1,
 
2225
  "should_evaluate": false,
2226
  "should_log": false,
2227
  "should_save": true,
2228
+ "should_training_stop": true
2229
  },
2230
  "attributes": {}
2231
  }
2232
  },
2233
+ "total_flos": 4.133268278728458e+17,
2234
  "train_batch_size": 8,
2235
  "trial_name": null,
2236
  "trial_params": null