kokovova committed
Commit e181544 · verified · 1 Parent(s): e48eff2

Training in progress, step 200, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d9c3c0e1fd517b5abc94df3ca8c0a09e23f16ec05c206eab73e819ba8d7894de
+oid sha256:ad0c5305d977f5686093eb71fc2bba946488ee7cf7178547dc03de653246bac1
 size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:763707e860dfb2853aa5098824a9ac4e97fc1d4ee6ab1f90b6a691178fe986b4
+oid sha256:56b4fda42154c448594c42d7e9337c9a05f02d2c1979a69e2d6ab4f0252ad2df
 size 325339796
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:25dfb2a0841ee402d4a2a03f3d5c26d4cb6314da40dd61b87b3528fcc2c8bccf
+oid sha256:5650c81035b69a51d4dc894dda45abfe03b5dba1f240c8185d4d47d7114a6953
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6da36b358afefa2fe6fe8e3889efc77dfb89ac577ed7bb55c631123a9ebe149
+oid sha256:56e06160672e234a504b2a9f8fb3d80ed8c221e80fde36a5548d37e259bd5bc6
 size 1064
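
Each binary above (adapter_model.safetensors, optimizer.pt, rng_state.pth, scheduler.pt) is tracked with Git LFS, so the diff only shows the updated pointer file: the LFS spec version, the SHA-256 oid of the new object, and its size in bytes. As a minimal sketch (assuming the objects have already been pulled locally; the local path below is hypothetical), a downloaded file can be checked against its pointer like this:

import hashlib
import os

def verify_lfs_pointer(local_path, expected_oid, expected_size):
    # Compare a downloaded LFS object against the oid/size from its pointer file.
    if os.path.getsize(local_path) != expected_size:
        return False
    sha = hashlib.sha256()
    with open(local_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return sha.hexdigest() == expected_oid

# Hypothetical local path; oid and size taken from the new adapter_model pointer above.
print(verify_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "ad0c5305d977f5686093eb71fc2bba946488ee7cf7178547dc03de653246bac1",
    639691872,
))

The same check applies to any of the pointers in this commit; only the oid and size differ per file.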
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.6532483100891113,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.019674711437565582,
+  "best_metric": 2.6275486946105957,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.026232948583420776,
   "eval_steps": 50,
-  "global_step": 150,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -144,6 +144,49 @@
       "eval_samples_per_second": 19.357,
       "eval_steps_per_second": 4.842,
       "step": 150
+    },
+    {
+      "epoch": 0.02098635886673662,
+      "grad_norm": 3.4204041957855225,
+      "learning_rate": 2.2444444444444445e-06,
+      "loss": 2.567,
+      "step": 160
+    },
+    {
+      "epoch": 0.02229800629590766,
+      "grad_norm": 3.5250372886657715,
+      "learning_rate": 1.6833333333333332e-06,
+      "loss": 2.6192,
+      "step": 170
+    },
+    {
+      "epoch": 0.0236096537250787,
+      "grad_norm": 4.614537239074707,
+      "learning_rate": 1.1222222222222222e-06,
+      "loss": 2.7407,
+      "step": 180
+    },
+    {
+      "epoch": 0.024921301154249738,
+      "grad_norm": 4.56196403503418,
+      "learning_rate": 5.611111111111111e-07,
+      "loss": 2.6979,
+      "step": 190
+    },
+    {
+      "epoch": 0.026232948583420776,
+      "grad_norm": 7.957613468170166,
+      "learning_rate": 0.0,
+      "loss": 2.9207,
+      "step": 200
+    },
+    {
+      "epoch": 0.026232948583420776,
+      "eval_loss": 2.6275486946105957,
+      "eval_runtime": 165.8027,
+      "eval_samples_per_second": 19.36,
+      "eval_steps_per_second": 4.843,
+      "step": 200
     }
   ],
   "logging_steps": 10,
@@ -167,12 +210,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.03265242382336e+16,
+  "total_flos": 6.71020323176448e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null