3sara committed on
Commit
d5627f3
·
verified ·
1 Parent(s): 2de13ba

from 388 checkpoint

Browse files
README.md CHANGED
@@ -16,6 +16,8 @@ should probably proofread and complete it, then remove this comment. -->
16
  # checkpoints-2-epochs
17
 
18
  This model is a fine-tuned version of [vidore/colpaligemma-3b-pt-448-base](https://huggingface.co/vidore/colpaligemma-3b-pt-448-base) on the 3sara/validated_colpali_italian_documents_with_images dataset.
 
 
19
 
20
  ## Model description
21
 
@@ -43,13 +45,14 @@ The following hyperparameters were used during training:
43
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
44
  - lr_scheduler_type: linear
45
  - lr_scheduler_warmup_steps: 100
46
- - num_epochs: 4
47
 
48
  ### Training results
49
 
50
  | Training Loss | Epoch | Step | Validation Loss |
51
  |:-------------:|:------:|:----:|:---------------:|
52
  | No log | 0.0103 | 1 | 0.3835 |
 
53
 
54
 
55
  ### Framework versions
 
16
  # checkpoints-2-epochs
17
 
18
  This model is a fine-tuned version of [vidore/colpaligemma-3b-pt-448-base](https://huggingface.co/vidore/colpaligemma-3b-pt-448-base) on the 3sara/validated_colpali_italian_documents_with_images dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.3171
21
 
22
  ## Model description
23
 
 
45
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
46
  - lr_scheduler_type: linear
47
  - lr_scheduler_warmup_steps: 100
48
+ - num_epochs: 6
49
 
50
  ### Training results
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:------:|:----:|:---------------:|
54
  | No log | 0.0103 | 1 | 0.3835 |
55
+ | 0.0863 | 5.1436 | 500 | 0.3171 |
56
 
57
 
58
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc561df9b5357edf39b1b59e1fe8bae061a0981dec12cbb389886b3eea62423a
3
  size 157210936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b822c544ed100e8d0000e3c8e0b688c85fc743fc3afeebd45ad22c7d82eee25d
3
  size 157210936
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 3.994871794871795,
6
  "eval_steps": 500,
7
- "global_step": 388,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -151,19 +151,97 @@
151
  "step": 380
152
  },
153
  {
154
- "epoch": 3.994871794871795,
155
- "step": 388,
156
- "total_flos": 9.34866912820896e+16,
157
- "train_loss": 0.0268674089582925,
158
- "train_runtime": 4397.2831,
159
- "train_samples_per_second": 1.417,
160
- "train_steps_per_second": 0.088
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  }
162
  ],
163
  "logging_steps": 20,
164
- "max_steps": 388,
165
  "num_input_tokens_seen": 0,
166
- "num_train_epochs": 4,
167
  "save_steps": 200,
168
  "stateful_callbacks": {
169
  "TrainerControl": {
@@ -177,7 +255,7 @@
177
  "attributes": {}
178
  }
179
  },
180
- "total_flos": 9.34866912820896e+16,
181
  "train_batch_size": 4,
182
  "trial_name": null,
183
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 5.984615384615385,
6
  "eval_steps": 500,
7
+ "global_step": 582,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
151
  "step": 380
152
  },
153
  {
154
+ "epoch": 4.123076923076923,
155
+ "grad_norm": 4.136926174163818,
156
+ "learning_rate": 1.8983402489626556e-05,
157
+ "loss": 0.1377,
158
+ "step": 400
159
+ },
160
+ {
161
+ "epoch": 4.328205128205128,
162
+ "grad_norm": 0.3030303716659546,
163
+ "learning_rate": 1.690871369294606e-05,
164
+ "loss": 0.0382,
165
+ "step": 420
166
+ },
167
+ {
168
+ "epoch": 4.533333333333333,
169
+ "grad_norm": 0.07416559755802155,
170
+ "learning_rate": 1.4834024896265561e-05,
171
+ "loss": 0.0615,
172
+ "step": 440
173
+ },
174
+ {
175
+ "epoch": 4.7384615384615385,
176
+ "grad_norm": 6.657896995544434,
177
+ "learning_rate": 1.2759336099585062e-05,
178
+ "loss": 0.0241,
179
+ "step": 460
180
+ },
181
+ {
182
+ "epoch": 4.943589743589744,
183
+ "grad_norm": 0.006628558039665222,
184
+ "learning_rate": 1.0684647302904565e-05,
185
+ "loss": 0.0742,
186
+ "step": 480
187
+ },
188
+ {
189
+ "epoch": 5.143589743589744,
190
+ "grad_norm": 0.06650497764348984,
191
+ "learning_rate": 8.609958506224066e-06,
192
+ "loss": 0.0863,
193
+ "step": 500
194
+ },
195
+ {
196
+ "epoch": 5.143589743589744,
197
+ "eval_loss": 0.3170950710773468,
198
+ "eval_runtime": 176.7917,
199
+ "eval_samples_per_second": 1.165,
200
+ "eval_steps_per_second": 0.294,
201
+ "step": 500
202
+ },
203
+ {
204
+ "epoch": 5.348717948717948,
205
+ "grad_norm": 0.06573180109262466,
206
+ "learning_rate": 6.535269709543569e-06,
207
+ "loss": 0.0203,
208
+ "step": 520
209
+ },
210
+ {
211
+ "epoch": 5.553846153846154,
212
+ "grad_norm": 0.2770240604877472,
213
+ "learning_rate": 4.460580912863071e-06,
214
+ "loss": 0.0249,
215
+ "step": 540
216
+ },
217
+ {
218
+ "epoch": 5.758974358974359,
219
+ "grad_norm": 0.49672994017601013,
220
+ "learning_rate": 2.3858921161825725e-06,
221
+ "loss": 0.0353,
222
+ "step": 560
223
+ },
224
+ {
225
+ "epoch": 5.964102564102564,
226
+ "grad_norm": 0.03965625539422035,
227
+ "learning_rate": 3.112033195020747e-07,
228
+ "loss": 0.052,
229
+ "step": 580
230
+ },
231
+ {
232
+ "epoch": 5.984615384615385,
233
+ "step": 582,
234
+ "total_flos": 1.401094867087152e+17,
235
+ "train_loss": 0.017203848492162128,
236
+ "train_runtime": 4704.2352,
237
+ "train_samples_per_second": 1.987,
238
+ "train_steps_per_second": 0.124
239
  }
240
  ],
241
  "logging_steps": 20,
242
+ "max_steps": 582,
243
  "num_input_tokens_seen": 0,
244
+ "num_train_epochs": 6,
245
  "save_steps": 200,
246
  "stateful_callbacks": {
247
  "TrainerControl": {
 
255
  "attributes": {}
256
  }
257
  },
258
+ "total_flos": 1.401094867087152e+17,
259
  "train_batch_size": 4,
260
  "trial_name": null,
261
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ef95505f4cf0ce88ff57f2dd27802631727fca8c4b58453a7f10d2747fd6259
3
  size 5713
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bc248da90c8585072e81e8f1dfd06c2c4ce4a192733c67cc5205ec4504b9e8e
3
  size 5713