Resume training from the step-388 checkpoint

Browse files

Files changed (4) hide show

README.md +4 -1
adapter_model.safetensors +1 -1
trainer_state.json +90 -12
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -16,6 +16,8 @@ should probably proofread and complete it, then remove this comment. -->
 # checkpoints-2-epochs
 This model is a fine-tuned version of [vidore/colpaligemma-3b-pt-448-base](https://huggingface.co/vidore/colpaligemma-3b-pt-448-base) on the 3sara/validated_colpali_italian_documents_with_images dataset.
 ## Model description
@@ -43,13 +45,14 @@ The following hyperparameters were used during training:
 - optimizer: adamw_torch with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 100
-- num_epochs: 4
 ### Training results
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
 | No log        | 0.0103 | 1    | 0.3835          |
 ### Framework versions

 # checkpoints-2-epochs
 This model is a fine-tuned version of [vidore/colpaligemma-3b-pt-448-base](https://huggingface.co/vidore/colpaligemma-3b-pt-448-base) on the 3sara/validated_colpali_italian_documents_with_images dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.3171
 ## Model description
 - optimizer: adamw_torch with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 100
+- num_epochs: 6
 ### Training results
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
 | No log        | 0.0103 | 1    | 0.3835          |
+| 0.0863        | 5.1436 | 500  | 0.3171          |
 ### Framework versions

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc561df9b5357edf39b1b59e1fe8bae061a0981dec12cbb389886b3eea62423a
 size 157210936

 version https://git-lfs.github.com/spec/v1
+oid sha256:b822c544ed100e8d0000e3c8e0b688c85fc743fc3afeebd45ad22c7d82eee25d
 size 157210936

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.994871794871795,
   "eval_steps": 500,
-  "global_step": 388,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -151,19 +151,97 @@
       "step": 380
     },
     {
-      "epoch": 3.994871794871795,
-      "step": 388,
-      "total_flos": 9.34866912820896e+16,
-      "train_loss": 0.0268674089582925,
-      "train_runtime": 4397.2831,
-      "train_samples_per_second": 1.417,
-      "train_steps_per_second": 0.088
     }
   ],
   "logging_steps": 20,
-  "max_steps": 388,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 4,
   "save_steps": 200,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -177,7 +255,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.34866912820896e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 5.984615384615385,
   "eval_steps": 500,
+  "global_step": 582,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "step": 380
     },
     {
+      "epoch": 4.123076923076923,
+      "grad_norm": 4.136926174163818,
+      "learning_rate": 1.8983402489626556e-05,
+      "loss": 0.1377,
+      "step": 400
+    },
+    {
+      "epoch": 4.328205128205128,
+      "grad_norm": 0.3030303716659546,
+      "learning_rate": 1.690871369294606e-05,
+      "loss": 0.0382,
+      "step": 420
+    },
+    {
+      "epoch": 4.533333333333333,
+      "grad_norm": 0.07416559755802155,
+      "learning_rate": 1.4834024896265561e-05,
+      "loss": 0.0615,
+      "step": 440
+    },
+    {
+      "epoch": 4.7384615384615385,
+      "grad_norm": 6.657896995544434,
+      "learning_rate": 1.2759336099585062e-05,
+      "loss": 0.0241,
+      "step": 460
+    },
+    {
+      "epoch": 4.943589743589744,
+      "grad_norm": 0.006628558039665222,
+      "learning_rate": 1.0684647302904565e-05,
+      "loss": 0.0742,
+      "step": 480
+    },
+    {
+      "epoch": 5.143589743589744,
+      "grad_norm": 0.06650497764348984,
+      "learning_rate": 8.609958506224066e-06,
+      "loss": 0.0863,
+      "step": 500
+    },
+    {
+      "epoch": 5.143589743589744,
+      "eval_loss": 0.3170950710773468,
+      "eval_runtime": 176.7917,
+      "eval_samples_per_second": 1.165,
+      "eval_steps_per_second": 0.294,
+      "step": 500
+    },
+    {
+      "epoch": 5.348717948717948,
+      "grad_norm": 0.06573180109262466,
+      "learning_rate": 6.535269709543569e-06,
+      "loss": 0.0203,
+      "step": 520
+    },
+    {
+      "epoch": 5.553846153846154,
+      "grad_norm": 0.2770240604877472,
+      "learning_rate": 4.460580912863071e-06,
+      "loss": 0.0249,
+      "step": 540
+    },
+    {
+      "epoch": 5.758974358974359,
+      "grad_norm": 0.49672994017601013,
+      "learning_rate": 2.3858921161825725e-06,
+      "loss": 0.0353,
+      "step": 560
+    },
+    {
+      "epoch": 5.964102564102564,
+      "grad_norm": 0.03965625539422035,
+      "learning_rate": 3.112033195020747e-07,
+      "loss": 0.052,
+      "step": 580
+    },
+    {
+      "epoch": 5.984615384615385,
+      "step": 582,
+      "total_flos": 1.401094867087152e+17,
+      "train_loss": 0.017203848492162128,
+      "train_runtime": 4704.2352,
+      "train_samples_per_second": 1.987,
+      "train_steps_per_second": 0.124
     }
   ],
   "logging_steps": 20,
+  "max_steps": 582,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
   "save_steps": 200,
   "stateful_callbacks": {
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.401094867087152e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ef95505f4cf0ce88ff57f2dd27802631727fca8c4b58453a7f10d2747fd6259
 size 5713

 version https://git-lfs.github.com/spec/v1
+oid sha256:9bc248da90c8585072e81e8f1dfd06c2c4ce4a192733c67cc5205ec4504b9e8e
 size 5713