End of training
Files changed:
- README.md                          +2 -38
- config.json                        +4 -3
- model-00001-of-00002.safetensors   +2 -2
- model-00002-of-00002.safetensors   +1 -1
- model.safetensors.index.json       +1 -1
- training_args.bin                  +1 -1
README.md CHANGED
@@ -1,7 +1,7 @@
 ---
 library_name: transformers
 license: gemma
-base_model: google/paligemma-3b-pt-
+base_model: google/paligemma-3b-pt-448
 tags:
 - generated_from_trainer
 model-index:
@@ -14,9 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # paligemma-rocov2
 
-This model is a fine-tuned version of [google/paligemma-3b-pt-
-It achieves the following results on the evaluation set:
-- Loss: 2.4124
+This model is a fine-tuned version of [google/paligemma-3b-pt-448](https://huggingface.co/google/paligemma-3b-pt-448) on an unknown dataset.
 
 ## Model description
 
@@ -46,40 +44,6 @@ The following hyperparameters were used during training:
 
 ### Training results
 
-| Training Loss | Epoch  | Step  | Validation Loss |
-|:-------------:|:------:|:-----:|:---------------:|
-| 2.5859        | 0.1223 | 1000  | 2.5663          |
-| 2.5199        | 0.2446 | 2000  | 2.5207          |
-| 2.4867        | 0.3668 | 3000  | 2.4848          |
-| 2.4727        | 0.4891 | 4000  | 2.4690          |
-| 2.4836        | 0.6114 | 5000  | 2.4517          |
-| 2.4412        | 0.7337 | 6000  | 2.4379          |
-| 2.4192        | 0.8560 | 7000  | 2.4280          |
-| 2.427         | 0.9782 | 8000  | 2.4206          |
-| 2.2767        | 1.1005 | 9000  | 2.4229          |
-| 2.3071        | 1.2228 | 10000 | 2.4188          |
-| 2.2753        | 1.3451 | 11000 | 2.4153          |
-| 2.2722        | 1.4674 | 12000 | 2.4150          |
-| 2.2533        | 1.5896 | 13000 | 2.4124          |
-| 2.2494        | 1.7119 | 14000 | 2.4099          |
-| 2.2546        | 1.8342 | 15000 | 2.4086          |
-| 2.2548        | 1.9565 | 16000 | 2.4068          |
-| 2.2179        | 2.0787 | 17000 | 2.4116          |
-| 2.2062        | 2.2010 | 18000 | 2.4125          |
-| 2.2588        | 2.3233 | 19000 | 2.4117          |
-| 2.2389        | 2.4456 | 20000 | 2.4122          |
-| 2.2231        | 2.5679 | 21000 | 2.4113          |
-| 2.232         | 2.6901 | 22000 | 2.4112          |
-| 2.2101        | 2.8124 | 23000 | 2.4109          |
-| 2.2038        | 2.9347 | 24000 | 2.4110          |
-| 2.2442        | 3.0570 | 25000 | 2.4116          |
-| 2.2474        | 3.1793 | 26000 | 2.4118          |
-| 2.2272        | 3.3015 | 27000 | 2.4123          |
-| 2.1801        | 3.4238 | 28000 | 2.4125          |
-| 2.1884        | 3.5461 | 29000 | 2.4125          |
-| 2.2271        | 3.6684 | 30000 | 2.4124          |
-| 2.182         | 3.7907 | 31000 | 2.4125          |
-| 2.1832        | 3.9129 | 32000 | 2.4124          |
 
 
 ### Framework versions
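The updated card names google/paligemma-3b-pt-448 as the base model. As a minimal sketch of loading the resulting checkpoint with transformers, assuming the fine-tuned weights are published under some repo id (the one below is a placeholder, not taken from this commit):

```python
# Minimal sketch: load the fine-tuned PaliGemma checkpoint and run a caption query.
# "your-org/paligemma-rocov2" is a hypothetical repo id; substitute the real one.
import torch
from PIL import Image
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration

model_id = "your-org/paligemma-rocov2"  # placeholder, not named in this diff
processor = AutoProcessor.from_pretrained(model_id)
model = PaliGemmaForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

image = Image.open("example.png").convert("RGB")  # any test image
inputs = processor(text="caption en", images=image, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=64)
# Strip the prompt tokens before decoding the generated caption.
caption = processor.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
print(caption)
```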
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "google/paligemma-3b-pt-
+  "_name_or_path": "google/paligemma-3b-pt-448",
   "architectures": [
     "PaliGemmaForConditionalGeneration"
   ],
@@ -17,7 +17,7 @@
     "model_type": "gemma",
     "num_attention_heads": 8,
     "num_hidden_layers": 18,
-    "num_image_tokens":
+    "num_image_tokens": 1024,
     "num_key_value_heads": 1,
     "torch_dtype": "float32",
     "vocab_size": 257216
@@ -26,11 +26,12 @@
   "transformers_version": "4.44.2",
   "vision_config": {
     "hidden_size": 1152,
+    "image_size": 448,
     "intermediate_size": 4304,
     "model_type": "siglip_vision_model",
     "num_attention_heads": 16,
     "num_hidden_layers": 27,
-    "num_image_tokens":
+    "num_image_tokens": 1024,
     "patch_size": 14,
     "projection_dim": 2048,
     "projector_hidden_act": "gelu_fast",
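The two num_image_tokens changes are consistent with the 448-pixel input resolution now recorded in vision_config: the SigLIP tower splits the image into non-overlapping patch_size x patch_size patches and emits one token per patch. A quick sanity check of that arithmetic:

```python
# Sanity check: num_image_tokens = (image_size / patch_size)^2 for a ViT-style patch grid.
image_size = 448   # "image_size" added to vision_config in this commit
patch_size = 14    # "patch_size" already present in vision_config
num_image_tokens = (image_size // patch_size) ** 2
print(num_image_tokens)  # 32 * 32 = 1024, matching "num_image_tokens": 1024
```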
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:7f386d29458bd3506cfc56a0160995c78a4ff64f9b54af19cd882d62a4eb25ae
+size 4986813864
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:932622f017540e0293d32ffa49bdb2617f5911415e9874aee228242cf1bf9c85
 size 861970608
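The two .safetensors entries are Git LFS pointer files, so the commit only replaces the expected sha256 digest (and, for the first shard, the byte size) of each weight shard. A small sketch for verifying a downloaded shard against its pointer, using the values from the second shard above:

```python
# Sketch: check a downloaded shard against its Git LFS pointer (sha256 digest + size).
import hashlib
import os

path = "model-00002-of-00002.safetensors"  # local copy of the shard
expected_oid = "932622f017540e0293d32ffa49bdb2617f5911415e9874aee228242cf1bf9c85"
expected_size = 861970608  # bytes, from the pointer file

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert digest.hexdigest() == expected_oid, "sha256 mismatch"
print("shard matches its LFS pointer")
```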
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size":
+    "total_size": 5848702432
   },
   "weight_map": {
     "language_model.model.embed_tokens.weight": "model-00001-of-00002.safetensors",
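The index file records the total tensor payload and maps every tensor name to the shard that stores it, which is how transformers reassembles the two-shard checkpoint. Inspecting it directly is straightforward:

```python
# Sketch: inspect the sharded-checkpoint index updated in this commit.
import json

with open("model.safetensors.index.json") as f:
    index = json.load(f)

print(index["metadata"]["total_size"])  # 5848702432 bytes of tensor data across the shards
print(index["weight_map"]["language_model.model.embed_tokens.weight"])
# -> "model-00001-of-00002.safetensors"
```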
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1feedb5d06bfd80099941e903459d7ca332bf5eb36d369b55b76106ac12dcc63
 size 5176
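training_args.bin is the pickled TrainingArguments object that the Trainer stores next to the checkpoint. Assuming a compatible transformers install (the config records 4.44.2), it can be inspected as sketched below; the fields printed are just examples:

```python
# Sketch: training_args.bin is a pickled transformers.TrainingArguments object.
import torch

args = torch.load("training_args.bin", weights_only=False)  # a plain pickle, not tensors
print(type(args).__name__)  # typically TrainingArguments (or a Seq2Seq variant)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
```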